]>
Commit | Line | Data |
---|---|---|
0316feee JB |
1 | --- gdal-1.11.0/alg/gdalgrid_priv.h.orig 2014-04-16 22:04:48.000000000 +0200 |
2 | +++ gdal-1.11.0/alg/gdalgrid_priv.h 2014-05-11 20:50:49.579220569 +0200 | |
3 | @@ -51,6 +51,21 @@ | |
4 | const float *pafZ; | |
5 | } GDALGridExtraParameters; | |
6 | ||
7 | +#ifdef HAVE_SSE_AT_COMPILE_TIME | |
8 | +int CPLHaveRuntimeSSE(); | |
9 | + | |
10 | +CPLErr | |
11 | +GDALGridInverseDistanceToAPower2NoSmoothingNoSearchSSE( | |
12 | + const void *poOptions, | |
13 | + GUInt32 nPoints, | |
14 | + const double *unused_padfX, | |
15 | + const double *unused_padfY, | |
16 | + const double *unused_padfZ, | |
17 | + double dfXPoint, double dfYPoint, | |
18 | + double *pdfValue, | |
19 | + void* hExtraParamsIn ); | |
20 | +#endif | |
21 | + | |
22 | #ifdef HAVE_AVX_AT_COMPILE_TIME | |
23 | int CPLHaveRuntimeAVX(); | |
24 | ||
25 | --- gdal-1.11.0/alg/gdalgridsse.cpp.orig 1970-01-01 01:00:00.000000000 +0100 | |
26 | +++ gdal-1.11.0/alg/gdalgridsse.cpp 2014-05-11 21:54:46.609140595 +0200 | |
27 | @@ -0,0 +1,210 @@ | |
28 | +#include "gdalgrid.h" | |
29 | +#include "gdalgrid_priv.h" | |
30 | + | |
31 | +#ifdef HAVE_SSE_AT_COMPILE_TIME | |
32 | +#include <xmmintrin.h> | |
33 | + | |
34 | +/************************************************************************/ | |
35 | +/* CPLHaveRuntimeSSE() */ | |
36 | +/************************************************************************/ | |
37 | +/* Returns non-zero when SSE is reported as usable, 0 otherwise; exactly one variant below is compiled. */ | |
38 | +#define CPUID_SSE_EDX_BIT 25 | |
39 | + | |
40 | +#if (defined(_M_X64) || defined(__x86_64)) | |
41 | +/* 64-bit x86 targets: always report SSE as available, without any CPUID query. */ | |
42 | +int CPLHaveRuntimeSSE() | |
43 | +{ | |
44 | + return TRUE; | |
45 | +} | |
46 | + | |
47 | +#elif defined(__GNUC__) && defined(__i386__) | |
48 | +/* 32-bit GCC: call GCC_CPUID with level 1 and test bit CPUID_SSE_EDX_BIT of the fourth result word. */ | |
49 | +int CPLHaveRuntimeSSE() | |
50 | +{ | |
51 | + int cpuinfo[4] = {0,0,0,0}; | |
52 | + GCC_CPUID(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); | |
53 | + return (cpuinfo[3] & (1 << CPUID_SSE_EDX_BIT)) != 0; | |
54 | +} | |
55 | + | |
56 | +#elif defined(_MSC_VER) && defined(_M_IX86) | |
57 | +/* 32-bit MSVC: use __cpuid from <intrin.h>; for _MSC_VER <= 1310 an inline-asm replacement is defined here. */ | |
58 | +#if _MSC_VER <= 1310 | |
59 | +static void inline __cpuid(int cpuinfo[4], int level) | |
60 | +{ | |
61 | + __asm | |
62 | + { | |
63 | + push ebx | |
64 | + push esi | |
65 | + | |
66 | + mov esi,cpuinfo | |
67 | + mov eax,level | |
68 | + cpuid | |
69 | + mov dword ptr [esi], eax | |
70 | + mov dword ptr [esi+4],ebx | |
71 | + mov dword ptr [esi+8],ecx | |
72 | + mov dword ptr [esi+0Ch],edx | |
73 | + | |
74 | + pop esi | |
75 | + pop ebx | |
76 | + } | |
77 | +} | |
78 | +#else | |
79 | +#include <intrin.h> | |
80 | +#endif | |
81 | +/* Same bit test as the GCC variant: cpuinfo[3] holds EDX (see the store to [esi+0Ch] in the asm version). */ | |
82 | +int CPLHaveRuntimeSSE() | |
83 | +{ | |
84 | + int cpuinfo[4] = {0,0,0,0}; | |
85 | + __cpuid(cpuinfo, 1); | |
86 | + return (cpuinfo[3] & (1 << CPUID_SSE_EDX_BIT)) != 0; | |
87 | +} | |
88 | + | |
89 | +#else | |
90 | +/* Any other compiler/target combination: report SSE as unavailable. */ | |
91 | +int CPLHaveRuntimeSSE() | |
92 | +{ | |
93 | + return FALSE; | |
94 | +} | |
95 | +#endif | |
96 | + | |
97 | +/************************************************************************/ | |
98 | +/* GDALGridInverseDistanceToAPower2NoSmoothingNoSearchSSE() */ | |
99 | +/************************************************************************/ | |
100 | +/* Sets *pdfValue to the 1/r2-weighted mean of pafZ over nPoints points (r2 = squared distance to (dfXPoint, dfYPoint)), to the Z of the first point with r2 < fEpsilon, or to dfNoDataValue when the weight sum is zero; always returns CE_None. */ | |
101 | +CPLErr | |
102 | +GDALGridInverseDistanceToAPower2NoSmoothingNoSearchSSE( | |
103 | + const void *poOptions, | |
104 | + GUInt32 nPoints, | |
d6fd1627 JB |
105 | + CPL_UNUSED const double *unused_padfX, |
106 | + CPL_UNUSED const double *unused_padfY, | |
107 | + CPL_UNUSED const double *unused_padfZ, | |
0316feee JB |
108 | + double dfXPoint, double dfYPoint, |
109 | + double *pdfValue, | |
110 | + void* hExtraParamsIn ) | |
111 | +{ | |
112 | + size_t i = 0; | |
113 | + GDALGridExtraParameters* psExtraParams = (GDALGridExtraParameters*) hExtraParamsIn; | |
114 | + const float* pafX = psExtraParams->pafX; | |
115 | + const float* pafY = psExtraParams->pafY; | |
116 | + const float* pafZ = psExtraParams->pafZ; | |
117 | +/* Points are read from the float arrays above; the three unused_padf* double arrays are never accessed. */ | |
118 | + const float fEpsilon = 0.0000000000001f; | |
119 | + const float fXPoint = (float)dfXPoint; | |
120 | + const float fYPoint = (float)dfYPoint; | |
121 | + const __m128 xmm_small = _mm_load1_ps((float*)&fEpsilon); | |
122 | + const __m128 xmm_x = _mm_load1_ps((float*)&fXPoint); | |
123 | + const __m128 xmm_y = _mm_load1_ps((float*)&fYPoint); | |
124 | + __m128 xmm_nominator = _mm_setzero_ps(); | |
125 | + __m128 xmm_denominator = _mm_setzero_ps(); | |
126 | + int mask = 0; | |
127 | +/* Vector loop, LOOP_SIZE points per pass. NOTE(review): _mm_load_ps is the aligned load, so pafX/pafY/pafZ are presumably 16-byte aligned by the caller - confirm. */ | |
128 | +#if defined(__x86_64) || defined(_M_X64) | |
129 | + /* This would also work in 32-bit mode, but only 8 XMM registers are available there, */ | |
130 | + /* whereas 64-bit mode has 16. */ | |
131 | +#define LOOP_SIZE 8 | |
132 | + size_t nPointsRound = (nPoints / LOOP_SIZE) * LOOP_SIZE; | |
133 | + for ( i = 0; i < nPointsRound; i += LOOP_SIZE ) | |
134 | + { | |
135 | + __m128 xmm_rx = _mm_sub_ps(_mm_load_ps(pafX + i), xmm_x); /* rx = pafX[i] - fXPoint */ | |
136 | + __m128 xmm_rx_4 = _mm_sub_ps(_mm_load_ps(pafX + i + 4), xmm_x); | |
137 | + __m128 xmm_ry = _mm_sub_ps(_mm_load_ps(pafY + i), xmm_y); /* ry = pafY[i] - fYPoint */ | |
138 | + __m128 xmm_ry_4 = _mm_sub_ps(_mm_load_ps(pafY + i + 4), xmm_y); | |
139 | + __m128 xmm_r2 = _mm_add_ps(_mm_mul_ps(xmm_rx, xmm_rx), /* r2 = rx * rx + ry * ry */ | |
140 | + _mm_mul_ps(xmm_ry, xmm_ry)); | |
141 | + __m128 xmm_r2_4 = _mm_add_ps(_mm_mul_ps(xmm_rx_4, xmm_rx_4), | |
142 | + _mm_mul_ps(xmm_ry_4, xmm_ry_4)); | |
143 | + __m128 xmm_invr2 = _mm_rcp_ps(xmm_r2); /* invr2 = 1.0f / r2 (NOTE(review): _mm_rcp_ps is documented as an approximate reciprocal) */ | |
144 | + __m128 xmm_invr2_4 = _mm_rcp_ps(xmm_r2_4); | |
145 | + xmm_nominator = _mm_add_ps(xmm_nominator, /* nominator += invr2 * pafZ[i] */ | |
146 | + _mm_mul_ps(xmm_invr2, _mm_load_ps(pafZ + i))); | |
147 | + xmm_nominator = _mm_add_ps(xmm_nominator, | |
148 | + _mm_mul_ps(xmm_invr2_4, _mm_load_ps(pafZ + i + 4))); | |
149 | + xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2); /* denominator += invr2 */ | |
150 | + xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2_4); | |
151 | + mask = _mm_movemask_ps(_mm_cmplt_ps(xmm_r2, xmm_small)) | /* if( r2 < fEpsilon) */ | |
152 | + (_mm_movemask_ps(_mm_cmplt_ps(xmm_r2_4, xmm_small)) << 4); | |
153 | + if( mask ) | |
154 | + break; | |
155 | + } | |
156 | +#else | |
157 | +#define LOOP_SIZE 4 | |
158 | + size_t nPointsRound = (nPoints / LOOP_SIZE) * LOOP_SIZE; | |
159 | + for ( i = 0; i < nPointsRound; i += LOOP_SIZE ) | |
160 | + { | |
161 | + __m128 xmm_rx = _mm_sub_ps(_mm_load_ps((float*)pafX + i), xmm_x); /* rx = pafX[i] - fXPoint */ | |
162 | + __m128 xmm_ry = _mm_sub_ps(_mm_load_ps((float*)pafY + i), xmm_y); /* ry = pafY[i] - fYPoint */ | |
163 | + __m128 xmm_r2 = _mm_add_ps(_mm_mul_ps(xmm_rx, xmm_rx), /* r2 = rx * rx + ry * ry */ | |
164 | + _mm_mul_ps(xmm_ry, xmm_ry)); | |
165 | + __m128 xmm_invr2 = _mm_rcp_ps(xmm_r2); /* invr2 = 1.0f / r2 (NOTE(review): _mm_rcp_ps is documented as an approximate reciprocal) */ | |
166 | + xmm_nominator = _mm_add_ps(xmm_nominator, /* nominator += invr2 * pafZ[i] */ | |
167 | + _mm_mul_ps(xmm_invr2, _mm_load_ps((float*)pafZ + i))); | |
168 | + xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2); /* denominator += invr2 */ | |
169 | + mask = _mm_movemask_ps(_mm_cmplt_ps(xmm_r2, xmm_small)); /* if( r2 < fEpsilon) */ | |
170 | + if( mask ) | |
171 | + break; | |
172 | + } | |
173 | +#endif | |
174 | + | |
175 | + /* The vector loop stopped early: find the lowest lane j with r2 < fEpsilon and return that point's Z value */ | |
176 | + if( mask ) | |
177 | + { | |
178 | + for(int j = 0; j < LOOP_SIZE; j++ ) | |
179 | + { | |
180 | + if( mask & (1 << j) ) | |
181 | + { | |
182 | + (*pdfValue) = (pafZ)[i + j]; | |
183 | + return CE_None; | |
184 | + } | |
185 | + } | |
186 | + } | |
187 | + | |
188 | + /* Copy the nominator and denominator accumulators out of the XMM registers */ | |
189 | + float afNominator[4], afDenominator[4]; | |
190 | + _mm_storeu_ps(afNominator, xmm_nominator); | |
191 | + _mm_storeu_ps(afDenominator, xmm_denominator); | |
192 | + /* Reduce the four lanes of each accumulator to a single scalar sum */ | |
193 | + float fNominator = afNominator[0] + afNominator[1] + | |
194 | + afNominator[2] + afNominator[3]; | |
195 | + float fDenominator = afDenominator[0] + afDenominator[1] + | |
196 | + afDenominator[2] + afDenominator[3]; | |
197 | + | |
198 | + /* Scalar loop over the remaining points (indices nPointsRound .. nPoints - 1) */ | |
199 | + for ( ; i < nPoints; i++ ) | |
200 | + { | |
201 | + const float fRX = pafX[i] - fXPoint; | |
202 | + const float fRY = pafY[i] - fYPoint; | |
203 | + const float fR2 = | |
204 | + fRX * fRX + fRY * fRY; | |
205 | + | |
206 | + // If the test point is close to the grid node, use the point | |
207 | + // value directly as a node value to avoid singularity. | |
208 | + if ( fR2 < 0.0000000000001 ) | |
209 | + { | |
210 | + break; | |
211 | + } | |
212 | + else | |
213 | + { | |
214 | + const float fInvR2 = 1.0f / fR2; | |
215 | + fNominator += fInvR2 * pafZ[i]; | |
216 | + fDenominator += fInvR2; | |
217 | + } | |
218 | + } | |
219 | +/* i != nPoints means the scalar loop broke out on a point whose r2 is below the threshold: use its Z value as is. */ | |
220 | + if( i != nPoints ) | |
221 | + { | |
222 | + (*pdfValue) = pafZ[i]; | |
223 | + } | |
224 | + else | |
225 | + if ( fDenominator == 0.0 ) | |
226 | + { | |
227 | + (*pdfValue) = | |
228 | + ((GDALGridInverseDistanceToAPowerOptions*)poOptions)->dfNoDataValue; | |
229 | + } | |
230 | + else | |
231 | + (*pdfValue) = fNominator / fDenominator; | |
232 | + | |
233 | + return CE_None; | |
234 | +} | |
235 | + | |
236 | + | |
237 | +#endif | |
238 | --- gdal-1.11.0/alg/gdalgrid.cpp.orig 2014-04-16 22:04:48.000000000 +0200 | |
239 | +++ gdal-1.11.0/alg/gdalgrid.cpp 2014-05-11 21:27:49.735840961 +0200 | |
240 | @@ -36,10 +36,6 @@ | |
241 | #include "cpl_multiproc.h" | |
242 | #include "gdalgrid_priv.h" | |
243 | ||
244 | -#ifdef HAVE_SSE_AT_COMPILE_TIME | |
245 | -#include <xmmintrin.h> | |
246 | -#endif | |
247 | - | |
d6fd1627 | 248 | CPL_CVSID("$Id: gdalgrid.cpp 27729 2014-09-24 00:40:16Z goatbar $"); |
0316feee JB |
249 | |
250 | #define TO_RADIANS (3.14159265358979323846 / 180.0) | |
251 | @@ -53,74 +49,6 @@ | |
252 | #endif /* DBL_MAX */ | |
253 | ||
254 | /************************************************************************/ | |
255 | -/* CPLHaveRuntimeSSE() */ | |
256 | -/************************************************************************/ | |
257 | - | |
258 | -#ifdef HAVE_SSE_AT_COMPILE_TIME | |
259 | - | |
260 | -#define CPUID_SSE_EDX_BIT 25 | |
261 | - | |
262 | -#if (defined(_M_X64) || defined(__x86_64)) | |
263 | - | |
264 | -static int CPLHaveRuntimeSSE() | |
265 | -{ | |
266 | - return TRUE; | |
267 | -} | |
268 | - | |
269 | -#elif defined(__GNUC__) && defined(__i386__) | |
270 | - | |
271 | -static int CPLHaveRuntimeSSE() | |
272 | -{ | |
273 | - int cpuinfo[4] = {0,0,0,0}; | |
274 | - GCC_CPUID(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); | |
275 | - return (cpuinfo[3] & (1 << CPUID_SSE_EDX_BIT)) != 0; | |
276 | -} | |
277 | - | |
278 | -#elif defined(_MSC_VER) && defined(_M_IX86) | |
279 | - | |
280 | -#if _MSC_VER <= 1310 | |
281 | -static void inline __cpuid(int cpuinfo[4], int level) | |
282 | -{ | |
283 | - __asm | |
284 | - { | |
285 | - push ebx | |
286 | - push esi | |
287 | - | |
288 | - mov esi,cpuinfo | |
289 | - mov eax,level | |
290 | - cpuid | |
291 | - mov dword ptr [esi], eax | |
292 | - mov dword ptr [esi+4],ebx | |
293 | - mov dword ptr [esi+8],ecx | |
294 | - mov dword ptr [esi+0Ch],edx | |
295 | - | |
296 | - pop esi | |
297 | - pop ebx | |
298 | - } | |
299 | -} | |
300 | -#else | |
301 | -#include <intrin.h> | |
302 | -#endif | |
303 | - | |
304 | -static int CPLHaveRuntimeSSE() | |
305 | -{ | |
306 | - int cpuinfo[4] = {0,0,0,0}; | |
307 | - __cpuid(cpuinfo, 1); | |
308 | - return (cpuinfo[3] & (1 << CPUID_SSE_EDX_BIT)) != 0; | |
309 | -} | |
310 | - | |
311 | -#else | |
312 | - | |
313 | -static int CPLHaveRuntimeSSE() | |
314 | -{ | |
315 | - return FALSE; | |
316 | -} | |
317 | - | |
318 | -#endif | |
319 | - | |
320 | -#endif // HAVE_SSE_AT_COMPILE_TIME | |
321 | - | |
322 | -/************************************************************************/ | |
323 | /* GDALGridGetPointBounds() */ | |
324 | /************************************************************************/ | |
325 | ||
326 | @@ -394,148 +322,6 @@ | |
327 | } | |
328 | ||
329 | /************************************************************************/ | |
330 | -/* GDALGridInverseDistanceToAPower2NoSmoothingNoSearchSSE() */ | |
331 | -/************************************************************************/ | |
332 | - | |
333 | -#ifdef HAVE_SSE_AT_COMPILE_TIME | |
334 | - | |
335 | -static CPLErr | |
336 | -GDALGridInverseDistanceToAPower2NoSmoothingNoSearchSSE( | |
337 | - const void *poOptions, | |
338 | - GUInt32 nPoints, | |
d6fd1627 JB |
339 | - CPL_UNUSED const double *unused_padfX, |
340 | - CPL_UNUSED const double *unused_padfY, | |
341 | - CPL_UNUSED const double *unused_padfZ, | |
0316feee JB |
342 | - double dfXPoint, double dfYPoint, |
343 | - double *pdfValue, | |
344 | - void* hExtraParamsIn ) | |
345 | -{ | |
346 | - size_t i = 0; | |
347 | - GDALGridExtraParameters* psExtraParams = (GDALGridExtraParameters*) hExtraParamsIn; | |
348 | - const float* pafX = psExtraParams->pafX; | |
349 | - const float* pafY = psExtraParams->pafY; | |
350 | - const float* pafZ = psExtraParams->pafZ; | |
351 | - | |
352 | - const float fEpsilon = 0.0000000000001f; | |
353 | - const float fXPoint = (float)dfXPoint; | |
354 | - const float fYPoint = (float)dfYPoint; | |
355 | - const __m128 xmm_small = _mm_load1_ps((float*)&fEpsilon); | |
356 | - const __m128 xmm_x = _mm_load1_ps((float*)&fXPoint); | |
357 | - const __m128 xmm_y = _mm_load1_ps((float*)&fYPoint); | |
358 | - __m128 xmm_nominator = _mm_setzero_ps(); | |
359 | - __m128 xmm_denominator = _mm_setzero_ps(); | |
360 | - int mask = 0; | |
361 | - | |
362 | -#if defined(__x86_64) || defined(_M_X64) | |
363 | - /* This would also work in 32bit mode, but there are only 8 XMM registers */ | |
364 | - /* whereas we have 16 for 64bit */ | |
365 | -#define LOOP_SIZE 8 | |
366 | - size_t nPointsRound = (nPoints / LOOP_SIZE) * LOOP_SIZE; | |
367 | - for ( i = 0; i < nPointsRound; i += LOOP_SIZE ) | |
368 | - { | |
369 | - __m128 xmm_rx = _mm_sub_ps(_mm_load_ps(pafX + i), xmm_x); /* rx = pafX[i] - fXPoint */ | |
370 | - __m128 xmm_rx_4 = _mm_sub_ps(_mm_load_ps(pafX + i + 4), xmm_x); | |
371 | - __m128 xmm_ry = _mm_sub_ps(_mm_load_ps(pafY + i), xmm_y); /* ry = pafY[i] - fYPoint */ | |
372 | - __m128 xmm_ry_4 = _mm_sub_ps(_mm_load_ps(pafY + i + 4), xmm_y); | |
373 | - __m128 xmm_r2 = _mm_add_ps(_mm_mul_ps(xmm_rx, xmm_rx), /* r2 = rx * rx + ry * ry */ | |
374 | - _mm_mul_ps(xmm_ry, xmm_ry)); | |
375 | - __m128 xmm_r2_4 = _mm_add_ps(_mm_mul_ps(xmm_rx_4, xmm_rx_4), | |
376 | - _mm_mul_ps(xmm_ry_4, xmm_ry_4)); | |
377 | - __m128 xmm_invr2 = _mm_rcp_ps(xmm_r2); /* invr2 = 1.0f / r2 */ | |
378 | - __m128 xmm_invr2_4 = _mm_rcp_ps(xmm_r2_4); | |
379 | - xmm_nominator = _mm_add_ps(xmm_nominator, /* nominator += invr2 * pafZ[i] */ | |
380 | - _mm_mul_ps(xmm_invr2, _mm_load_ps(pafZ + i))); | |
381 | - xmm_nominator = _mm_add_ps(xmm_nominator, | |
382 | - _mm_mul_ps(xmm_invr2_4, _mm_load_ps(pafZ + i + 4))); | |
383 | - xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2); /* denominator += invr2 */ | |
384 | - xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2_4); | |
385 | - mask = _mm_movemask_ps(_mm_cmplt_ps(xmm_r2, xmm_small)) | /* if( r2 < fEpsilon) */ | |
386 | - (_mm_movemask_ps(_mm_cmplt_ps(xmm_r2_4, xmm_small)) << 4); | |
387 | - if( mask ) | |
388 | - break; | |
389 | - } | |
390 | -#else | |
391 | -#define LOOP_SIZE 4 | |
392 | - size_t nPointsRound = (nPoints / LOOP_SIZE) * LOOP_SIZE; | |
393 | - for ( i = 0; i < nPointsRound; i += LOOP_SIZE ) | |
394 | - { | |
395 | - __m128 xmm_rx = _mm_sub_ps(_mm_load_ps((float*)pafX + i), xmm_x); /* rx = pafX[i] - fXPoint */ | |
396 | - __m128 xmm_ry = _mm_sub_ps(_mm_load_ps((float*)pafY + i), xmm_y); /* ry = pafY[i] - fYPoint */ | |
397 | - __m128 xmm_r2 = _mm_add_ps(_mm_mul_ps(xmm_rx, xmm_rx), /* r2 = rx * rx + ry * ry */ | |
398 | - _mm_mul_ps(xmm_ry, xmm_ry)); | |
399 | - __m128 xmm_invr2 = _mm_rcp_ps(xmm_r2); /* invr2 = 1.0f / r2 */ | |
400 | - xmm_nominator = _mm_add_ps(xmm_nominator, /* nominator += invr2 * pafZ[i] */ | |
401 | - _mm_mul_ps(xmm_invr2, _mm_load_ps((float*)pafZ + i))); | |
402 | - xmm_denominator = _mm_add_ps(xmm_denominator, xmm_invr2); /* denominator += invr2 */ | |
403 | - mask = _mm_movemask_ps(_mm_cmplt_ps(xmm_r2, xmm_small)); /* if( r2 < fEpsilon) */ | |
404 | - if( mask ) | |
405 | - break; | |
406 | - } | |
407 | -#endif | |
408 | - | |
409 | - /* Find which i triggered r2 < fEpsilon */ | |
410 | - if( mask ) | |
411 | - { | |
412 | - for(int j = 0; j < LOOP_SIZE; j++ ) | |
413 | - { | |
414 | - if( mask & (1 << j) ) | |
415 | - { | |
416 | - (*pdfValue) = (pafZ)[i + j]; | |
417 | - return CE_None; | |
418 | - } | |
419 | - } | |
420 | - } | |
421 | - | |
422 | - /* Get back nominator and denominator values for XMM registers */ | |
423 | - float afNominator[4], afDenominator[4]; | |
424 | - _mm_storeu_ps(afNominator, xmm_nominator); | |
425 | - _mm_storeu_ps(afDenominator, xmm_denominator); | |
426 | - | |
427 | - float fNominator = afNominator[0] + afNominator[1] + | |
428 | - afNominator[2] + afNominator[3]; | |
429 | - float fDenominator = afDenominator[0] + afDenominator[1] + | |
430 | - afDenominator[2] + afDenominator[3]; | |
431 | - | |
432 | - /* Do the few remaining loop iterations */ | |
433 | - for ( ; i < nPoints; i++ ) | |
434 | - { | |
435 | - const float fRX = pafX[i] - fXPoint; | |
436 | - const float fRY = pafY[i] - fYPoint; | |
437 | - const float fR2 = | |
438 | - fRX * fRX + fRY * fRY; | |
439 | - | |
440 | - // If the test point is close to the grid node, use the point | |
441 | - // value directly as a node value to avoid singularity. | |
442 | - if ( fR2 < 0.0000000000001 ) | |
443 | - { | |
444 | - break; | |
445 | - } | |
446 | - else | |
447 | - { | |
448 | - const float fInvR2 = 1.0f / fR2; | |
449 | - fNominator += fInvR2 * pafZ[i]; | |
450 | - fDenominator += fInvR2; | |
451 | - } | |
452 | - } | |
453 | - | |
454 | - if( i != nPoints ) | |
455 | - { | |
456 | - (*pdfValue) = pafZ[i]; | |
457 | - } | |
458 | - else | |
459 | - if ( fDenominator == 0.0 ) | |
460 | - { | |
461 | - (*pdfValue) = | |
462 | - ((GDALGridInverseDistanceToAPowerOptions*)poOptions)->dfNoDataValue; | |
463 | - } | |
464 | - else | |
465 | - (*pdfValue) = fNominator / fDenominator; | |
466 | - | |
467 | - return CE_None; | |
468 | -} | |
469 | -#endif // HAVE_SSE_AT_COMPILE_TIME | |
470 | - | |
471 | -/************************************************************************/ | |
472 | /* GDALGridMovingAverage() */ | |
473 | /************************************************************************/ | |
474 | ||
0316feee JB |
475 | --- gdal-1.11.0/alg/GNUmakefile.orig 2014-04-16 22:04:48.000000000 +0200 |
476 | +++ gdal-1.11.0/alg/GNUmakefile 2014-05-11 21:56:55.699137906 +0200 | |
477 | @@ -16,6 +16,10 @@ | |
478 | CPPFLAGS := -DHAVE_AVX_AT_COMPILE_TIME $(CPPFLAGS) | |
479 | endif | |
480 | ||
481 | +ifeq ($(HAVE_SSE_AT_COMPILE_TIME),yes) | |
482 | +CPPFLAGS := -DHAVE_SSE_AT_COMPILE_TIME $(CPPFLAGS) | |
483 | +endif | |
484 | + | |
485 | ifeq ($(HAVE_GEOS),yes) | |
486 | CPPFLAGS := -DHAVE_GEOS=1 $(GEOS_CFLAGS) $(CPPFLAGS) | |
487 | endif | |
488 | @@ -26,11 +30,14 @@ | |
489 | ||
490 | CPPFLAGS := $(GDAL_INCLUDE) $(CPPFLAGS) $(OPENCL_FLAGS) | |
491 | ||
492 | -default: $(OBJ:.o=.$(OBJ_EXT)) gdalgridavx.$(OBJ_EXT) | |
493 | +default: $(OBJ:.o=.$(OBJ_EXT)) gdalgridavx.$(OBJ_EXT) gdalgridsse.$(OBJ_EXT) | |
494 | ||
495 | gdalgridavx.$(OBJ_EXT): gdalgridavx.cpp | |
496 | $(CXX) $(CXXFLAGS) $(AVXFLAGS) $(CPPFLAGS) -c -o $@ $< | |
497 | ||
498 | +gdalgridsse.$(OBJ_EXT): gdalgridsse.cpp | |
499 | + $(CXX) $(CXXFLAGS) $(SSEFLAGS) $(CPPFLAGS) -c -o $@ $< | |
500 | + | |
501 | clean: | |
502 | $(RM) *.o $(O_OBJ) | |
503 | ||
504 | --- gdal-1.11.0/configure.in.orig 2014-05-11 20:11:46.272602746 +0200 | |
505 | +++ gdal-1.11.0/configure.in 2014-05-11 22:00:20.125800312 +0200 | |
506 | @@ -240,12 +240,12 @@ | |
507 | echo '#endif' >> detectsse.cpp | |
508 | if test -z "`${CXX} ${CXXFLAGS} -o detectsse detectsse.cpp 2>&1`" ; then | |
509 | AC_MSG_RESULT([yes]) | |
510 | - SSEFLAGS="-DHAVE_SSE_AT_COMPILE_TIME" | |
511 | + SSEFLAGS="" | |
512 | HAVE_SSE_AT_COMPILE_TIME=yes | |
513 | else | |
514 | if test -z "`${CXX} ${CXXFLAGS} -msse -o detectsse detectsse.cpp 2>&1`" ; then | |
515 | AC_MSG_RESULT([yes]) | |
516 | - SSEFLAGS="-msse -DHAVE_SSE_AT_COMPILE_TIME" | |
517 | + SSEFLAGS="-msse" | |
518 | HAVE_SSE_AT_COMPILE_TIME=yes | |
519 | else | |
520 | AC_MSG_RESULT([no]) | |
521 | @@ -279,16 +279,14 @@ | |
522 | esac | |
523 | fi | |
524 | ||
525 | - if test "$HAVE_SSE_AT_COMPILE_TIME" = "yes"; then | |
526 | - CFLAGS="$CFLAGS $SSEFLAGS" | |
527 | - CXXFLAGS="$CXXFLAGS $SSEFLAGS" | |
528 | - fi | |
529 | - | |
530 | rm -f detectsse* | |
531 | else | |
532 | AC_MSG_RESULT([no]) | |
533 | fi | |
534 | ||
535 | +AC_SUBST(SSEFLAGS,$SSEFLAGS) | |
536 | +AC_SUBST(HAVE_SSE_AT_COMPILE_TIME,$HAVE_SSE_AT_COMPILE_TIME) | |
537 | + | |
538 | dnl --------------------------------------------------------------------------- | |
539 | dnl Check AVX availability | |
540 | dnl --------------------------------------------------------------------------- | |
541 | --- gdal-1.11.0/GDALmake.opt.in.orig 2014-05-12 19:27:07.164191074 +0200 | |
542 | +++ gdal-1.11.0/GDALmake.opt.in 2014-05-12 20:39:04.850767745 +0200 | |
543 | @@ -37,6 +37,8 @@ | |
544 | $(PCIDSK_LIB) $(RASDAMAN_LIB) $(CHARLS_LIB) $(SOSI_LIB) \ | |
545 | $(OPENCL_LIB) $(JVM_LIB) $(LIBICONV) $(FGDB_LIB) $(LIBXML2_LIB) | |
546 | ||
547 | +SSEFLAGS = @SSEFLAGS@ | |
548 | +HAVE_SSE_AT_COMPILE_TIME = @HAVE_SSE_AT_COMPILE_TIME@ | |
549 | AVXFLAGS = @AVXFLAGS@ | |
550 | HAVE_AVX_AT_COMPILE_TIME = @HAVE_AVX_AT_COMPILE_TIME@ | |
551 |