1 diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/AudioNodeEngineGeneric.h
2 --- a/dom/media/webaudio/AudioNodeEngineGeneric.h
3 +++ b/dom/media/webaudio/AudioNodeEngineGeneric.h
5 MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
6 "requires tail processing");
9 for (unsigned i = 0; i < aSize * 2;
10 - i += 2 * xsimd::batch<std::complex<float>>::size) {
11 - auto in1 = xsimd::batch<std::complex<float>>::load_aligned(
12 + i += 2 * xsimd::batch<std::complex<float>, Arch>::size) {
13 + auto in1 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
14 reinterpret_cast<const std::complex<float>*>(&aInput[i]));
15 - auto in2 = xsimd::batch<std::complex<float>>::load_aligned(
16 + auto in2 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
17 reinterpret_cast<const std::complex<float>*>(&aScale[i]));
19 out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
23 diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/AudioNodeEngineGeneric.h
24 --- a/dom/media/webaudio/AudioNodeEngineGeneric.h
25 +++ b/dom/media/webaudio/AudioNodeEngineGeneric.h
28 #ifndef MOZILLA_AUDIONODEENGINEGENERIC_H_
29 #define MOZILLA_AUDIONODEENGINEGENERIC_H_
31 #include "AudioNodeEngine.h"
32 -#include "AlignmentUtils.h"
34 #include "xsimd/xsimd.hpp"
36 -#if defined(__GNUC__) && __GNUC__ > 7
37 -# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
38 -# define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor)
39 -#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3)
40 -# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
41 -# define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor)
43 -# define MOZ_UNROLL(_)
49 -static bool is_aligned(const void* ptr) {
50 - return (reinterpret_cast<uintptr_t>(ptr) &
51 - ~(static_cast<uintptr_t>(Arch::alignment()) - 1)) ==
52 - reinterpret_cast<uintptr_t>(ptr);
55 -template <class Arch>
57 static void AudioBufferAddWithScale(const float* aInput, float aScale,
58 - float* aOutput, uint32_t aSize) {
59 - if constexpr (Arch::requires_alignment()) {
60 - if (aScale == 1.0f) {
61 - while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
63 - *aOutput += *aInput;
69 - while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
71 - *aOutput += *aInput * aScale;
78 - MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
79 - MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
81 - xsimd::batch<float, Arch> vgain(aScale);
83 - uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
85 - for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
86 - auto vin1 = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
87 - auto vin2 = xsimd::batch<float, Arch>::load_aligned(&aOutput[i]);
88 - auto vout = xsimd::fma(vin1, vgain, vin2);
89 - vout.store_aligned(&aOutput[i]);
92 - for (unsigned i = aVSize; i < aSize; ++i) {
93 - aOutput[i] += aInput[i] * aScale;
96 + float* aOutput, uint32_t aSize);
98 static void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
100 - MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
101 - MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
103 - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
104 - "requires tail processing");
106 - xsimd::batch<float, Arch> vgain = (aScale);
109 - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
110 - i += xsimd::batch<float, Arch>::size) {
111 - auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
112 - auto vout = vin * vgain;
113 - vout.store_aligned(&aOutput[i]);
118 static void AudioBlockCopyChannelWithScale(
119 const float aInput[WEBAUDIO_BLOCK_SIZE],
120 const float aScale[WEBAUDIO_BLOCK_SIZE],
121 - float aOutput[WEBAUDIO_BLOCK_SIZE]) {
122 - MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
123 - MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
124 - MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
126 - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
127 - "requires tail processing");
130 - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
131 - i += xsimd::batch<float, Arch>::size) {
132 - auto vscaled = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
133 - auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
134 - auto vout = vin * vscaled;
135 - vout.store_aligned(&aOutput[i]);
138 + float aOutput[WEBAUDIO_BLOCK_SIZE]);
140 static void AudioBufferInPlaceScale(float* aBlock, float aScale,
142 - MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
144 - xsimd::batch<float, Arch> vgain(aScale);
146 - uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
148 - for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
149 - auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
150 - auto vout = vin * vgain;
151 - vout.store_aligned(&aBlock[i]);
153 - for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale;
157 static void AudioBufferInPlaceScale(float* aBlock, float* aScale,
159 - MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
160 - MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
162 - uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
164 - for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
165 - auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
166 - auto vgain = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
167 - auto vout = vin * vgain;
168 - vout.store_aligned(&aBlock[i]);
170 - for (uint32_t i = aVSize; i < aSize; ++i) {
171 - *aBlock++ *= *aScale++;
176 static void AudioBlockPanStereoToStereo(
177 const float aInputL[WEBAUDIO_BLOCK_SIZE],
178 const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR,
179 bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE],
180 - float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
181 - MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
182 - MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
183 - MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
184 - MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
186 - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
187 - "requires tail processing");
189 - xsimd::batch<float, Arch> vgainl(aGainL);
190 - xsimd::batch<float, Arch> vgainr(aGainR);
192 - if (aIsOnTheLeft) {
194 - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
195 - i += xsimd::batch<float, Arch>::size) {
196 - auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
197 - auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
199 - /* left channel : aOutputL = aInputL + aInputR * gainL */
200 - auto vout = xsimd::fma(vinr, vgainl, vinl);
201 - vout.store_aligned(&aOutputL[i]);
203 - /* right channel : aOutputR = aInputR * gainR */
204 - auto vscaled = vinr * vgainr;
205 - vscaled.store_aligned(&aOutputR[i]);
209 - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
210 - i += xsimd::batch<float, Arch>::size) {
211 - auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
212 - auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
214 - /* left channel : aInputL * gainL */
215 - auto vscaled = vinl * vgainl;
216 - vscaled.store_aligned(&aOutputL[i]);
218 - /* right channel: aOutputR = aInputR + aInputL * gainR */
219 - auto vout = xsimd::fma(vinl, vgainr, vinr);
220 - vout.store_aligned(&aOutputR[i]);
224 + float aOutputR[WEBAUDIO_BLOCK_SIZE]);
226 static void BufferComplexMultiply(const float* aInput, const float* aScale,
227 - float* aOutput, uint32_t aSize) {
228 - MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
229 - MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
230 - MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
231 - MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
232 - "requires tail processing");
235 - for (unsigned i = 0; i < aSize * 2;
236 - i += 2 * xsimd::batch<std::complex<float>, Arch>::size) {
237 - auto in1 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
238 - reinterpret_cast<const std::complex<float>*>(&aInput[i]));
239 - auto in2 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
240 - reinterpret_cast<const std::complex<float>*>(&aScale[i]));
241 - auto out = in1 * in2;
242 - out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
246 - static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
249 - if constexpr (Arch::requires_alignment()) {
250 - while (!is_aligned<Arch>(aInput)) {
254 - sum += *aInput * *aInput;
260 - MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
262 - constexpr uint32_t unroll_factor = 4;
263 - xsimd::batch<float, Arch> accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f};
266 - aLength & ~(unroll_factor * xsimd::batch<float, Arch>::size - 1);
267 + float* aOutput, uint32_t aSize);
269 - for (uint32_t i = 0; i < vLength;
270 - i += unroll_factor * xsimd::batch<float, Arch>::size) {
272 - for (uint32_t j = 0; j < unroll_factor; ++j) {
273 - auto in = xsimd::batch<float, Arch>::load_aligned(
274 - &aInput[i + xsimd::batch<float, Arch>::size * j]);
275 - accs[j] = xsimd::fma(in, in, accs[j]);
279 - sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3]));
280 - for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i];
283 + static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength);
285 - static void NaNToZeroInPlace(float* aSamples, size_t aCount) {
286 - if constexpr (Arch::requires_alignment()) {
287 - while (!is_aligned<Arch>(aSamples)) {
291 - if (*aSamples != *aSamples) {
299 - MOZ_ASSERT(is_aligned<Arch>(aSamples), "aSamples is aligned");
301 - uint32_t vCount = aCount & ~(xsimd::batch<float, Arch>::size - 1);
304 - for (uint32_t i = 0; i < vCount; i += xsimd::batch<float, Arch>::size) {
305 - auto vin = xsimd::batch<float, Arch>::load_aligned(&aSamples[i]);
307 - xsimd::select(xsimd::isnan(vin), xsimd::batch<float, Arch>(0.f), vin);
308 - vout.store_aligned(&aSamples[i]);
311 - for (uint32_t i = vCount; i < aCount; i++) {
312 - if (aSamples[i] != aSamples[i]) {
317 + static void NaNToZeroInPlace(float* aSamples, size_t aCount);
319 static void AudioBlockPanStereoToStereo(
320 const float aInputL[WEBAUDIO_BLOCK_SIZE],
321 const float aInputR[WEBAUDIO_BLOCK_SIZE],
322 const float aGainL[WEBAUDIO_BLOCK_SIZE],
323 const float aGainR[WEBAUDIO_BLOCK_SIZE],
324 const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
325 - float aOutputL[WEBAUDIO_BLOCK_SIZE],
326 - float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
327 - MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
328 - MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
329 - MOZ_ASSERT(is_aligned<Arch>(aGainL), "aGainL is aligned");
330 - MOZ_ASSERT(is_aligned<Arch>(aGainR), "aGainR is aligned");
331 - MOZ_ASSERT(is_aligned<Arch>(aIsOnTheLeft), "aIsOnTheLeft is aligned");
332 - MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
333 - MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
335 - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
336 - "requires tail processing");
339 - for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE;
340 - i += xsimd::batch<float, Arch>::size) {
342 - xsimd::batch_bool<float, Arch>::load_aligned(&aIsOnTheLeft[i]);
344 - auto inputL = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
345 - auto inputR = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
346 - auto gainL = xsimd::batch<float, Arch>::load_aligned(&aGainL[i]);
347 - auto gainR = xsimd::batch<float, Arch>::load_aligned(&aGainR[i]);
349 - auto outL_true = xsimd::fma(inputR, gainL, inputL);
350 - auto outR_true = inputR * gainR;
352 - auto outL_false = inputL * gainL;
353 - auto outR_false = xsimd::fma(inputL, gainR, inputR);
355 - auto outL = xsimd::select(mask, outL_true, outL_false);
356 - auto outR = xsimd::select(mask, outR_true, outR_false);
358 - outL.store_aligned(&aOutputL[i]);
359 - outR.store_aligned(&aOutputR[i]);
362 + float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]);
365 } // namespace mozilla
368 diff --git a/dom/media/webaudio/AudioNodeEngineGenericImpl.h b/dom/media/webaudio/AudioNodeEngineGenericImpl.h
371 +++ b/dom/media/webaudio/AudioNodeEngineGenericImpl.h
373 +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
374 +/* This Source Code Form is subject to the terms of the Mozilla Public
375 + * License, v. 2.0. If a copy of the MPL was not distributed with this file,
376 + * You can obtain one at http://mozilla.org/MPL/2.0/. */
378 +#ifndef MOZILLA_AUDIONODEENGINEGENERICIMPL_H_
379 +#define MOZILLA_AUDIONODEENGINEGENERICIMPL_H_
381 +#include "AudioNodeEngineGeneric.h"
382 +#include "AlignmentUtils.h"
384 +#if defined(__GNUC__) && __GNUC__ > 7
385 +# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
386 +# define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor)
387 +#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3)
388 +# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
389 +# define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor)
391 +# define MOZ_UNROLL(_)
396 +template <class Arch>
397 +static bool is_aligned(const void* ptr) {
398 + return (reinterpret_cast<uintptr_t>(ptr) &
399 + ~(static_cast<uintptr_t>(Arch::alignment()) - 1)) ==
400 + reinterpret_cast<uintptr_t>(ptr);
403 +template <class Arch>
404 +void Engine<Arch>::AudioBufferAddWithScale(const float* aInput, float aScale,
405 + float* aOutput, uint32_t aSize) {
406 + if constexpr (Arch::requires_alignment()) {
407 + if (aScale == 1.0f) {
408 + while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
409 + if (!aSize) return;
410 + *aOutput += *aInput;
416 + while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
417 + if (!aSize) return;
418 + *aOutput += *aInput * aScale;
425 + MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
426 + MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
428 + xsimd::batch<float, Arch> vgain(aScale);
430 + uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
432 + for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
433 + auto vin1 = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
434 + auto vin2 = xsimd::batch<float, Arch>::load_aligned(&aOutput[i]);
435 + auto vout = xsimd::fma(vin1, vgain, vin2);
436 + vout.store_aligned(&aOutput[i]);
439 + for (unsigned i = aVSize; i < aSize; ++i) {
440 + aOutput[i] += aInput[i] * aScale;
444 +template <class Arch>
445 +void Engine<Arch>::AudioBlockCopyChannelWithScale(const float* aInput,
448 + MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
449 + MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
451 + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
452 + "requires tail processing");
454 + xsimd::batch<float, Arch> vgain = (aScale);
457 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
458 + i += xsimd::batch<float, Arch>::size) {
459 + auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
460 + auto vout = vin * vgain;
461 + vout.store_aligned(&aOutput[i]);
465 +template <class Arch>
466 +void Engine<Arch>::AudioBlockCopyChannelWithScale(
467 + const float aInput[WEBAUDIO_BLOCK_SIZE],
468 + const float aScale[WEBAUDIO_BLOCK_SIZE],
469 + float aOutput[WEBAUDIO_BLOCK_SIZE]) {
470 + MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
471 + MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
472 + MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
474 + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
475 + "requires tail processing");
478 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
479 + i += xsimd::batch<float, Arch>::size) {
480 + auto vscaled = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
481 + auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
482 + auto vout = vin * vscaled;
483 + vout.store_aligned(&aOutput[i]);
487 +template <class Arch>
488 +void Engine<Arch>::AudioBufferInPlaceScale(float* aBlock, float aScale,
490 + MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
492 + xsimd::batch<float, Arch> vgain(aScale);
494 + uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
496 + for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
497 + auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
498 + auto vout = vin * vgain;
499 + vout.store_aligned(&aBlock[i]);
501 + for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale;
504 +template <class Arch>
505 +void Engine<Arch>::AudioBufferInPlaceScale(float* aBlock, float* aScale,
507 + MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
508 + MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
510 + uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
512 + for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
513 + auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
514 + auto vgain = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
515 + auto vout = vin * vgain;
516 + vout.store_aligned(&aBlock[i]);
518 + for (uint32_t i = aVSize; i < aSize; ++i) {
519 + *aBlock++ *= *aScale++;
523 +template <class Arch>
524 +void Engine<Arch>::AudioBlockPanStereoToStereo(
525 + const float aInputL[WEBAUDIO_BLOCK_SIZE],
526 + const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR,
527 + bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE],
528 + float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
529 + MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
530 + MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
531 + MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
532 + MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
534 + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
535 + "requires tail processing");
537 + xsimd::batch<float, Arch> vgainl(aGainL);
538 + xsimd::batch<float, Arch> vgainr(aGainR);
540 + if (aIsOnTheLeft) {
542 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
543 + i += xsimd::batch<float, Arch>::size) {
544 + auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
545 + auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
547 + /* left channel : aOutputL = aInputL + aInputR * gainL */
548 + auto vout = xsimd::fma(vinr, vgainl, vinl);
549 + vout.store_aligned(&aOutputL[i]);
551 + /* right channel : aOutputR = aInputR * gainR */
552 + auto vscaled = vinr * vgainr;
553 + vscaled.store_aligned(&aOutputR[i]);
557 + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
558 + i += xsimd::batch<float, Arch>::size) {
559 + auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
560 + auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
562 + /* left channel : aInputL * gainL */
563 + auto vscaled = vinl * vgainl;
564 + vscaled.store_aligned(&aOutputL[i]);
566 + /* right channel: aOutputR = aInputR + aInputL * gainR */
567 + auto vout = xsimd::fma(vinl, vgainr, vinr);
568 + vout.store_aligned(&aOutputR[i]);
573 +template <class Arch>
574 +void Engine<Arch>::BufferComplexMultiply(const float* aInput,
575 + const float* aScale, float* aOutput,
577 + MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
578 + MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
579 + MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
580 + MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
581 + "requires tail processing");
584 + for (unsigned i = 0; i < aSize * 2;
585 + i += 2 * xsimd::batch<std::complex<float>, Arch>::size) {
586 + auto in1 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
587 + reinterpret_cast<const std::complex<float>*>(&aInput[i]));
588 + auto in2 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
589 + reinterpret_cast<const std::complex<float>*>(&aScale[i]));
590 + auto out = in1 * in2;
591 + out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
595 +template <class Arch>
596 +float Engine<Arch>::AudioBufferSumOfSquares(const float* aInput,
597 + uint32_t aLength) {
600 + if constexpr (Arch::requires_alignment()) {
601 + while (!is_aligned<Arch>(aInput)) {
605 + sum += *aInput * *aInput;
611 + MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
613 + constexpr uint32_t unroll_factor = 4;
614 + xsimd::batch<float, Arch> accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f};
617 + aLength & ~(unroll_factor * xsimd::batch<float, Arch>::size - 1);
619 + for (uint32_t i = 0; i < vLength;
620 + i += unroll_factor * xsimd::batch<float, Arch>::size) {
622 + for (uint32_t j = 0; j < unroll_factor; ++j) {
623 + auto in = xsimd::batch<float, Arch>::load_aligned(
624 + &aInput[i + xsimd::batch<float, Arch>::size * j]);
625 + accs[j] = xsimd::fma(in, in, accs[j]);
629 + sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3]));
630 + for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i];
634 +template <class Arch>
635 +void Engine<Arch>::NaNToZeroInPlace(float* aSamples, size_t aCount) {
636 + if constexpr (Arch::requires_alignment()) {
637 + while (!is_aligned<Arch>(aSamples)) {
641 + if (*aSamples != *aSamples) {
649 + MOZ_ASSERT(is_aligned<Arch>(aSamples), "aSamples is aligned");
651 + uint32_t vCount = aCount & ~(xsimd::batch<float, Arch>::size - 1);
654 + for (uint32_t i = 0; i < vCount; i += xsimd::batch<float, Arch>::size) {
655 + auto vin = xsimd::batch<float, Arch>::load_aligned(&aSamples[i]);
657 + xsimd::select(xsimd::isnan(vin), xsimd::batch<float, Arch>(0.f), vin);
658 + vout.store_aligned(&aSamples[i]);
661 + for (uint32_t i = vCount; i < aCount; i++) {
662 + if (aSamples[i] != aSamples[i]) {
668 +template <class Arch>
669 +void Engine<Arch>::AudioBlockPanStereoToStereo(
670 + const float aInputL[WEBAUDIO_BLOCK_SIZE],
671 + const float aInputR[WEBAUDIO_BLOCK_SIZE],
672 + const float aGainL[WEBAUDIO_BLOCK_SIZE],
673 + const float aGainR[WEBAUDIO_BLOCK_SIZE],
674 + const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
675 + float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
676 + MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
677 + MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
678 + MOZ_ASSERT(is_aligned<Arch>(aGainL), "aGainL is aligned");
679 + MOZ_ASSERT(is_aligned<Arch>(aGainR), "aGainR is aligned");
680 + MOZ_ASSERT(is_aligned<Arch>(aIsOnTheLeft), "aIsOnTheLeft is aligned");
681 + MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
682 + MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
684 + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
685 + "requires tail processing");
688 + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE;
689 + i += xsimd::batch<float, Arch>::size) {
690 + auto mask = xsimd::batch_bool<float, Arch>::load_aligned(&aIsOnTheLeft[i]);
692 + auto inputL = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
693 + auto inputR = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
694 + auto gainL = xsimd::batch<float, Arch>::load_aligned(&aGainL[i]);
695 + auto gainR = xsimd::batch<float, Arch>::load_aligned(&aGainR[i]);
697 + auto outL_true = xsimd::fma(inputR, gainL, inputL);
698 + auto outR_true = inputR * gainR;
700 + auto outL_false = inputL * gainL;
701 + auto outR_false = xsimd::fma(inputL, gainR, inputR);
703 + auto outL = xsimd::select(mask, outL_true, outL_false);
704 + auto outR = xsimd::select(mask, outR_true, outR_false);
706 + outL.store_aligned(&aOutputL[i]);
707 + outR.store_aligned(&aOutputR[i]);
711 +} // namespace mozilla
714 diff --git a/dom/media/webaudio/AudioNodeEngineNEON.cpp b/dom/media/webaudio/AudioNodeEngineNEON.cpp
715 --- a/dom/media/webaudio/AudioNodeEngineNEON.cpp
716 +++ b/dom/media/webaudio/AudioNodeEngineNEON.cpp
718 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
719 /* This Source Code Form is subject to the terms of the Mozilla Public
720 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
721 * You can obtain one at http://mozilla.org/MPL/2.0/. */
723 -#include "AudioNodeEngineGeneric.h"
724 +#include "AudioNodeEngineGenericImpl.h"
726 template struct Engine<xsimd::neon>;
727 } // namespace mozilla
728 diff --git a/dom/media/webaudio/AudioNodeEngineSSE2.cpp b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
729 --- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp
730 +++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
732 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
733 /* This Source Code Form is subject to the terms of the Mozilla Public
734 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
735 * You can obtain one at http://mozilla.org/MPL/2.0/. */
737 -#include "AudioNodeEngineGeneric.h"
738 +#include "AudioNodeEngineGenericImpl.h"
741 template struct Engine<xsimd::sse2>;
742 } // namespace mozilla
743 diff --git a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
744 --- a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
745 +++ b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
747 /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
748 /* This Source Code Form is subject to the terms of the Mozilla Public
749 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
750 * You can obtain one at http://mozilla.org/MPL/2.0/. */
752 -#include "AudioNodeEngineGeneric.h"
753 +#include "AudioNodeEngineGenericImpl.h"
756 template struct Engine<xsimd::fma3<xsimd::sse4_2>>;
757 } // namespace mozilla