diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/AudioNodeEngineGeneric.h --- a/dom/media/webaudio/AudioNodeEngineGeneric.h +++ b/dom/media/webaudio/AudioNodeEngineGeneric.h @@ -203,14 +203,14 @@ MOZ_ASSERT((aSize % xsimd::batch::size == 0), "requires tail processing"); MOZ_UNROLL(2) for (unsigned i = 0; i < aSize * 2; - i += 2 * xsimd::batch>::size) { - auto in1 = xsimd::batch>::load_aligned( + i += 2 * xsimd::batch, Arch>::size) { + auto in1 = xsimd::batch, Arch>::load_aligned( reinterpret_cast*>(&aInput[i])); - auto in2 = xsimd::batch>::load_aligned( + auto in2 = xsimd::batch, Arch>::load_aligned( reinterpret_cast*>(&aScale[i])); auto out = in1 * in2; out.store_aligned(reinterpret_cast*>(&aOutput[i])); } }; diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/AudioNodeEngineGeneric.h --- a/dom/media/webaudio/AudioNodeEngineGeneric.h +++ b/dom/media/webaudio/AudioNodeEngineGeneric.h @@ -5,331 +5,54 @@ #ifndef MOZILLA_AUDIONODEENGINEGENERIC_H_ #define MOZILLA_AUDIONODEENGINEGENERIC_H_ #include "AudioNodeEngine.h" -#include "AlignmentUtils.h" #include "xsimd/xsimd.hpp" -#if defined(__GNUC__) && __GNUC__ > 7 -# define MOZ_PRAGMA(tokens) _Pragma(#tokens) -# define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor) -#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3) -# define MOZ_PRAGMA(tokens) _Pragma(#tokens) -# define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor) -#else -# define MOZ_UNROLL(_) -#endif - namespace mozilla { template -static bool is_aligned(const void* ptr) { - return (reinterpret_cast(ptr) & - ~(static_cast(Arch::alignment()) - 1)) == - reinterpret_cast(ptr); -}; - -template struct Engine { static void AudioBufferAddWithScale(const float* aInput, float aScale, - float* aOutput, uint32_t aSize) { - if constexpr (Arch::requires_alignment()) { - if (aScale == 1.0f) { - while (!is_aligned(aInput) || !is_aligned(aOutput)) { - if (!aSize) return; - *aOutput += *aInput; - 
++aOutput; - ++aInput; - --aSize; - } - } else { - while (!is_aligned(aInput) || !is_aligned(aOutput)) { - if (!aSize) return; - *aOutput += *aInput * aScale; - ++aOutput; - ++aInput; - --aSize; - } - } - } - MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); - MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); - - xsimd::batch vgain(aScale); - - uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); - MOZ_UNROLL(4) - for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { - auto vin1 = xsimd::batch::load_aligned(&aInput[i]); - auto vin2 = xsimd::batch::load_aligned(&aOutput[i]); - auto vout = xsimd::fma(vin1, vgain, vin2); - vout.store_aligned(&aOutput[i]); - } - - for (unsigned i = aVSize; i < aSize; ++i) { - aOutput[i] += aInput[i] * aScale; - } - }; + float* aOutput, uint32_t aSize); static void AudioBlockCopyChannelWithScale(const float* aInput, float aScale, - float* aOutput) { - MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); - MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); - - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), - "requires tail processing"); - - xsimd::batch vgain = (aScale); - - MOZ_UNROLL(4) - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; - i += xsimd::batch::size) { - auto vin = xsimd::batch::load_aligned(&aInput[i]); - auto vout = vin * vgain; - vout.store_aligned(&aOutput[i]); - } - }; + float* aOutput); static void AudioBlockCopyChannelWithScale( const float aInput[WEBAUDIO_BLOCK_SIZE], const float aScale[WEBAUDIO_BLOCK_SIZE], - float aOutput[WEBAUDIO_BLOCK_SIZE]) { - MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); - MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); - MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); - - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), - "requires tail processing"); - - MOZ_UNROLL(4) - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; - i += xsimd::batch::size) { - auto vscaled = xsimd::batch::load_aligned(&aScale[i]); - auto vin = 
xsimd::batch::load_aligned(&aInput[i]); - auto vout = vin * vscaled; - vout.store_aligned(&aOutput[i]); - } - }; + float aOutput[WEBAUDIO_BLOCK_SIZE]); static void AudioBufferInPlaceScale(float* aBlock, float aScale, - uint32_t aSize) { - MOZ_ASSERT(is_aligned(aBlock), "aBlock is aligned"); - - xsimd::batch vgain(aScale); - - uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); - MOZ_UNROLL(4) - for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { - auto vin = xsimd::batch::load_aligned(&aBlock[i]); - auto vout = vin * vgain; - vout.store_aligned(&aBlock[i]); - } - for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale; - }; + uint32_t aSize); static void AudioBufferInPlaceScale(float* aBlock, float* aScale, - uint32_t aSize) { - MOZ_ASSERT(is_aligned(aBlock), "aBlock is aligned"); - MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); - - uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); - MOZ_UNROLL(4) - for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { - auto vin = xsimd::batch::load_aligned(&aBlock[i]); - auto vgain = xsimd::batch::load_aligned(&aScale[i]); - auto vout = vin * vgain; - vout.store_aligned(&aBlock[i]); - } - for (uint32_t i = aVSize; i < aSize; ++i) { - *aBlock++ *= *aScale++; - } - }; + uint32_t aSize); static void AudioBlockPanStereoToStereo( const float aInputL[WEBAUDIO_BLOCK_SIZE], const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR, bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE], - float aOutputR[WEBAUDIO_BLOCK_SIZE]) { - MOZ_ASSERT(is_aligned(aInputL), "aInputL is aligned"); - MOZ_ASSERT(is_aligned(aInputR), "aInputR is aligned"); - MOZ_ASSERT(is_aligned(aOutputL), "aOutputL is aligned"); - MOZ_ASSERT(is_aligned(aOutputR), "aOutputR is aligned"); - - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), - "requires tail processing"); - - xsimd::batch vgainl(aGainL); - xsimd::batch vgainr(aGainR); - - if (aIsOnTheLeft) { - MOZ_UNROLL(2) - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; 
- i += xsimd::batch::size) { - auto vinl = xsimd::batch::load_aligned(&aInputL[i]); - auto vinr = xsimd::batch::load_aligned(&aInputR[i]); - - /* left channel : aOutputL = aInputL + aInputR * gainL */ - auto vout = xsimd::fma(vinr, vgainl, vinl); - vout.store_aligned(&aOutputL[i]); - - /* right channel : aOutputR = aInputR * gainR */ - auto vscaled = vinr * vgainr; - vscaled.store_aligned(&aOutputR[i]); - } - } else { - MOZ_UNROLL(2) - for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; - i += xsimd::batch::size) { - auto vinl = xsimd::batch::load_aligned(&aInputL[i]); - auto vinr = xsimd::batch::load_aligned(&aInputR[i]); - - /* left channel : aInputL * gainL */ - auto vscaled = vinl * vgainl; - vscaled.store_aligned(&aOutputL[i]); - - /* right channel: aOutputR = aInputR + aInputL * gainR */ - auto vout = xsimd::fma(vinl, vgainr, vinr); - vout.store_aligned(&aOutputR[i]); - } - } - }; + float aOutputR[WEBAUDIO_BLOCK_SIZE]); static void BufferComplexMultiply(const float* aInput, const float* aScale, - float* aOutput, uint32_t aSize) { - MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); - MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); - MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); - MOZ_ASSERT((aSize % xsimd::batch::size == 0), - "requires tail processing"); - - MOZ_UNROLL(2) - for (unsigned i = 0; i < aSize * 2; - i += 2 * xsimd::batch, Arch>::size) { - auto in1 = xsimd::batch, Arch>::load_aligned( - reinterpret_cast*>(&aInput[i])); - auto in2 = xsimd::batch, Arch>::load_aligned( - reinterpret_cast*>(&aScale[i])); - auto out = in1 * in2; - out.store_aligned(reinterpret_cast*>(&aOutput[i])); - } - }; - - static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) { - float sum = 0.f; - - if constexpr (Arch::requires_alignment()) { - while (!is_aligned(aInput)) { - if (!aLength) { - return sum; - } - sum += *aInput * *aInput; - ++aInput; - --aLength; - } - } - - MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); - - constexpr uint32_t 
unroll_factor = 4; - xsimd::batch accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f}; - - uint32_t vLength = - aLength & ~(unroll_factor * xsimd::batch::size - 1); + float* aOutput, uint32_t aSize); - for (uint32_t i = 0; i < vLength; - i += unroll_factor * xsimd::batch::size) { - MOZ_UNROLL(4) - for (uint32_t j = 0; j < unroll_factor; ++j) { - auto in = xsimd::batch::load_aligned( - &aInput[i + xsimd::batch::size * j]); - accs[j] = xsimd::fma(in, in, accs[j]); - } - } - - sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3])); - for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i]; - return sum; - }; + static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength); - static void NaNToZeroInPlace(float* aSamples, size_t aCount) { - if constexpr (Arch::requires_alignment()) { - while (!is_aligned(aSamples)) { - if (!aCount) { - return; - } - if (*aSamples != *aSamples) { - *aSamples = 0.0; - } - ++aSamples; - --aCount; - } - } - - MOZ_ASSERT(is_aligned(aSamples), "aSamples is aligned"); - - uint32_t vCount = aCount & ~(xsimd::batch::size - 1); - - MOZ_UNROLL(4) - for (uint32_t i = 0; i < vCount; i += xsimd::batch::size) { - auto vin = xsimd::batch::load_aligned(&aSamples[i]); - auto vout = - xsimd::select(xsimd::isnan(vin), xsimd::batch(0.f), vin); - vout.store_aligned(&aSamples[i]); - } - - for (uint32_t i = vCount; i < aCount; i++) { - if (aSamples[i] != aSamples[i]) { - aSamples[i] = 0.0; - } - } - }; + static void NaNToZeroInPlace(float* aSamples, size_t aCount); static void AudioBlockPanStereoToStereo( const float aInputL[WEBAUDIO_BLOCK_SIZE], const float aInputR[WEBAUDIO_BLOCK_SIZE], const float aGainL[WEBAUDIO_BLOCK_SIZE], const float aGainR[WEBAUDIO_BLOCK_SIZE], const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE], - float aOutputL[WEBAUDIO_BLOCK_SIZE], - float aOutputR[WEBAUDIO_BLOCK_SIZE]) { - MOZ_ASSERT(is_aligned(aInputL), "aInputL is aligned"); - MOZ_ASSERT(is_aligned(aInputR), "aInputR is aligned"); - 
MOZ_ASSERT(is_aligned(aGainL), "aGainL is aligned"); - MOZ_ASSERT(is_aligned(aGainR), "aGainR is aligned"); - MOZ_ASSERT(is_aligned(aIsOnTheLeft), "aIsOnTheLeft is aligned"); - MOZ_ASSERT(is_aligned(aOutputL), "aOutputL is aligned"); - MOZ_ASSERT(is_aligned(aOutputR), "aOutputR is aligned"); - - MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), - "requires tail processing"); - - MOZ_UNROLL(2) - for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; - i += xsimd::batch::size) { - auto mask = - xsimd::batch_bool::load_aligned(&aIsOnTheLeft[i]); - - auto inputL = xsimd::batch::load_aligned(&aInputL[i]); - auto inputR = xsimd::batch::load_aligned(&aInputR[i]); - auto gainL = xsimd::batch::load_aligned(&aGainL[i]); - auto gainR = xsimd::batch::load_aligned(&aGainR[i]); - - auto outL_true = xsimd::fma(inputR, gainL, inputL); - auto outR_true = inputR * gainR; - - auto outL_false = inputL * gainL; - auto outR_false = xsimd::fma(inputL, gainR, inputR); - - auto outL = xsimd::select(mask, outL_true, outL_false); - auto outR = xsimd::select(mask, outR_true, outR_false); - - outL.store_aligned(&aOutputL[i]); - outR.store_aligned(&aOutputR[i]); - } - } + float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]); }; } // namespace mozilla #endif diff --git a/dom/media/webaudio/AudioNodeEngineGenericImpl.h b/dom/media/webaudio/AudioNodeEngineGenericImpl.h new file mode 100644 --- /dev/null +++ b/dom/media/webaudio/AudioNodeEngineGenericImpl.h @@ -0,0 +1,341 @@ +/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* this source code form is subject to the terms of the mozilla public + * license, v. 2.0. if a copy of the mpl was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZILLA_AUDIONODEENGINEGENERICIMPL_H_ +#define MOZILLA_AUDIONODEENGINEGENERICIMPL_H_ + +#include "AudioNodeEngineGeneric.h" +#include "AlignmentUtils.h" + +#if defined(__GNUC__) && __GNUC__ > 7 +# define MOZ_PRAGMA(tokens) _Pragma(#tokens) +# define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor) +#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3) +# define MOZ_PRAGMA(tokens) _Pragma(#tokens) +# define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor) +#else +# define MOZ_UNROLL(_) +#endif + +namespace mozilla { + +template +static bool is_aligned(const void* ptr) { + return (reinterpret_cast(ptr) & + ~(static_cast(Arch::alignment()) - 1)) == + reinterpret_cast(ptr); +}; + +template +void Engine::AudioBufferAddWithScale(const float* aInput, float aScale, + float* aOutput, uint32_t aSize) { + if constexpr (Arch::requires_alignment()) { + if (aScale == 1.0f) { + while (!is_aligned(aInput) || !is_aligned(aOutput)) { + if (!aSize) return; + *aOutput += *aInput; + ++aOutput; + ++aInput; + --aSize; + } + } else { + while (!is_aligned(aInput) || !is_aligned(aOutput)) { + if (!aSize) return; + *aOutput += *aInput * aScale; + ++aOutput; + ++aInput; + --aSize; + } + } + } + MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); + MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); + + xsimd::batch vgain(aScale); + + uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); + MOZ_UNROLL(4) + for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { + auto vin1 = xsimd::batch::load_aligned(&aInput[i]); + auto vin2 = xsimd::batch::load_aligned(&aOutput[i]); + auto vout = xsimd::fma(vin1, vgain, vin2); + vout.store_aligned(&aOutput[i]); + } + + for (unsigned i = aVSize; i < aSize; ++i) { + aOutput[i] += aInput[i] * aScale; + } +} + +template +void Engine::AudioBlockCopyChannelWithScale(const float* aInput, + float aScale, + float* aOutput) { + MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); + MOZ_ASSERT(is_aligned(aOutput), "aOutput is 
aligned"); + + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), + "requires tail processing"); + + xsimd::batch vgain = (aScale); + + MOZ_UNROLL(4) + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; + i += xsimd::batch::size) { + auto vin = xsimd::batch::load_aligned(&aInput[i]); + auto vout = vin * vgain; + vout.store_aligned(&aOutput[i]); + } +}; + +template +void Engine::AudioBlockCopyChannelWithScale( + const float aInput[WEBAUDIO_BLOCK_SIZE], + const float aScale[WEBAUDIO_BLOCK_SIZE], + float aOutput[WEBAUDIO_BLOCK_SIZE]) { + MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); + MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); + MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); + + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), + "requires tail processing"); + + MOZ_UNROLL(4) + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; + i += xsimd::batch::size) { + auto vscaled = xsimd::batch::load_aligned(&aScale[i]); + auto vin = xsimd::batch::load_aligned(&aInput[i]); + auto vout = vin * vscaled; + vout.store_aligned(&aOutput[i]); + } +}; + +template +void Engine::AudioBufferInPlaceScale(float* aBlock, float aScale, + uint32_t aSize) { + MOZ_ASSERT(is_aligned(aBlock), "aBlock is aligned"); + + xsimd::batch vgain(aScale); + + uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); + MOZ_UNROLL(4) + for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { + auto vin = xsimd::batch::load_aligned(&aBlock[i]); + auto vout = vin * vgain; + vout.store_aligned(&aBlock[i]); + } + for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale; +}; + +template +void Engine::AudioBufferInPlaceScale(float* aBlock, float* aScale, + uint32_t aSize) { + MOZ_ASSERT(is_aligned(aBlock), "aBlock is aligned"); + MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); + + uint32_t aVSize = aSize & ~(xsimd::batch::size - 1); + MOZ_UNROLL(4) + for (unsigned i = 0; i < aVSize; i += xsimd::batch::size) { + auto vin = xsimd::batch::load_aligned(&aBlock[i]); + auto 
vgain = xsimd::batch::load_aligned(&aScale[i]); + auto vout = vin * vgain; + vout.store_aligned(&aBlock[i]); + } + for (uint32_t i = aVSize; i < aSize; ++i) { + aBlock[i] *= aScale[i]; /* index the tail: pointers still at element 0 */ + } +}; + +template +void Engine::AudioBlockPanStereoToStereo( + const float aInputL[WEBAUDIO_BLOCK_SIZE], + const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR, + bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE], + float aOutputR[WEBAUDIO_BLOCK_SIZE]) { + MOZ_ASSERT(is_aligned(aInputL), "aInputL is aligned"); + MOZ_ASSERT(is_aligned(aInputR), "aInputR is aligned"); + MOZ_ASSERT(is_aligned(aOutputL), "aOutputL is aligned"); + MOZ_ASSERT(is_aligned(aOutputR), "aOutputR is aligned"); + + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), + "requires tail processing"); + + xsimd::batch vgainl(aGainL); + xsimd::batch vgainr(aGainR); + + if (aIsOnTheLeft) { + MOZ_UNROLL(2) + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; + i += xsimd::batch::size) { + auto vinl = xsimd::batch::load_aligned(&aInputL[i]); + auto vinr = xsimd::batch::load_aligned(&aInputR[i]); + + /* left channel : aOutputL = aInputL + aInputR * gainL */ + auto vout = xsimd::fma(vinr, vgainl, vinl); + vout.store_aligned(&aOutputL[i]); + + /* right channel : aOutputR = aInputR * gainR */ + auto vscaled = vinr * vgainr; + vscaled.store_aligned(&aOutputR[i]); + } + } else { + MOZ_UNROLL(2) + for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; + i += xsimd::batch::size) { + auto vinl = xsimd::batch::load_aligned(&aInputL[i]); + auto vinr = xsimd::batch::load_aligned(&aInputR[i]); + + /* left channel : aInputL * gainL */ + auto vscaled = vinl * vgainl; + vscaled.store_aligned(&aOutputL[i]); + + /* right channel: aOutputR = aInputR + aInputL * gainR */ + auto vout = xsimd::fma(vinl, vgainr, vinr); + vout.store_aligned(&aOutputR[i]); + } + } +}; + +template +void Engine::BufferComplexMultiply(const float* aInput, + const float* aScale, float* aOutput, + uint32_t aSize) { + MOZ_ASSERT(is_aligned(aInput), 
"aInput is aligned"); + MOZ_ASSERT(is_aligned(aOutput), "aOutput is aligned"); + MOZ_ASSERT(is_aligned(aScale), "aScale is aligned"); + MOZ_ASSERT((aSize % xsimd::batch::size == 0), + "requires tail processing"); + + MOZ_UNROLL(2) + for (unsigned i = 0; i < aSize * 2; + i += 2 * xsimd::batch, Arch>::size) { + auto in1 = xsimd::batch, Arch>::load_aligned( + reinterpret_cast*>(&aInput[i])); + auto in2 = xsimd::batch, Arch>::load_aligned( + reinterpret_cast*>(&aScale[i])); + auto out = in1 * in2; + out.store_aligned(reinterpret_cast*>(&aOutput[i])); + } +}; + +template +float Engine::AudioBufferSumOfSquares(const float* aInput, + uint32_t aLength) { + float sum = 0.f; + + if constexpr (Arch::requires_alignment()) { + while (!is_aligned(aInput)) { + if (!aLength) { + return sum; + } + sum += *aInput * *aInput; + ++aInput; + --aLength; + } + } + + MOZ_ASSERT(is_aligned(aInput), "aInput is aligned"); + + constexpr uint32_t unroll_factor = 4; + xsimd::batch accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f}; + + uint32_t vLength = + aLength & ~(unroll_factor * xsimd::batch::size - 1); + + for (uint32_t i = 0; i < vLength; + i += unroll_factor * xsimd::batch::size) { + MOZ_UNROLL(4) + for (uint32_t j = 0; j < unroll_factor; ++j) { + auto in = xsimd::batch::load_aligned( + &aInput[i + xsimd::batch::size * j]); + accs[j] = xsimd::fma(in, in, accs[j]); + } + } + + sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3])); + for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i]; + return sum; +}; + +template +void Engine::NaNToZeroInPlace(float* aSamples, size_t aCount) { + if constexpr (Arch::requires_alignment()) { + while (!is_aligned(aSamples)) { + if (!aCount) { + return; + } + if (*aSamples != *aSamples) { + *aSamples = 0.0; + } + ++aSamples; + --aCount; + } + } + + MOZ_ASSERT(is_aligned(aSamples), "aSamples is aligned"); + + uint32_t vCount = aCount & ~(xsimd::batch::size - 1); + + MOZ_UNROLL(4) + for (uint32_t i = 0; i < vCount; i += xsimd::batch::size) 
{ + auto vin = xsimd::batch::load_aligned(&aSamples[i]); + auto vout = + xsimd::select(xsimd::isnan(vin), xsimd::batch(0.f), vin); + vout.store_aligned(&aSamples[i]); + } + + for (uint32_t i = vCount; i < aCount; i++) { + if (aSamples[i] != aSamples[i]) { + aSamples[i] = 0.0; + } + } +}; + +template +void Engine::AudioBlockPanStereoToStereo( + const float aInputL[WEBAUDIO_BLOCK_SIZE], + const float aInputR[WEBAUDIO_BLOCK_SIZE], + const float aGainL[WEBAUDIO_BLOCK_SIZE], + const float aGainR[WEBAUDIO_BLOCK_SIZE], + const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE], + float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]) { + MOZ_ASSERT(is_aligned(aInputL), "aInputL is aligned"); + MOZ_ASSERT(is_aligned(aInputR), "aInputR is aligned"); + MOZ_ASSERT(is_aligned(aGainL), "aGainL is aligned"); + MOZ_ASSERT(is_aligned(aGainR), "aGainR is aligned"); + MOZ_ASSERT(is_aligned(aIsOnTheLeft), "aIsOnTheLeft is aligned"); + MOZ_ASSERT(is_aligned(aOutputL), "aOutputL is aligned"); + MOZ_ASSERT(is_aligned(aOutputR), "aOutputR is aligned"); + + MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch::size == 0), + "requires tail processing"); + + MOZ_UNROLL(2) + for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; + i += xsimd::batch::size) { + auto mask = xsimd::batch_bool::load_aligned(&aIsOnTheLeft[i]); + + auto inputL = xsimd::batch::load_aligned(&aInputL[i]); + auto inputR = xsimd::batch::load_aligned(&aInputR[i]); + auto gainL = xsimd::batch::load_aligned(&aGainL[i]); + auto gainR = xsimd::batch::load_aligned(&aGainR[i]); + + auto outL_true = xsimd::fma(inputR, gainL, inputL); + auto outR_true = inputR * gainR; + + auto outL_false = inputL * gainL; + auto outR_false = xsimd::fma(inputL, gainR, inputR); + + auto outL = xsimd::select(mask, outL_true, outL_false); + auto outR = xsimd::select(mask, outR_true, outR_false); + + outL.store_aligned(&aOutputL[i]); + outR.store_aligned(&aOutputR[i]); + } +} + +} // namespace mozilla + +#endif diff --git 
a/dom/media/webaudio/AudioNodeEngineNEON.cpp b/dom/media/webaudio/AudioNodeEngineNEON.cpp --- a/dom/media/webaudio/AudioNodeEngineNEON.cpp +++ b/dom/media/webaudio/AudioNodeEngineNEON.cpp @@ -1,9 +1,9 @@ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* this source code form is subject to the terms of the mozilla public * license, v. 2.0. if a copy of the mpl was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "AudioNodeEngineGeneric.h" +#include "AudioNodeEngineGenericImpl.h" namespace mozilla { template struct Engine; } // namespace mozilla diff --git a/dom/media/webaudio/AudioNodeEngineSSE2.cpp b/dom/media/webaudio/AudioNodeEngineSSE2.cpp --- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp +++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp @@ -1,10 +1,10 @@ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* this source code form is subject to the terms of the mozilla public * license, v. 2.0. if a copy of the mpl was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "AudioNodeEngineGeneric.h" +#include "AudioNodeEngineGenericImpl.h" namespace mozilla { template struct Engine; } // namespace mozilla diff --git a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp --- a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp +++ b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp @@ -1,10 +1,10 @@ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* this source code form is subject to the terms of the mozilla public * license, v. 2.0. if a copy of the mpl was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ -#include "AudioNodeEngineGeneric.h" +#include "AudioNodeEngineGenericImpl.h" namespace mozilla { template struct Engine>; } // namespace mozilla