More upstream fixes in an attempt to fix the i686 build

author Jan Palus <atler@pld-linux.org>

Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)

committer Jan Palus <atler@pld-linux.org>

Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)
author Jan Palus <atler@pld-linux.org>
Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)
committer Jan Palus <atler@pld-linux.org>
Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)
diff --git a/xsimd.patch b/xsimd.patch

index b4189c01030dc35889eaea9e62d0eb282fb888e7..0141b8932b01ab957295d02f93b2d2e510d24c51 100644 (file)
--- a/xsimd.patch
+++ b/xsimd.patch
@@ -20,3 +20,739 @@ diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/Au
       }
     };
  
+diff --git a/dom/media/webaudio/AudioNodeEngineGeneric.h b/dom/media/webaudio/AudioNodeEngineGeneric.h
+--- a/dom/media/webaudio/AudioNodeEngineGeneric.h
++++ b/dom/media/webaudio/AudioNodeEngineGeneric.h
+@@ -5,331 +5,54 @@
+ 
+ #ifndef MOZILLA_AUDIONODEENGINEGENERIC_H_
+ #define MOZILLA_AUDIONODEENGINEGENERIC_H_
+ 
+ #include "AudioNodeEngine.h"
+-#include "AlignmentUtils.h"
+ 
+ #include "xsimd/xsimd.hpp"
+ 
+-#if defined(__GNUC__) && __GNUC__ > 7
+-#  define MOZ_PRAGMA(tokens) _Pragma(#tokens)
+-#  define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor)
+-#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3)
+-#  define MOZ_PRAGMA(tokens) _Pragma(#tokens)
+-#  define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor)
+-#else
+-#  define MOZ_UNROLL(_)
+-#endif
+-
+ namespace mozilla {
+ 
+ template <class Arch>
+-static bool is_aligned(const void* ptr) {
+-  return (reinterpret_cast<uintptr_t>(ptr) &
+-          ~(static_cast<uintptr_t>(Arch::alignment()) - 1)) ==
+-         reinterpret_cast<uintptr_t>(ptr);
+-};
+-
+-template <class Arch>
+ struct Engine {
+   static void AudioBufferAddWithScale(const float* aInput, float aScale,
+-                                      float* aOutput, uint32_t aSize) {
+-    if constexpr (Arch::requires_alignment()) {
+-      if (aScale == 1.0f) {
+-        while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
+-          if (!aSize) return;
+-          *aOutput += *aInput;
+-          ++aOutput;
+-          ++aInput;
+-          --aSize;
+-        }
+-      } else {
+-        while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
+-          if (!aSize) return;
+-          *aOutput += *aInput * aScale;
+-          ++aOutput;
+-          ++aInput;
+-          --aSize;
+-        }
+-      }
+-    }
+-    MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
+-
+-    xsimd::batch<float, Arch> vgain(aScale);
+-
+-    uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
+-    MOZ_UNROLL(4)
+-    for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
+-      auto vin1 = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
+-      auto vin2 = xsimd::batch<float, Arch>::load_aligned(&aOutput[i]);
+-      auto vout = xsimd::fma(vin1, vgain, vin2);
+-      vout.store_aligned(&aOutput[i]);
+-    }
+-
+-    for (unsigned i = aVSize; i < aSize; ++i) {
+-      aOutput[i] += aInput[i] * aScale;
+-    }
+-  };
++                                      float* aOutput, uint32_t aSize);
+ 
+   static void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
+-                                             float* aOutput) {
+-    MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
+-
+-    MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
+-               "requires tail processing");
+-
+-    xsimd::batch<float, Arch> vgain = (aScale);
+-
+-    MOZ_UNROLL(4)
+-    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
+-         i += xsimd::batch<float, Arch>::size) {
+-      auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
+-      auto vout = vin * vgain;
+-      vout.store_aligned(&aOutput[i]);
+-    }
+-  };
++                                             float* aOutput);
+ 
+   static void AudioBlockCopyChannelWithScale(
+       const float aInput[WEBAUDIO_BLOCK_SIZE],
+       const float aScale[WEBAUDIO_BLOCK_SIZE],
+-      float aOutput[WEBAUDIO_BLOCK_SIZE]) {
+-    MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
+-
+-    MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
+-               "requires tail processing");
+-
+-    MOZ_UNROLL(4)
+-    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
+-         i += xsimd::batch<float, Arch>::size) {
+-      auto vscaled = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
+-      auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
+-      auto vout = vin * vscaled;
+-      vout.store_aligned(&aOutput[i]);
+-    }
+-  };
++      float aOutput[WEBAUDIO_BLOCK_SIZE]);
+ 
+   static void AudioBufferInPlaceScale(float* aBlock, float aScale,
+-                                      uint32_t aSize) {
+-    MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
+-
+-    xsimd::batch<float, Arch> vgain(aScale);
+-
+-    uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
+-    MOZ_UNROLL(4)
+-    for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
+-      auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
+-      auto vout = vin * vgain;
+-      vout.store_aligned(&aBlock[i]);
+-    }
+-    for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale;
+-  };
++                                      uint32_t aSize);
+ 
+   static void AudioBufferInPlaceScale(float* aBlock, float* aScale,
+-                                      uint32_t aSize) {
+-    MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
+-
+-    uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
+-    MOZ_UNROLL(4)
+-    for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
+-      auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
+-      auto vgain = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
+-      auto vout = vin * vgain;
+-      vout.store_aligned(&aBlock[i]);
+-    }
+-    for (uint32_t i = aVSize; i < aSize; ++i) {
+-      *aBlock++ *= *aScale++;
+-    }
+-  };
++                                      uint32_t aSize);
+ 
+   static void AudioBlockPanStereoToStereo(
+       const float aInputL[WEBAUDIO_BLOCK_SIZE],
+       const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR,
+       bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE],
+-      float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
+-    MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
+-
+-    MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
+-               "requires tail processing");
+-
+-    xsimd::batch<float, Arch> vgainl(aGainL);
+-    xsimd::batch<float, Arch> vgainr(aGainR);
+-
+-    if (aIsOnTheLeft) {
+-      MOZ_UNROLL(2)
+-      for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
+-           i += xsimd::batch<float, Arch>::size) {
+-        auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
+-        auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
+-
+-        /* left channel : aOutputL  = aInputL + aInputR * gainL */
+-        auto vout = xsimd::fma(vinr, vgainl, vinl);
+-        vout.store_aligned(&aOutputL[i]);
+-
+-        /* right channel : aOutputR = aInputR * gainR */
+-        auto vscaled = vinr * vgainr;
+-        vscaled.store_aligned(&aOutputR[i]);
+-      }
+-    } else {
+-      MOZ_UNROLL(2)
+-      for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
+-           i += xsimd::batch<float, Arch>::size) {
+-        auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
+-        auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
+-
+-        /* left channel : aInputL * gainL */
+-        auto vscaled = vinl * vgainl;
+-        vscaled.store_aligned(&aOutputL[i]);
+-
+-        /* right channel: aOutputR = aInputR + aInputL * gainR */
+-        auto vout = xsimd::fma(vinl, vgainr, vinr);
+-        vout.store_aligned(&aOutputR[i]);
+-      }
+-    }
+-  };
++      float aOutputR[WEBAUDIO_BLOCK_SIZE]);
+ 
+   static void BufferComplexMultiply(const float* aInput, const float* aScale,
+-                                    float* aOutput, uint32_t aSize) {
+-    MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
+-    MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
+-               "requires tail processing");
+-
+-    MOZ_UNROLL(2)
+-    for (unsigned i = 0; i < aSize * 2;
+-         i += 2 * xsimd::batch<std::complex<float>, Arch>::size) {
+-      auto in1 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
+-          reinterpret_cast<const std::complex<float>*>(&aInput[i]));
+-      auto in2 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
+-          reinterpret_cast<const std::complex<float>*>(&aScale[i]));
+-      auto out = in1 * in2;
+-      out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
+-    }
+-  };
+-
+-  static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
+-    float sum = 0.f;
+-
+-    if constexpr (Arch::requires_alignment()) {
+-      while (!is_aligned<Arch>(aInput)) {
+-        if (!aLength) {
+-          return sum;
+-        }
+-        sum += *aInput * *aInput;
+-        ++aInput;
+-        --aLength;
+-      }
+-    }
+-
+-    MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
+-
+-    constexpr uint32_t unroll_factor = 4;
+-    xsimd::batch<float, Arch> accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f};
+-
+-    uint32_t vLength =
+-        aLength & ~(unroll_factor * xsimd::batch<float, Arch>::size - 1);
++                                    float* aOutput, uint32_t aSize);
+ 
+-    for (uint32_t i = 0; i < vLength;
+-         i += unroll_factor * xsimd::batch<float, Arch>::size) {
+-      MOZ_UNROLL(4)
+-      for (uint32_t j = 0; j < unroll_factor; ++j) {
+-        auto in = xsimd::batch<float, Arch>::load_aligned(
+-            &aInput[i + xsimd::batch<float, Arch>::size * j]);
+-        accs[j] = xsimd::fma(in, in, accs[j]);
+-      }
+-    }
+-
+-    sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3]));
+-    for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i];
+-    return sum;
+-  };
++  static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength);
+ 
+-  static void NaNToZeroInPlace(float* aSamples, size_t aCount) {
+-    if constexpr (Arch::requires_alignment()) {
+-      while (!is_aligned<Arch>(aSamples)) {
+-        if (!aCount) {
+-          return;
+-        }
+-        if (*aSamples != *aSamples) {
+-          *aSamples = 0.0;
+-        }
+-        ++aSamples;
+-        --aCount;
+-      }
+-    }
+-
+-    MOZ_ASSERT(is_aligned<Arch>(aSamples), "aSamples is aligned");
+-
+-    uint32_t vCount = aCount & ~(xsimd::batch<float, Arch>::size - 1);
+-
+-    MOZ_UNROLL(4)
+-    for (uint32_t i = 0; i < vCount; i += xsimd::batch<float, Arch>::size) {
+-      auto vin = xsimd::batch<float, Arch>::load_aligned(&aSamples[i]);
+-      auto vout =
+-          xsimd::select(xsimd::isnan(vin), xsimd::batch<float, Arch>(0.f), vin);
+-      vout.store_aligned(&aSamples[i]);
+-    }
+-
+-    for (uint32_t i = vCount; i < aCount; i++) {
+-      if (aSamples[i] != aSamples[i]) {
+-        aSamples[i] = 0.0;
+-      }
+-    }
+-  };
++  static void NaNToZeroInPlace(float* aSamples, size_t aCount);
+ 
+   static void AudioBlockPanStereoToStereo(
+       const float aInputL[WEBAUDIO_BLOCK_SIZE],
+       const float aInputR[WEBAUDIO_BLOCK_SIZE],
+       const float aGainL[WEBAUDIO_BLOCK_SIZE],
+       const float aGainR[WEBAUDIO_BLOCK_SIZE],
+       const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
+-      float aOutputL[WEBAUDIO_BLOCK_SIZE],
+-      float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
+-    MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aGainL), "aGainL is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aGainR), "aGainR is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aIsOnTheLeft), "aIsOnTheLeft is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
+-    MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
+-
+-    MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
+-               "requires tail processing");
+-
+-    MOZ_UNROLL(2)
+-    for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE;
+-         i += xsimd::batch<float, Arch>::size) {
+-      auto mask =
+-          xsimd::batch_bool<float, Arch>::load_aligned(&aIsOnTheLeft[i]);
+-
+-      auto inputL = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
+-      auto inputR = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
+-      auto gainL = xsimd::batch<float, Arch>::load_aligned(&aGainL[i]);
+-      auto gainR = xsimd::batch<float, Arch>::load_aligned(&aGainR[i]);
+-
+-      auto outL_true = xsimd::fma(inputR, gainL, inputL);
+-      auto outR_true = inputR * gainR;
+-
+-      auto outL_false = inputL * gainL;
+-      auto outR_false = xsimd::fma(inputL, gainR, inputR);
+-
+-      auto outL = xsimd::select(mask, outL_true, outL_false);
+-      auto outR = xsimd::select(mask, outR_true, outR_false);
+-
+-      outL.store_aligned(&aOutputL[i]);
+-      outR.store_aligned(&aOutputR[i]);
+-    }
+-  }
++      float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]);
+ };
+ 
+ }  // namespace mozilla
+ 
+ #endif
+diff --git a/dom/media/webaudio/AudioNodeEngineGenericImpl.h b/dom/media/webaudio/AudioNodeEngineGenericImpl.h
+new file mode 100644
+--- /dev/null
++++ b/dom/media/webaudio/AudioNodeEngineGenericImpl.h
+@@ -0,0 +1,341 @@
++/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
++/* This Source Code Form is subject to the terms of the Mozilla Public
++ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
++ * You can obtain one at http://mozilla.org/MPL/2.0/. */
++
++#ifndef MOZILLA_AUDIONODEENGINEGENERICIMPL_H_
++#define MOZILLA_AUDIONODEENGINEGENERICIMPL_H_
++
++#include "AudioNodeEngineGeneric.h"
++#include "AlignmentUtils.h"
++
++#if defined(__GNUC__) && __GNUC__ > 7
++#  define MOZ_PRAGMA(tokens) _Pragma(#tokens)
++#  define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor)
++#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3)
++#  define MOZ_PRAGMA(tokens) _Pragma(#tokens)
++#  define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor)
++#else
++#  define MOZ_UNROLL(_)
++#endif
++
++namespace mozilla {
++
++template <class Arch>
++static bool is_aligned(const void* ptr) {
++  return (reinterpret_cast<uintptr_t>(ptr) &
++          ~(static_cast<uintptr_t>(Arch::alignment()) - 1)) ==
++         reinterpret_cast<uintptr_t>(ptr);
++};
++
++template <class Arch>
++void Engine<Arch>::AudioBufferAddWithScale(const float* aInput, float aScale,
++                                           float* aOutput, uint32_t aSize) {
++  if constexpr (Arch::requires_alignment()) {
++    if (aScale == 1.0f) {
++      while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
++        if (!aSize) return;
++        *aOutput += *aInput;
++        ++aOutput;
++        ++aInput;
++        --aSize;
++      }
++    } else {
++      while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
++        if (!aSize) return;
++        *aOutput += *aInput * aScale;
++        ++aOutput;
++        ++aInput;
++        --aSize;
++      }
++    }
++  }
++  MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
++
++  xsimd::batch<float, Arch> vgain(aScale);
++
++  uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
++  MOZ_UNROLL(4)
++  for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
++    auto vin1 = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
++    auto vin2 = xsimd::batch<float, Arch>::load_aligned(&aOutput[i]);
++    auto vout = xsimd::fma(vin1, vgain, vin2);
++    vout.store_aligned(&aOutput[i]);
++  }
++
++  for (unsigned i = aVSize; i < aSize; ++i) {
++    aOutput[i] += aInput[i] * aScale;
++  }
++}
++
++template <class Arch>
++void Engine<Arch>::AudioBlockCopyChannelWithScale(const float* aInput,
++                                                  float aScale,
++                                                  float* aOutput) {
++  MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
++
++  MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
++             "requires tail processing");
++
++  xsimd::batch<float, Arch> vgain = (aScale);
++
++  MOZ_UNROLL(4)
++  for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
++       i += xsimd::batch<float, Arch>::size) {
++    auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
++    auto vout = vin * vgain;
++    vout.store_aligned(&aOutput[i]);
++  }
++};
++
++template <class Arch>
++void Engine<Arch>::AudioBlockCopyChannelWithScale(
++    const float aInput[WEBAUDIO_BLOCK_SIZE],
++    const float aScale[WEBAUDIO_BLOCK_SIZE],
++    float aOutput[WEBAUDIO_BLOCK_SIZE]) {
++  MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
++
++  MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
++             "requires tail processing");
++
++  MOZ_UNROLL(4)
++  for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
++       i += xsimd::batch<float, Arch>::size) {
++    auto vscaled = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
++    auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
++    auto vout = vin * vscaled;
++    vout.store_aligned(&aOutput[i]);
++  }
++};
++
++template <class Arch>
++void Engine<Arch>::AudioBufferInPlaceScale(float* aBlock, float aScale,
++                                           uint32_t aSize) {
++  MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
++
++  xsimd::batch<float, Arch> vgain(aScale);
++
++  uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
++  MOZ_UNROLL(4)
++  for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
++    auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
++    auto vout = vin * vgain;
++    vout.store_aligned(&aBlock[i]);
++  }
++  for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale;
++};
++
++template <class Arch>  // per-sample gain, in place: aBlock[i] *= aScale[i]
++void Engine<Arch>::AudioBufferInPlaceScale(float* aBlock, float* aScale,
++                                           uint32_t aSize) {
++  MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
++
++  uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);  // whole batches
++  MOZ_UNROLL(4)
++  for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
++    auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
++    auto vgain = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
++    auto vout = vin * vgain;
++    vout.store_aligned(&aBlock[i]);
++  }
++  for (uint32_t i = aVSize; i < aSize; ++i) {  // scalar tail past the last batch
++    aBlock[i] *= aScale[i];  // index by i: the SIMD loop never advanced the pointers
++  }
++};
++
++template <class Arch>
++void Engine<Arch>::AudioBlockPanStereoToStereo(
++    const float aInputL[WEBAUDIO_BLOCK_SIZE],
++    const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR,
++    bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE],
++    float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
++  MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
++
++  MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
++             "requires tail processing");
++
++  xsimd::batch<float, Arch> vgainl(aGainL);
++  xsimd::batch<float, Arch> vgainr(aGainR);
++
++  if (aIsOnTheLeft) {
++    MOZ_UNROLL(2)
++    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
++         i += xsimd::batch<float, Arch>::size) {
++      auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
++      auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
++
++      /* left channel : aOutputL  = aInputL + aInputR * gainL */
++      auto vout = xsimd::fma(vinr, vgainl, vinl);
++      vout.store_aligned(&aOutputL[i]);
++
++      /* right channel : aOutputR = aInputR * gainR */
++      auto vscaled = vinr * vgainr;
++      vscaled.store_aligned(&aOutputR[i]);
++    }
++  } else {
++    MOZ_UNROLL(2)
++    for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
++         i += xsimd::batch<float, Arch>::size) {
++      auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
++      auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
++
++      /* left channel : aInputL * gainL */
++      auto vscaled = vinl * vgainl;
++      vscaled.store_aligned(&aOutputL[i]);
++
++      /* right channel: aOutputR = aInputR + aInputL * gainR */
++      auto vout = xsimd::fma(vinl, vgainr, vinr);
++      vout.store_aligned(&aOutputR[i]);
++    }
++  }
++};
++
++template <class Arch>
++void Engine<Arch>::BufferComplexMultiply(const float* aInput,
++                                         const float* aScale, float* aOutput,
++                                         uint32_t aSize) {
++  MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
++  MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
++             "requires tail processing");
++
++  MOZ_UNROLL(2)
++  for (unsigned i = 0; i < aSize * 2;
++       i += 2 * xsimd::batch<std::complex<float>, Arch>::size) {
++    auto in1 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
++        reinterpret_cast<const std::complex<float>*>(&aInput[i]));
++    auto in2 = xsimd::batch<std::complex<float>, Arch>::load_aligned(
++        reinterpret_cast<const std::complex<float>*>(&aScale[i]));
++    auto out = in1 * in2;
++    out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
++  }
++};
++
++template <class Arch>
++float Engine<Arch>::AudioBufferSumOfSquares(const float* aInput,
++                                            uint32_t aLength) {
++  float sum = 0.f;
++
++  if constexpr (Arch::requires_alignment()) {
++    while (!is_aligned<Arch>(aInput)) {
++      if (!aLength) {
++        return sum;
++      }
++      sum += *aInput * *aInput;
++      ++aInput;
++      --aLength;
++    }
++  }
++
++  MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
++
++  constexpr uint32_t unroll_factor = 4;
++  xsimd::batch<float, Arch> accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f};
++
++  uint32_t vLength =
++      aLength & ~(unroll_factor * xsimd::batch<float, Arch>::size - 1);
++
++  for (uint32_t i = 0; i < vLength;
++       i += unroll_factor * xsimd::batch<float, Arch>::size) {
++    MOZ_UNROLL(4)
++    for (uint32_t j = 0; j < unroll_factor; ++j) {
++      auto in = xsimd::batch<float, Arch>::load_aligned(
++          &aInput[i + xsimd::batch<float, Arch>::size * j]);
++      accs[j] = xsimd::fma(in, in, accs[j]);
++    }
++  }
++
++  sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3]));
++  for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i];
++  return sum;
++};
++
++template <class Arch>  // replaces every NaN in aSamples[0, aCount) with 0
++void Engine<Arch>::NaNToZeroInPlace(float* aSamples, size_t aCount) {
++  if constexpr (Arch::requires_alignment()) {
++    while (!is_aligned<Arch>(aSamples)) {  // scalar head until aSamples is aligned
++      if (!aCount) {
++        return;
++      }
++      if (*aSamples != *aSamples) {  // x != x holds only for NaN
++        *aSamples = 0.0;
++      }
++      ++aSamples;
++      --aCount;
++    }
++  }
++
++  MOZ_ASSERT(is_aligned<Arch>(aSamples), "aSamples is aligned");
++
++  size_t vCount = aCount & ~(xsimd::batch<float, Arch>::size - 1);  // size_t: no truncation
++
++  MOZ_UNROLL(4)
++  for (size_t i = 0; i < vCount; i += xsimd::batch<float, Arch>::size) {
++    auto vin = xsimd::batch<float, Arch>::load_aligned(&aSamples[i]);
++    auto vout =
++        xsimd::select(xsimd::isnan(vin), xsimd::batch<float, Arch>(0.f), vin);
++    vout.store_aligned(&aSamples[i]);
++  }
++
++  for (size_t i = vCount; i < aCount; i++) {  // scalar tail; same width as aCount
++    if (aSamples[i] != aSamples[i]) {
++      aSamples[i] = 0.0;
++    }
++  }
++};
++
++template <class Arch>
++void Engine<Arch>::AudioBlockPanStereoToStereo(
++    const float aInputL[WEBAUDIO_BLOCK_SIZE],
++    const float aInputR[WEBAUDIO_BLOCK_SIZE],
++    const float aGainL[WEBAUDIO_BLOCK_SIZE],
++    const float aGainR[WEBAUDIO_BLOCK_SIZE],
++    const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
++    float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
++  MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aGainL), "aGainL is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aGainR), "aGainR is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aIsOnTheLeft), "aIsOnTheLeft is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
++  MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
++
++  MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
++             "requires tail processing");
++
++  MOZ_UNROLL(2)
++  for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE;
++       i += xsimd::batch<float, Arch>::size) {
++    auto mask = xsimd::batch_bool<float, Arch>::load_aligned(&aIsOnTheLeft[i]);
++
++    auto inputL = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
++    auto inputR = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
++    auto gainL = xsimd::batch<float, Arch>::load_aligned(&aGainL[i]);
++    auto gainR = xsimd::batch<float, Arch>::load_aligned(&aGainR[i]);
++
++    auto outL_true = xsimd::fma(inputR, gainL, inputL);
++    auto outR_true = inputR * gainR;
++
++    auto outL_false = inputL * gainL;
++    auto outR_false = xsimd::fma(inputL, gainR, inputR);
++
++    auto outL = xsimd::select(mask, outL_true, outL_false);
++    auto outR = xsimd::select(mask, outR_true, outR_false);
++
++    outL.store_aligned(&aOutputL[i]);
++    outR.store_aligned(&aOutputR[i]);
++  }
++}
++
++}  // namespace mozilla
++
++#endif
+diff --git a/dom/media/webaudio/AudioNodeEngineNEON.cpp b/dom/media/webaudio/AudioNodeEngineNEON.cpp
+--- a/dom/media/webaudio/AudioNodeEngineNEON.cpp
++++ b/dom/media/webaudio/AudioNodeEngineNEON.cpp
+@@ -1,9 +1,9 @@
+ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+ /* this source code form is subject to the terms of the mozilla public
+  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
+  * You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+-#include "AudioNodeEngineGeneric.h"
++#include "AudioNodeEngineGenericImpl.h"
+ namespace mozilla {
+ template struct Engine<xsimd::neon>;
+ }  // namespace mozilla
+diff --git a/dom/media/webaudio/AudioNodeEngineSSE2.cpp b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
+--- a/dom/media/webaudio/AudioNodeEngineSSE2.cpp
++++ b/dom/media/webaudio/AudioNodeEngineSSE2.cpp
+@@ -1,10 +1,10 @@
+ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+ /* this source code form is subject to the terms of the mozilla public
+  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
+  * You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+-#include "AudioNodeEngineGeneric.h"
++#include "AudioNodeEngineGenericImpl.h"
+ 
+ namespace mozilla {
+ template struct Engine<xsimd::sse2>;
+ }  // namespace mozilla
+diff --git a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
+--- a/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
++++ b/dom/media/webaudio/AudioNodeEngineSSE4_2_FMA3.cpp
+@@ -1,10 +1,10 @@
+ /* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+ /* this source code form is subject to the terms of the mozilla public
+  * license, v. 2.0. if a copy of the mpl was not distributed with this file,
+  * You can obtain one at http://mozilla.org/MPL/2.0/. */
+ 
+-#include "AudioNodeEngineGeneric.h"
++#include "AudioNodeEngineGenericImpl.h"
+ 
+ namespace mozilla {
+ template struct Engine<xsimd::fma3<xsimd::sse4_2>>;
+ }  // namespace mozilla
+
author	Jan Palus <atler@pld-linux.org>
	Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)
committer	Jan Palus <atler@pld-linux.org>
	Tue, 21 Mar 2023 08:53:36 +0000 (09:53 +0100)