#ifndef SPECTMORPH_MATH_ARM_HH
#define SPECTMORPH_MATH_ARM_HH
#if defined(__ARM_NEON) || defined(__arm64__) || defined(__aarch64__)

#include <arm_neon.h>
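// Minimal SSE-to-NEON compatibility shim: __m128 is mapped onto NEON's 128-bit float vector type.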
typedef float32x4_t __m128;
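// _mm_set_ps: pack four floats into a vector; as in SSE, e0 ends up in lane 0 and e3 in lane 3.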
static inline __attribute__((always_inline)) __m128 _mm_set_ps(float e3, float e2, float e1, float e0)
{
  alignas(16) float data[4] = {e0, e1, e2, e3};
  return vld1q_f32(data);
}
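// _MM_SHUFFLE packs four 2-bit lane selectors into the 8-bit immediate used by _mm_shuffle_ps.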
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
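// Element-wise arithmetic maps one-to-one onto the corresponding NEON intrinsics.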
static inline __attribute__((always_inline)) __m128 _mm_mul_ps(__m128 a, __m128 b)
{
  return vmulq_f32(a, b);
}
static inline __attribute__((always_inline)) __m128 _mm_add_ps(__m128 a, __m128 b)
{
  return vaddq_f32(a, b);
}
static inline __attribute__((always_inline)) __m128 _mm_sub_ps(__m128 a, __m128 b)
{
  return vsubq_f32(a, b);
}
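// _mm_set_ss: place a in lane 0 and zero the remaining three lanes.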
static inline __attribute__((always_inline)) __m128 _mm_set_ss(float a)
{
  return vsetq_lane_f32(a, vdupq_n_f32(0.f), 0);
}
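// _mm_shuffle_ps emulation as a GNU statement expression: lanes 0 and 1 are selected from a,
// lanes 2 and 3 from b, according to the 2-bit fields of imm8 (which must be a compile-time
// constant, since vgetq_lane_f32 requires a constant lane index).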
#define _mm_shuffle_ps(a, b, imm8) \
  ({ \
    float32x4_t shuffle_ps_a_ = (a); \
    float32x4_t shuffle_ps_b_ = (b); \
    float32x4_t shuffle_ps_ret_; \
    shuffle_ps_ret_ = vmovq_n_f32(vgetq_lane_f32(shuffle_ps_a_, (imm8) & 0x3)); \
    shuffle_ps_ret_ = vsetq_lane_f32(vgetq_lane_f32(shuffle_ps_a_, ((imm8) >> 2) & 0x3), shuffle_ps_ret_, 1); \
    shuffle_ps_ret_ = vsetq_lane_f32(vgetq_lane_f32(shuffle_ps_b_, ((imm8) >> 4) & 0x3), shuffle_ps_ret_, 2); \
    shuffle_ps_ret_ = vsetq_lane_f32(vgetq_lane_f32(shuffle_ps_b_, ((imm8) >> 6) & 0x3), shuffle_ps_ret_, 3); \