SpectMorph
smmatharm.hh
1 // This Source Code Form is licensed MPL-2.0: http://mozilla.org/MPL/2.0
2 
3 #ifndef SPECTMORPH_MATH_ARM_HH
4 #define SPECTMORPH_MATH_ARM_HH
5 
// Everything up to the matching #endif is compiled only when the compiler
// reports ARM NEON or a 64-bit ARM target.
#if defined(__ARM_NEON) || defined(__arm64__) || defined(__aarch64__)
// Marker macro defined whenever the NEON-backed SSE-style helpers in this
// header are available.
#define SM_ARM_SSE

#include <arm_neon.h>
// Use the SSE vector type name for the NEON vector of four 32-bit floats.
typedef float32x4_t __m128;
11 
12 static inline __attribute__((always_inline)) __m128 _mm_set_ps(float e3, float e2, float e1, float e0)
13 {
14  __m128 r;
15  alignas(16) float data[4] = {e0, e1, e2, e3};
16  r = vld1q_f32(data);
17  return r;
18 }
19 
// Pack four 2-bit lane selectors into one 8-bit shuffle immediate.
// fp0 occupies bits 0-1, fp1 bits 2-3, fp2 bits 4-5 and fp3 bits 6-7.
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) ((fp0) | ((fp1) << 2) | ((fp2) << 4) | ((fp3) << 6))
21 
22 static inline __attribute__((always_inline)) __m128 _mm_mul_ps(__m128 a, __m128 b)
23 {
24  return vmulq_f32(a, b);
25 }
26 
27 static inline __attribute__((always_inline)) __m128 _mm_add_ps(__m128 a, __m128 b)
28 {
29  return vaddq_f32(a, b);
30 }
31 
32 static inline __attribute__((always_inline)) __m128 _mm_sub_ps(__m128 a, __m128 b)
33 {
34  return vsubq_f32(a, b);
35 }
36 
37 static inline __attribute__((always_inline)) __m128 _mm_set_ss(float a)
38 {
39  return vsetq_lane_f32(a, vdupq_n_f32(0.f), 0);
40 }
41 
// Assemble a vector from selected lanes of a and b.
//   result lane 0 = a[ imm8       & 3]
//   result lane 1 = a[(imm8 >> 2) & 3]
//   result lane 2 = b[(imm8 >> 4) & 3]
//   result lane 3 = b[(imm8 >> 6) & 3]
// Written as a macro (GNU statement expression) so imm8 reaches the lane
// intrinsics as a literal expression; a and b are each evaluated once.
#define _mm_shuffle_ps(a, b, imm8) \
  __extension__({ \
    const float32x4_t sm_shuffle_src_lo_ = (a); \
    const float32x4_t sm_shuffle_src_hi_ = (b); \
    float32x4_t sm_shuffle_out_ = \
      vmovq_n_f32(vgetq_lane_f32(sm_shuffle_src_lo_, (imm8) & (0x3))); \
    sm_shuffle_out_ = vsetq_lane_f32( \
      vgetq_lane_f32(sm_shuffle_src_lo_, ((imm8) >> 2) & 0x3), sm_shuffle_out_, 1); \
    sm_shuffle_out_ = vsetq_lane_f32( \
      vgetq_lane_f32(sm_shuffle_src_hi_, ((imm8) >> 4) & 0x3), sm_shuffle_out_, 2); \
    sm_shuffle_out_ = vsetq_lane_f32( \
      vgetq_lane_f32(sm_shuffle_src_hi_, ((imm8) >> 6) & 0x3), sm_shuffle_out_, 3); \
    sm_shuffle_out_; \
  })
52 #endif
53 
54 #endif