// Dot product of this 2-D vector with `other`: x*other.x + y*other.y.
// NOTE(review): this chunk is extraction-garbled — the original file's line
// numbers are fused onto the code (e.g. "35#if") and statements are wrapped
// mid-expression. The function signature precedes this fragment and is not
// visible here.
35#if defined(USE_SIMD_NEON)
// NEON path: pack {x, y, 0, 0} into each register and multiply lane-wise.
36 float32x4_t a = {
x,
y, 0.0f, 0.0f};
37 float32x4_t b = {other.
x, other.
y, 0.0f, 0.0f};
38 const float32x4_t m = vmulq_f32(a, b);
// low half = {x*ox, y*oy}, high half = {0, 0}; add halves, then sum the
// two remaining lanes to finish the horizontal reduction.
39 float32x2_t sum = vadd_f32(vget_low_f32(m), vget_high_f32(m));
40 return vget_lane_f32(sum, 0) + vget_lane_f32(sum, 1);
41#elif defined(USE_SIMD_SSE)
// SSE path: _mm_set_ps takes arguments high-to-low, so lanes are {x, y, 0, 0}.
42 __m128 a = _mm_set_ps(0.0f, 0.0f,
y,
x);
43 __m128 b = _mm_set_ps(0.0f, 0.0f, other.
y, other.
x);
44 __m128 mul = _mm_mul_ps(a, b);
// Classic shuffle/add horizontal sum of the four product lanes.
// NOTE(review): original line 45 (gap 44 -> 46) was dropped by extraction;
// it was probably a comment.
46 __m128 shuf = _mm_shuffle_ps(mul, mul, _MM_SHUFFLE(2, 3, 0, 1));
47 __m128 sums = _mm_add_ps(mul, shuf);
48 __m128
dot = _mm_add_ss(sums, _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 1, 1, 1)));
49 return _mm_cvtss_f32(
dot);
// NOTE(review): original line 50 (gap 49 -> 51) was dropped by extraction;
// given the scalar fallback below, it was almost certainly "#else". The
// matching "#endif" (line 52) also lies outside this fragment. Restore both
// when repairing the file.
51 return x * other.
x +
y * other.
y;
// Component-wise subtraction: returns {x - other.x, y - other.y} as a new
// vector (aggregate return of the enclosing 2-D vector type).
// NOTE(review): extraction-garbled fragment — fused line numbers and
// mid-expression wraps; the function signature precedes this view.
62#if defined(USE_SIMD_NEON)
// NEON path: pack {x, y, 0, 0}, subtract lane-wise, extract the two low lanes.
63 float32x4_t a = {
x,
y, 0.0f, 0.0f};
64 float32x4_t b = {other.
x, other.
y, 0.0f, 0.0f};
65 float32x4_t result = vsubq_f32(a, b);
66 return {vgetq_lane_f32(result, 0), vgetq_lane_f32(result, 1)};
67#elif defined(USE_SIMD_SSE)
// SSE path: _mm_set_ps arguments are high-to-low, so lanes are {x, y, 0, 0}.
68 __m128 a = _mm_set_ps(0.0f, 0.0f,
y,
x);
69 __m128 b = _mm_set_ps(0.0f, 0.0f, other.
y, other.
x);
70 __m128 result = _mm_sub_ps(a, b);
// _mm_store_ps requires 16-byte alignment, hence alignas(16) on the buffer.
71 alignas(16)
float res[4];
72 _mm_store_ps(res, result);
73 return { res[0], res[1] };
// NOTE(review): original line 74 (gap 73 -> 75) was dropped by extraction;
// given the scalar fallback below, it was almost certainly "#else". The
// matching "#endif" (line 76) lies outside this fragment.
75 return {
x - other.
x,
y - other.
y };
// Scalar multiplication: returns {x * scalar, y * scalar} as a new vector.
// NOTE(review): extraction-garbled fragment — fused line numbers and
// mid-expression wraps; the function signature precedes this view and the
// closing "#endif" follows it (the fragment ends before it).
81#if defined(USE_SIMD_NEON)
// NEON path: broadcast the scalar with vdupq_n_f32, multiply lane-wise.
82 float32x4_t a = {
x,
y, 0.0f, 0.0f};
83 float32x4_t s = vdupq_n_f32(scalar);
84 float32x4_t result = vmulq_f32(a, s);
85 return {vgetq_lane_f32(result, 0), vgetq_lane_f32(result, 1)};
86#elif defined(USE_SIMD_SSE)
// SSE path: _mm_set_ps arguments are high-to-low, so lanes are {x, y, 0, 0};
// _mm_set1_ps broadcasts the scalar across all four lanes.
87 __m128 a = _mm_set_ps(0.0f, 0.0f,
y,
x);
88 __m128 s = _mm_set1_ps(scalar);
89 __m128 result = _mm_mul_ps(a, s);
// _mm_store_ps requires 16-byte alignment, hence alignas(16) on the buffer.
90 alignas(16)
float res[4];
91 _mm_store_ps(res, result);
92 return { res[0], res[1] };
// NOTE(review): original line 93 (gap 92 -> 94) was dropped by extraction;
// given the scalar fallback below, it was almost certainly "#else".
94 return {
x * scalar,
y * scalar };