VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
vector4.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// Created by Arnis Lektauers on 18.07.2025.
5//
6
7#pragma once
8
9#include "defines.h"
10
namespace visutwin::canvas
{
    struct Vector3;

    /**
     * 4-component float vector, 16-byte aligned, with one storage layout per SIMD backend.
     *
     * Depending on which USE_SIMD_* macro is defined, the data lives in a single SIMD
     * register member (`m128`) or, in the scalar fallback, in `x, y, z, w` / `v[4]`.
     * The named scalar members only exist in the fallback build, so portable code must
     * read components through getX() / getY() / getZ() / getW().
     *
     * The vector can also be interpreted as plane coefficients (A, B, C, D); see
     * planeNormalize() and planeDotCoord().
     */
    struct alignas(16) Vector4
    {
        union
        {
#if defined(USE_SIMD_SSE)
            __m128 m128;
#elif defined(USE_SIMD_APPLE)
            simd_float4 m128;
#elif defined(USE_SIMD_NEON)
            float32x4_t m128;
#else
            struct
            {
                float x, y, z, w;
            };

            float v[4];
#endif
        };

        //static Vector4 UNIT_X;
        //static Vector4 UNIT_Y;
        //static Vector4 UNIT_Z;

        /// Constructs the zero vector (0, 0, 0, 0).
        Vector4()
        {
#if defined(USE_SIMD_SSE)
            m128 = _mm_setzero_ps();
#elif defined(USE_SIMD_APPLE)
            m128 = simd_make_float4(0.0f, 0.0f, 0.0f, 0.0f);
#elif defined(USE_SIMD_NEON)
            m128 = vdupq_n_f32(0.0f);
#else
            x = y = z = w = 0.0f;
#endif
        }

        /// Constructs the vector (x, y, z, w) from its four components.
        Vector4(const float x, const float y, const float z, const float w)
        {
#if defined(USE_SIMD_SSE)
            m128 = _mm_set_ps(w, z, y, x); // _mm_set_ps takes lanes highest-first, so x lands in lane 0
#elif defined(USE_SIMD_APPLE)
            m128 = simd_make_float4(x, y, z, w);
#elif defined(USE_SIMD_NEON)
            const float data[4] = {x, y, z, w};
            m128 = vld1q_f32(data);
#else
            this->x = x;
            this->y = y;
            this->z = z;
            this->w = w;
#endif
        }

        // Wrap a raw register of the active SIMD backend without touching its lanes.
#if defined(USE_SIMD_APPLE)
        explicit Vector4(const simd_float4& data) : m128(data)
        {
        }
#elif defined(USE_SIMD_SSE)
        explicit Vector4(const __m128& data) : m128(data) {}
#elif defined(USE_SIMD_NEON)
        explicit Vector4(const float32x4_t& data) : m128(data) {}
#endif

        /// Builds a Vector4 from a Vector3 plus an explicit w component (declared here, defined out of line).
        explicit Vector4(const Vector3& vec3, float w = 0.0f);

        /// Returns the x component (lane 0).
        [[nodiscard]] float getX() const
        {
#if defined(USE_SIMD_SSE)
            return _mm_cvtss_f32(m128); // extracts the lowest float from __m128
#elif defined(USE_SIMD_APPLE)
            return m128.x;
#elif defined(USE_SIMD_NEON)
            return vgetq_lane_f32(m128, 0); // extract lane 0
#else
            return x; // fallback scalar path
#endif
        }

        /// Returns the y component (lane 1).
        [[nodiscard]] float getY() const
        {
#if defined(USE_SIMD_SSE)
            return _mm_cvtss_f32(_mm_shuffle_ps(m128, m128, _MM_SHUFFLE(1, 1, 1, 1)));
#elif defined(USE_SIMD_APPLE)
            return m128.y;
#elif defined(USE_SIMD_NEON)
            return vgetq_lane_f32(m128, 1);
#else
            return y;
#endif
        }

        /// Returns the z component (lane 2).
        [[nodiscard]] float getZ() const
        {
#if defined(USE_SIMD_SSE)
            return _mm_cvtss_f32(_mm_shuffle_ps(m128, m128, _MM_SHUFFLE(2, 2, 2, 2)));
#elif defined(USE_SIMD_APPLE)
            return m128.z;
#elif defined(USE_SIMD_NEON)
            return vgetq_lane_f32(m128, 2);
#else
            return z;
#endif
        }

        /// Returns the w component (lane 3).
        [[nodiscard]] float getW() const
        {
#if defined(USE_SIMD_SSE)
            return _mm_cvtss_f32(_mm_shuffle_ps(m128, m128, _MM_SHUFFLE(3, 3, 3, 3)));
#elif defined(USE_SIMD_APPLE)
            return m128.w;
#elif defined(USE_SIMD_NEON)
            return vgetq_lane_f32(m128, 3);
#else
            return w;
#endif
        }

        /// Returns the 4D dot product x*ox + y*oy + z*oz + w*ow.
        [[nodiscard]] float dot(const Vector4& other) const
        {
#if defined(USE_SIMD_SSE)
            // Lane-wise products
            const __m128 mul = _mm_mul_ps(m128, other.m128);
            // SSE2-compatible horizontal sum: swap within pairs, add, swap halves, add
            __m128 shuf = _mm_shuffle_ps(mul, mul, _MM_SHUFFLE(2, 3, 0, 1));
            __m128 sums = _mm_add_ps(mul, shuf);
            shuf = _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2));
            sums = _mm_add_ps(sums, shuf);
            return _mm_cvtss_f32(sums); // extract the lowest float
#elif defined(USE_SIMD_APPLE)
            return simd_dot(m128, other.m128);
#elif defined(USE_SIMD_NEON)
            const float32x4_t mul = vmulq_f32(m128, other.m128);
            const float32x2_t sum2 = vadd_f32(vget_low_f32(mul), vget_high_f32(mul));
            return vget_lane_f32(sum2, 0) + vget_lane_f32(sum2, 1);
#else
            return x * other.x + y * other.y + z * other.z + w * other.w;
#endif
        }

        /// Returns the Euclidean length over all four components.
        [[nodiscard]] float length() const
        {
#if defined(USE_SIMD_APPLE)
            return simd_length(m128);
#else
            return std::sqrt(dot(*this));
#endif
        }

        /// Component-wise sum.
        Vector4 operator+(const Vector4& other) const
        {
#if defined(USE_SIMD_SSE)
            return Vector4(_mm_add_ps(this->m128, other.m128));
#elif defined(USE_SIMD_APPLE)
            return Vector4(this->m128 + other.m128); // simd_float4 supports operator+
#elif defined(USE_SIMD_NEON)
            return Vector4(vaddq_f32(this->m128, other.m128));
#else
            return Vector4(x + other.x, y + other.y, z + other.z, w + other.w);
#endif
        }

        /// Component-wise difference.
        Vector4 operator-(const Vector4& other) const
        {
#if defined(USE_SIMD_SSE)
            return Vector4(_mm_sub_ps(this->m128, other.m128));
#elif defined(USE_SIMD_APPLE)
            return Vector4(this->m128 - other.m128); // simd_float4 supports operator-
#elif defined(USE_SIMD_NEON)
            return Vector4(vsubq_f32(this->m128, other.m128));
#else
            return Vector4(x - other.x, y - other.y, z - other.z, w - other.w);
#endif
        }

        /// Multiplies every component by `scalar`.
        Vector4 operator*(float scalar) const
        {
#if defined(USE_SIMD_SSE)
            const __m128 scalarVec = _mm_set1_ps(scalar); // Broadcast scalar to all 4 lanes
            return Vector4(_mm_mul_ps(this->m128, scalarVec));
#elif defined(USE_SIMD_APPLE)
            return Vector4(m128 * scalar); // simd_float4 supports scalar multiplication
#elif defined(USE_SIMD_NEON)
            const float32x4_t scalarVec = vdupq_n_f32(scalar); // Broadcast scalar to all 4 lanes
            return Vector4(vmulq_f32(this->m128, scalarVec));
#else
            return Vector4(x * scalar, y * scalar, z * scalar, w * scalar);
#endif
        }

        /**
         * Normalizes the plane (A, B, C, D) so that the normal (A, B, C) has unit length;
         * D is scaled by the same factor.
         *
         * The length is taken over (x, y, z) only -- w must not contribute to it.
         *
         * @return The normalized plane, or the zero vector when the normal has zero length.
         */
        [[nodiscard]] Vector4 planeNormalize() const
        {
#if defined(USE_SIMD_SSE)
            // Squares of (x, y, z) with the w lane forced to 0 so it cannot contribute.
            const __m128 xyz = _mm_set_ps(0.0f, getZ(), getY(), getX());
            const __m128 sq = _mm_mul_ps(xyz, xyz); // [x*x, y*y, z*z, 0]

            // SSE2 reduction, same scheme as dot(): the pair swap yields
            // [xx+yy, xx+yy, zz, zz]; the half swap then brings zz into lane 0.
            // (Adding lane 1 instead of lane 2 here would double xx+yy and drop zz.)
            __m128 shuf = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(2, 3, 0, 1));
            __m128 sums = _mm_add_ps(sq, shuf);
            shuf = _mm_shuffle_ps(sums, sums, _MM_SHUFFLE(1, 0, 3, 2));
            sums = _mm_add_ss(sums, shuf);

            const float lenSq = _mm_cvtss_f32(sums);
            if (lenSq > 0.0f)
            {
                const __m128 scale = _mm_set1_ps(1.0f / std::sqrt(lenSq));
                return Vector4(_mm_mul_ps(m128, scale));
            }
            return {};
#elif defined(USE_SIMD_APPLE)
            simd_float4 xyz = m128;
            xyz.w = 0.0f; // exclude w from the length
            if (const float len = simd_length(xyz); len > 0.0f)
            {
                const float invLen = 1.0f / len;
                return Vector4(m128 * invLen);
            }
            return {};
#elif defined(USE_SIMD_NEON)
            float32x4_t xyz = m128;
            xyz = vsetq_lane_f32(0.0f, xyz, 3); // zero out w

            const float32x4_t sq = vmulq_f32(xyz, xyz);
            const float32x2_t sum2 = vadd_f32(vget_low_f32(sq), vget_high_f32(sq));

            if (const float lenSq = vget_lane_f32(sum2, 0) + vget_lane_f32(sum2, 1); lenSq > 0.0f)
            {
                const float invLen = 1.0f / std::sqrt(lenSq);
                return Vector4(vmulq_n_f32(m128, invLen));
            }
            return {};
#else
            // Plane normalization: normalize by the 3D normal length (x,y,z only), NOT the 4D vector length.
            // The w component (distance) is scaled by the same factor but must not contribute to the length.
            const float lenSq = x * x + y * y + z * z;
            if (lenSq > 0.0f)
            {
                const float invLength = 1.0f / std::sqrt(lenSq);
                return {
                    x * invLength,
                    y * invLength,
                    z * invLength,
                    w * invLength
                };
            }
            // Return a zero plane if the normal vector is degenerate
            return {};
#endif
        }

        /**
         * Evaluates the plane equation at a point: A*x + B*y + C*z + D*1.
         *
         * @param point Position to test; only its x, y, z are used, its w is treated as 1.
         * @return The dot product of this plane with (point.x, point.y, point.z, 1).
         */
        [[nodiscard]] float planeDotCoord(const Vector4& point) const
        {
#if defined(USE_SIMD_SSE)
            // Build [x, y, z, 1] and reuse dot(), whose horizontal sum only needs SSE2.
            // _mm_insert_ps / _mm_dp_ps would require SSE4.1, unlike the rest of this header.
            const Vector4 homogeneous(point.getX(), point.getY(), point.getZ(), 1.0f);
            return dot(homogeneous);
#elif defined(USE_SIMD_APPLE)
            simd_float4 vec = point.m128;
            vec.w = 1.0f;
            return simd_dot(m128, vec);
#elif defined(USE_SIMD_NEON)
            float32x4_t vec = point.m128;
            vec = vsetq_lane_f32(1.0f, vec, 3); // set w = 1
            const float32x4_t mul = vmulq_f32(this->m128, vec);
            const float32x2_t sum1 = vadd_f32(vget_low_f32(mul), vget_high_f32(mul));
            const float32x2_t sum2 = vpadd_f32(sum1, sum1);
            return vget_lane_f32(sum2, 0);
#else
            return x * point.x + y * point.y + z * point.z + w; // use 1.0 for w
#endif
        }
    };
}
309
310#include "vector4.inl"
3D vector for positions, directions, and normals with multi-backend SIMD acceleration.
Definition vector3.h:29
float dot(const Vector4 &other) const
Definition vector4.h:137
float getX() const
Definition vector4.h:85
Vector4 operator+(const Vector4 &other) const
Definition vector4.h:169
Vector4 planeNormalize() const
Definition vector4.h:219
float planeDotCoord(const Vector4 &point) const
Definition vector4.h:286
float getY() const
Definition vector4.h:98
Vector4(const Vector3 &vec3, float w=0.0f)
Vector4(const float x, const float y, const float z, const float w)
Definition vector4.h:56
Vector4 operator-(const Vector4 &other) const
Definition vector4.h:184
Vector4 operator*(float scalar) const
Definition vector4.h:199
float length() const
Definition vector4.h:160