VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
matrix4.h
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// Created by Arnis Lektauers on 18.07.2025.
5//
6
7#pragma once
8
9#include <cassert>
10#include <iostream>
11#include <iomanip>
12
13#include "defines.h"
14#include "util/simd.h"
15
16namespace visutwin::canvas
17{
18 struct Vector3;
19 struct Vector4;
20 struct Quaternion;
21
30 struct alignas(16) Matrix4
31 {
32 // Column-major layout: m[col][row]
33 union
34 {
35#if defined(USE_SIMD_SSE)
36 __m128 c[4];
37#elif defined(USE_SIMD_APPLE)
38 simd_float4x4 cm;
39#elif defined(USE_SIMD_NEON)
40 float32x4_t c[4];
41#else
42 float m[4][4];
43#endif
44 };
45
46 // Default constructor creates identity matrix (matching upstream Mat4 behavior)
48 {
49#if defined(USE_SIMD_SSE)
50 c[0] = VECTOR4_MASK_X; // [1, 0, 0, 0]
51 c[1] = VECTOR4_MASK_Y; // [0, 1, 0, 0]
52 c[2] = VECTOR4_MASK_Z; // [0, 0, 1, 0]
53 c[3] = VECTOR4_MASK_W; // [0, 0, 0, 1]
54#elif defined(USE_SIMD_APPLE)
55 cm = matrix_identity_float4x4;
56#elif defined(USE_SIMD_NEON)
57 c[0] = vdupq_n_f32(0.0f);
58 c[1] = vdupq_n_f32(0.0f);
59 c[2] = vdupq_n_f32(0.0f);
60 c[3] = vdupq_n_f32(0.0f);
61 c[0] = vsetq_lane_f32(1.0f, c[0], 0);
62 c[1] = vsetq_lane_f32(1.0f, c[1], 1);
63 c[2] = vsetq_lane_f32(1.0f, c[2], 2);
64 c[3] = vsetq_lane_f32(1.0f, c[3], 3);
65#else
66 m[0][0] = 1.0f; m[0][1] = 0.0f; m[0][2] = 0.0f; m[0][3] = 0.0f;
67 m[1][0] = 0.0f; m[1][1] = 1.0f; m[1][2] = 0.0f; m[1][3] = 0.0f;
68 m[2][0] = 0.0f; m[2][1] = 0.0f; m[2][2] = 1.0f; m[2][3] = 0.0f;
69 m[3][0] = 0.0f; m[3][1] = 0.0f; m[3][2] = 0.0f; m[3][3] = 1.0f;
70#endif
71 }
72
73 explicit Matrix4(const Vector4& col0, const Vector4& col1, const Vector4& col2, const Vector4& col3);
74
75#if defined(USE_SIMD_APPLE)
76 explicit Matrix4(const matrix_float4x4& m) : cm(m) {}
77
78 explicit Matrix4(const simd_float4 col0, const simd_float4 col1, const simd_float4 col2, const simd_float4 col3)
79 : cm(simd_matrix(col0, col1, col2, col3)) {}
80#endif
81
82 Matrix4& operator=(const Matrix4& other)
83 {
84 if (this == &other)
85 {
86 return *this; // self-assignment check
87 }
88
89#if defined(USE_SIMD_SSE) || defined(USE_SIMD_NEON)
90 c[0] = other.c[0];
91 c[1] = other.c[1];
92 c[2] = other.c[2];
93 c[3] = other.c[3];
94#elif defined(USE_SIMD_APPLE)
95 cm = other.cm;
96#else
97 for (int col = 0; col < 4; ++col)
98 {
99 for (int row = 0; row < 4; ++row)
100 {
101 m[col][row] = other.m[col][row];
102 }
103 }
104#endif
105 return *this;
106 }
107
109 {
110 return Matrix4();
111 }
112
113 Matrix4 operator*(const Matrix4& rhs) const;
114
115 Vector4 operator*(const Vector4& v) const;
116
117 Vector3 operator*(const Vector3& v) const;
118
119 void print() const
120 {
121 std::cout << std::fixed << std::setprecision(3);
122#if defined(USE_SIMD_SSE)
123 alignas(16) float temp[4];
124 for (int row = 0; row < 4; ++row)
125 {
126 std::cout << "[ ";
127 for (int col = 0; col < 4; ++col)
128 {
129 _mm_store_ps(temp, c[col]); // store c[col] into temp[]
130 std::cout << std::setw(8) << temp[row] << " ";
131 }
132 std::cout << "]\n";
133 }
134#elif defined(USE_SIMD_APPLE)
135 for (int row = 0; row < 4; ++row)
136 {
137 std::cout << "[ ";
138 for (const auto column : cm.columns)
139 {
140 std::cout << std::setw(8) << column[row] << " ";
141 }
142 std::cout << "]\n";
143 }
144#elif defined(USE_SIMD_NEON)
145 alignas(16) float temp[4];
146 for (int row = 0; row < 4; ++row)
147 {
148 std::cout << "[ ";
149 for (int col = 0; col < 4; ++col)
150 {
151 vst1q_f32(temp, c[col]); // store NEON vector to temp
152 std::cout << std::setw(8) << temp[row] << " ";
153 }
154 std::cout << "]\n";
155 }
156#else
157 // Fallback to standard float array
158 for (int row = 0; row < 4; ++row)
159 {
160 std::cout << "[ ";
161 for (int col = 0; col < 4; ++col)
162 {
163 std::cout << std::setw(8) << m[col][row] << " ";
164 }
165 std::cout << "]\n";
166 }
167#endif
168 }
169
170 [[nodiscard]] Matrix4 transpose() const
171 {
172 Matrix4 result;
173#if defined(USE_SIMD_APPLE)
174 const simd_float4x4 transposed = simd_transpose(this->cm);
175 result.cm = transposed;
176#elif defined(USE_SIMD_NEON)
177 // Step 1: Pairwise interleave at 32-bit granularity
178 const float32x4x2_t t01 = vtrnq_f32(this->c[0], this->c[1]);
179 const float32x4x2_t t23 = vtrnq_f32(this->c[2], this->c[3]);
180
181 // Step 2: Combine low/high halves to complete the transpose
182 result.c[0] = vcombine_f32(vget_low_f32(t01.val[0]), vget_low_f32(t23.val[0]));
183 result.c[1] = vcombine_f32(vget_low_f32(t01.val[1]), vget_low_f32(t23.val[1]));
184 result.c[2] = vcombine_f32(vget_high_f32(t01.val[0]), vget_high_f32(t23.val[0]));
185 result.c[3] = vcombine_f32(vget_high_f32(t01.val[1]), vget_high_f32(t23.val[1]));
186#elif defined(USE_SIMD_SSE)
187 __m128 col0 = this->c[0];
188 __m128 col1 = this->c[1];
189 __m128 col2 = this->c[2];
190 __m128 col3 = this->c[3];
191
192 // Perform in-place transpose of four __m128 registers
193 _MM_TRANSPOSE4_PS(col0, col1, col2, col3);
194
195 result.c[0] = col0;
196 result.c[1] = col1;
197 result.c[2] = col2;
198 result.c[3] = col3;
199#else
200 // Scalar fallback: manually transpose
201 for (int col = 0; col < 4; ++col)
202 {
203 for (int row = 0; row < 4; ++row)
204 {
205 result.m[col][row] = this->m[row][col];
206 }
207 }
208#endif
209 return result;
210 }
211
212 [[nodiscard]] Vector4 getColumn(int col) const;
213
214 void setColumn(int col, const Vector4& v);
215
216 // Create an orthographic projection matrix using a left-handed coordinate system
217 // Clip z in [0,1], reverse-Z (near -> 1, far -> 0)
218 static Matrix4 orthographicLHReverseZ(const float viewWidth, const float viewHeight, const float nearZ,
219 const float farZ)
220 {
221 assert(std::abs(viewWidth) > 0.00001f);
222 assert(std::abs(viewHeight) > 0.00001f);
223 assert(std::abs(farZ - nearZ) > 0.00001f);
224
225 const float halfW = 2.0f / viewWidth;
226 const float halfH = 2.0f / viewHeight;
227 const float invDZ = 1.0f / (farZ - nearZ);
228
229 Matrix4 result;
230#if defined(USE_SIMD_SSE)
231 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, halfW); // [2/w, 0, 0, 0]
232 result.c[1] = _mm_set_ps(0.0f, 0.0f, halfH, 0.0f); // [0, 2/h, 0, 0]
233 result.c[2] = _mm_set_ps(0.0f, -invDZ, 0.0f, 0.0f); // [0, 0, -invDZ, 0]
234 result.c[3] = _mm_set_ps(1.0f, farZ * invDZ, 0.0f, 0.0f); // [0, 0, farZ*invDZ, 1]
235#elif defined(USE_SIMD_APPLE)
236 result.cm.columns[0] = simd_make_float4(halfW, 0.0f, 0.0f, 0.0f);
237 result.cm.columns[1] = simd_make_float4(0.0f, halfH, 0.0f, 0.0f);
238 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, -invDZ, 0.0f);
239 result.cm.columns[3] = simd_make_float4(0.0f, 0.0f, farZ * invDZ, 1.0f);
240#elif defined(USE_SIMD_NEON)
241 result.c[0] = vdupq_n_f32(0.0f);
242 result.c[1] = vdupq_n_f32(0.0f);
243 result.c[2] = vdupq_n_f32(0.0f);
244 result.c[3] = vdupq_n_f32(0.0f);
245
246 result.c[0] = vsetq_lane_f32(halfW, result.c[0], 0);
247 result.c[1] = vsetq_lane_f32(halfH, result.c[1], 1);
248 result.c[2] = vsetq_lane_f32(-invDZ, result.c[2], 2);
249 result.c[3] = vsetq_lane_f32(farZ * invDZ, result.c[3], 2);
250 result.c[3] = vsetq_lane_f32(1.0f, result.c[3], 3);
251#else
252 result.m[0][0] = halfW; result.m[0][1] = 0.0f; result.m[0][2] = 0.0f; result.m[0][3] = 0.0f;
253 result.m[1][0] = 0.0f; result.m[1][1] = halfH; result.m[1][2] = 0.0f; result.m[1][3] = 0.0f;
254 result.m[2][0] = 0.0f; result.m[2][1] = 0.0f; result.m[2][2] = -invDZ; result.m[2][3] = 0.0f;
255 result.m[3][0] = 0.0f; result.m[3][1] = 0.0f; result.m[3][2] = farZ * invDZ; result.m[3][3] = 1.0f;
256#endif
257 return result;
258 }
259
260 // Clip z in [0,1], reverse-Z (near -> 1, far -> 0)
261 static Matrix4 perspectiveFovLHReverseZ(const float fovY, const float aspect, const float zNear, const float zFar)
262 {
263 assert(zNear > 0.f && zFar > 0.f);
264 assert(std::abs(fovY) > 0.00001f * 2.0f);
265 assert(std::abs(aspect) > 0.00001f);
266 assert(std::abs(zFar - zNear) > 0.00001f);
267
268 const float yScale = 1.0f / std::tan(fovY * 0.5f); // same as cosFov / sinFov
269 const float xScale = yScale / aspect;
270 const float fRange = zNear / (zFar - zNear);
271 const float fTranslate = zFar * fRange;
272
273 Matrix4 result;
274#if defined(USE_SIMD_SSE)
275 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, xScale);
276 result.c[1] = _mm_set_ps(0.0f, 0.0f, yScale, 0.0f);
277 result.c[2] = _mm_set_ps(1.0f, fRange, 0.0f, 0.0f);
278 result.c[3] = _mm_set_ps(0.0f, fTranslate, 0.0f, 0.0f);
279#elif defined(USE_SIMD_APPLE)
280 result.cm.columns[0] = simd_make_float4(xScale, 0.0f, 0.0f, 0.0f);
281 result.cm.columns[1] = simd_make_float4(0.0f, yScale, 0.0f, 0.0f);
282 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, fRange, 1.0f);
283 result.cm.columns[3] = simd_make_float4(0.0f, 0.0f, fTranslate, 0.0f);
284#elif defined(USE_SIMD_NEON)
285 result.c[0] = vdupq_n_f32(0.0f);
286 result.c[1] = vdupq_n_f32(0.0f);
287 result.c[2] = vdupq_n_f32(0.0f);
288 result.c[3] = vdupq_n_f32(0.0f);
289
290 result.c[0] = vsetq_lane_f32(xScale, result.c[0], 0);
291 result.c[1] = vsetq_lane_f32(yScale, result.c[1], 1);
292 result.c[2] = vsetq_lane_f32(fRange, result.c[2], 2);
293 result.c[2] = vsetq_lane_f32(1.0f, result.c[2], 3);
294 result.c[3] = vsetq_lane_f32(fTranslate, result.c[3], 2);
295#else
296 result.m[0][0] = xScale; result.m[0][1] = 0.0f; result.m[0][2] = 0.0f; result.m[0][3] = 0.0f;
297 result.m[1][0] = 0.0f; result.m[1][1] = yScale; result.m[1][2] = 0.0f; result.m[1][3] = 0.0f;
298 result.m[2][0] = 0.0f; result.m[2][1] = 0.0f; result.m[2][2] = fRange; result.m[2][3] = 1.0f;
299 result.m[3][0] = 0.0f; result.m[3][1] = 0.0f; result.m[3][2] = fTranslate; result.m[3][3] = 0.0f;
300#endif
301 return result;
302 }
303
304 static Matrix4 translation(float x, float y, float z)
305 {
306 Matrix4 result; // Zero-initialized
307#if defined(USE_SIMD_SSE)
308 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f); // [1, 0, 0, 0]
309 result.c[1] = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f); // [0, 1, 0, 0]
310 result.c[2] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); // [0, 0, 1, 0]
311 result.c[3] = _mm_set_ps(1.0f, z, y, x); // [x, y, z, 1]
312#elif defined(USE_SIMD_APPLE)
313 result.cm.columns[0] = simd_make_float4(1.0f, 0.0f, 0.0f, 0.0f); // X-axis basis
314 result.cm.columns[1] = simd_make_float4(0.0f, 1.0f, 0.0f, 0.0f); // Y-axis basis
315 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, 1.0f, 0.0f); // Z-axis basis
316 result.cm.columns[3] = simd_make_float4(x, y, z, 1.0f); // Translation column
317#elif defined(USE_SIMD_NEON)
318 result.c[0] = vdupq_n_f32(0.0f);
319 result.c[1] = vdupq_n_f32(0.0f);
320 result.c[2] = vdupq_n_f32(0.0f);
321 result.c[3] = vdupq_n_f32(0.0f);
322
323 result.c[0] = vsetq_lane_f32(1.0f, result.c[0], 0); // m[0][0] = 1
324 result.c[1] = vsetq_lane_f32(1.0f, result.c[1], 1); // m[1][1] = 1
325 result.c[2] = vsetq_lane_f32(1.0f, result.c[2], 2); // m[2][2] = 1
326
327 result.c[3] = vsetq_lane_f32(x, result.c[3], 0); // m[3][0] = x
328 result.c[3] = vsetq_lane_f32(y, result.c[3], 1); // m[3][1] = y
329 result.c[3] = vsetq_lane_f32(z, result.c[3], 2); // m[3][2] = z
330 result.c[3] = vsetq_lane_f32(1.0f, result.c[3], 3); // m[3][3] = 1
331#else
332 result.m[0][0] = 1.0f;
333 result.m[1][1] = 1.0f;
334 result.m[2][2] = 1.0f;
335 result.m[3][3] = 1.0f;
336
337 result.m[3][0] = x;
338 result.m[3][1] = y;
339 result.m[3][2] = z;
340#endif
341 return result;
342 }
343
344 // Returns left-handed view matrix, looking from the eye in dir direction with up
345 static Matrix4 lookToLH(const Vector3& eye, const Vector3& dir, const Vector3& up);
346
347 [[nodiscard]] Matrix4 inverse() const;
348
349 [[nodiscard]] Vector3 getPosition() const;
350
351 [[nodiscard]] Quaternion getRotation() const;
352
353 [[nodiscard]] Vector3 getTranslation() const;
354
355 [[nodiscard]] float getElement(const int col, int row) const {
356 assert(0 <= col && col < 4 && 0 <= row && row < 4);
357#if defined(USE_SIMD_APPLE)
358 return cm.columns[col][row];
359#elif defined(USE_SIMD_SSE)
360 alignas(16) float tmp[4];
361 _mm_store_ps(tmp, c[col]);
362 return tmp[row];
363#elif defined(USE_SIMD_NEON)
364 switch (row) {
365 case 0: return vgetq_lane_f32(c[col], 0);
366 case 1: return vgetq_lane_f32(c[col], 1);
367 case 2: return vgetq_lane_f32(c[col], 2);
368 case 3: return vgetq_lane_f32(c[col], 3);
369 default: return 0.0f;
370 }
371#else
372 return m[col][row];
373#endif
374 }
375
376 void setElement(const int col, int row, const float value) {
377 assert(0 <= col && col < 4 && 0 <= row && row < 4);
378#if defined(USE_SIMD_APPLE)
379 cm.columns[col][row] = value;
380#elif defined(USE_SIMD_SSE)
381 alignas(16) float tmp[4];
382 _mm_store_ps(tmp, c[col]);
383 tmp[row] = value;
384 c[col] = _mm_load_ps(tmp);
385#elif defined(USE_SIMD_NEON)
386 switch (row) {
387 case 0: c[col] = vsetq_lane_f32(value, c[col], 0); break;
388 case 1: c[col] = vsetq_lane_f32(value, c[col], 1); break;
389 case 2: c[col] = vsetq_lane_f32(value, c[col], 2); break;
390 case 3: c[col] = vsetq_lane_f32(value, c[col], 3); break;
391 default: break;
392 }
393#else
394 m[col][row] = value;
395#endif
396 }
397
402
407
415 Matrix4 mulAffine(const Matrix4& rhs) const;
416
417 static Matrix4 perspective(float fov, float aspect, float zNear, float zFar, bool fovIsHorizontal = false);
418
419 static Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar);
420
421 static Matrix4 ortho(float left, float right, float bottom, float top, float near, float far);
422
429 static Matrix4 trs(const Vector3& t, const Quaternion& r, const Vector3& s);
430
438 static Matrix4 reflection(float nx, float ny, float nz, float distance)
439 {
440 const float a = nx, b = ny, c = nz;
441 Matrix4 r;
442 r.setElement(0, 0, 1.0f - 2.0f * a * a);
443 r.setElement(0, 1, -2.0f * a * b);
444 r.setElement(0, 2, -2.0f * a * c);
445 r.setElement(0, 3, 0.0f);
446
447 r.setElement(1, 0, -2.0f * a * b);
448 r.setElement(1, 1, 1.0f - 2.0f * b * b);
449 r.setElement(1, 2, -2.0f * b * c);
450 r.setElement(1, 3, 0.0f);
451
452 r.setElement(2, 0, -2.0f * a * c);
453 r.setElement(2, 1, -2.0f * b * c);
454 r.setElement(2, 2, 1.0f - 2.0f * c * c);
455 r.setElement(2, 3, 0.0f);
456
457 r.setElement(3, 0, -2.0f * a * distance);
458 r.setElement(3, 1, -2.0f * b * distance);
459 r.setElement(3, 2, -2.0f * c * distance);
460 r.setElement(3, 3, 1.0f);
461 return r;
462 }
463 };
464}
465
466#include "matrix4.inl"
467#include "matrix4Inverse.inl"
void setElement(const int col, int row, const float value)
Definition matrix4.h:376
static Matrix4 reflection(float nx, float ny, float nz, float distance)
Definition matrix4.h:438
static Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar)
static Matrix4 identity()
Definition matrix4.h:108
static Matrix4 translation(float x, float y, float z)
Definition matrix4.h:304
Vector3 transformPoint(const Vector3 &v) const
static Matrix4 lookToLH(const Vector3 &eye, const Vector3 &dir, const Vector3 &up)
static Matrix4 orthographicLHReverseZ(const float viewWidth, const float viewHeight, const float nearZ, const float farZ)
Definition matrix4.h:218
Quaternion getRotation() const
Matrix4 inverse() const
Matrix4 & operator=(const Matrix4 &other)
Definition matrix4.h:82
Matrix4 operator*(const Matrix4 &rhs) const
Vector3 getScale() const
void setColumn(int col, const Vector4 &v)
Matrix4 mulAffine(const Matrix4 &rhs) const
Vector3 operator*(const Vector3 &v) const
Vector4 operator*(const Vector4 &v) const
static Matrix4 perspectiveFovLHReverseZ(const float fovY, const float aspect, const float zNear, const float zFar)
Definition matrix4.h:261
static Matrix4 ortho(float left, float right, float bottom, float top, float near, float far)
Vector3 getTranslation() const
Matrix4 transpose() const
Definition matrix4.h:170
Vector4 getColumn(int col) const
static Matrix4 perspective(float fov, float aspect, float zNear, float zFar, bool fovIsHorizontal=false)
static Matrix4 trs(const Vector3 &t, const Quaternion &r, const Vector3 &s)
Vector3 getPosition() const
float getElement(const int col, int row) const
Definition matrix4.h:355
Matrix4(const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3)
Unit quaternion for rotation representation with SIMD-accelerated slerp and multiply.
Definition quaternion.h:20
3D vector for positions, directions, and normals with multi-backend SIMD acceleration.
Definition vector3.h:29
4D vector for homogeneous coordinates, color values, and SIMD operations.
Definition vector4.h:20