35#if defined(USE_SIMD_SSE)
37#elif defined(USE_SIMD_APPLE)
39#elif defined(USE_SIMD_NEON)
49#if defined(USE_SIMD_SSE)
50 c[0] = VECTOR4_MASK_X;
51 c[1] = VECTOR4_MASK_Y;
52 c[2] = VECTOR4_MASK_Z;
53 c[3] = VECTOR4_MASK_W;
54#elif defined(USE_SIMD_APPLE)
55 cm = matrix_identity_float4x4;
56#elif defined(USE_SIMD_NEON)
57 c[0] = vdupq_n_f32(0.0f);
58 c[1] = vdupq_n_f32(0.0f);
59 c[2] = vdupq_n_f32(0.0f);
60 c[3] = vdupq_n_f32(0.0f);
61 c[0] = vsetq_lane_f32(1.0f, c[0], 0);
62 c[1] = vsetq_lane_f32(1.0f, c[1], 1);
63 c[2] = vsetq_lane_f32(1.0f, c[2], 2);
64 c[3] = vsetq_lane_f32(1.0f, c[3], 3);
66 m[0][0] = 1.0f;
m[0][1] = 0.0f;
m[0][2] = 0.0f;
m[0][3] = 0.0f;
67 m[1][0] = 0.0f;
m[1][1] = 1.0f;
m[1][2] = 0.0f;
m[1][3] = 0.0f;
68 m[2][0] = 0.0f;
m[2][1] = 0.0f;
m[2][2] = 1.0f;
m[2][3] = 0.0f;
69 m[3][0] = 0.0f;
m[3][1] = 0.0f;
m[3][2] = 0.0f;
m[3][3] = 1.0f;
75#if defined(USE_SIMD_APPLE)
76 explicit Matrix4(
const matrix_float4x4&
m) : cm(
m) {}
78 explicit Matrix4(
const simd_float4 col0,
const simd_float4 col1,
const simd_float4 col2,
const simd_float4 col3)
79 : cm(simd_matrix(col0, col1, col2, col3)) {}
89#if defined(USE_SIMD_SSE) || defined(USE_SIMD_NEON)
94#elif defined(USE_SIMD_APPLE)
97 for (
int col = 0; col < 4; ++col)
99 for (
int row = 0; row < 4; ++row)
101 m[col][row] = other.
m[col][row];
121 std::cout << std::fixed << std::setprecision(3);
122#if defined(USE_SIMD_SSE)
123 alignas(16)
float temp[4];
124 for (
int row = 0; row < 4; ++row)
127 for (
int col = 0; col < 4; ++col)
129 _mm_store_ps(temp, c[col]);
130 std::cout << std::setw(8) << temp[row] <<
" ";
134#elif defined(USE_SIMD_APPLE)
135 for (
int row = 0; row < 4; ++row)
138 for (
const auto column : cm.columns)
140 std::cout << std::setw(8) << column[row] <<
" ";
144#elif defined(USE_SIMD_NEON)
145 alignas(16)
float temp[4];
146 for (
int row = 0; row < 4; ++row)
149 for (
int col = 0; col < 4; ++col)
151 vst1q_f32(temp, c[col]);
152 std::cout << std::setw(8) << temp[row] <<
" ";
158 for (
int row = 0; row < 4; ++row)
161 for (
int col = 0; col < 4; ++col)
163 std::cout << std::setw(8) <<
m[col][row] <<
" ";
173#if defined(USE_SIMD_APPLE)
174 const simd_float4x4 transposed = simd_transpose(this->cm);
175 result.cm = transposed;
176#elif defined(USE_SIMD_NEON)
178 const float32x4x2_t t01 = vtrnq_f32(this->c[0], this->c[1]);
179 const float32x4x2_t t23 = vtrnq_f32(this->c[2], this->c[3]);
182 result.c[0] = vcombine_f32(vget_low_f32(t01.val[0]), vget_low_f32(t23.val[0]));
183 result.c[1] = vcombine_f32(vget_low_f32(t01.val[1]), vget_low_f32(t23.val[1]));
184 result.c[2] = vcombine_f32(vget_high_f32(t01.val[0]), vget_high_f32(t23.val[0]));
185 result.c[3] = vcombine_f32(vget_high_f32(t01.val[1]), vget_high_f32(t23.val[1]));
186#elif defined(USE_SIMD_SSE)
187 __m128 col0 = this->c[0];
188 __m128 col1 = this->c[1];
189 __m128 col2 = this->c[2];
190 __m128 col3 = this->c[3];
193 _MM_TRANSPOSE4_PS(col0, col1, col2, col3);
201 for (
int col = 0; col < 4; ++col)
203 for (
int row = 0; row < 4; ++row)
205 result.
m[col][row] = this->
m[row][col];
221 assert(std::abs(viewWidth) > 0.00001f);
222 assert(std::abs(viewHeight) > 0.00001f);
223 assert(std::abs(farZ - nearZ) > 0.00001f);
225 const float halfW = 2.0f / viewWidth;
226 const float halfH = 2.0f / viewHeight;
227 const float invDZ = 1.0f / (farZ - nearZ);
230#if defined(USE_SIMD_SSE)
231 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, halfW);
232 result.c[1] = _mm_set_ps(0.0f, 0.0f, halfH, 0.0f);
233 result.c[2] = _mm_set_ps(0.0f, -invDZ, 0.0f, 0.0f);
234 result.c[3] = _mm_set_ps(1.0f, farZ * invDZ, 0.0f, 0.0f);
235#elif defined(USE_SIMD_APPLE)
236 result.cm.columns[0] = simd_make_float4(halfW, 0.0f, 0.0f, 0.0f);
237 result.cm.columns[1] = simd_make_float4(0.0f, halfH, 0.0f, 0.0f);
238 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, -invDZ, 0.0f);
239 result.cm.columns[3] = simd_make_float4(0.0f, 0.0f, farZ * invDZ, 1.0f);
240#elif defined(USE_SIMD_NEON)
241 result.c[0] = vdupq_n_f32(0.0f);
242 result.c[1] = vdupq_n_f32(0.0f);
243 result.c[2] = vdupq_n_f32(0.0f);
244 result.c[3] = vdupq_n_f32(0.0f);
246 result.c[0] = vsetq_lane_f32(halfW, result.c[0], 0);
247 result.c[1] = vsetq_lane_f32(halfH, result.c[1], 1);
248 result.c[2] = vsetq_lane_f32(-invDZ, result.c[2], 2);
249 result.c[3] = vsetq_lane_f32(farZ * invDZ, result.c[3], 2);
250 result.c[3] = vsetq_lane_f32(1.0f, result.c[3], 3);
252 result.
m[0][0] = halfW; result.
m[0][1] = 0.0f; result.
m[0][2] = 0.0f; result.
m[0][3] = 0.0f;
253 result.
m[1][0] = 0.0f; result.
m[1][1] = halfH; result.
m[1][2] = 0.0f; result.
m[1][3] = 0.0f;
254 result.
m[2][0] = 0.0f; result.
m[2][1] = 0.0f; result.
m[2][2] = -invDZ; result.
m[2][3] = 0.0f;
255 result.
m[3][0] = 0.0f; result.
m[3][1] = 0.0f; result.
m[3][2] = farZ * invDZ; result.
m[3][3] = 1.0f;
263 assert(zNear > 0.f && zFar > 0.f);
264 assert(std::abs(fovY) > 0.00001f * 2.0f);
265 assert(std::abs(aspect) > 0.00001f);
266 assert(std::abs(zFar - zNear) > 0.00001f);
268 const float yScale = 1.0f / std::tan(fovY * 0.5f);
269 const float xScale = yScale / aspect;
270 const float fRange = zNear / (zFar - zNear);
271 const float fTranslate = zFar * fRange;
274#if defined(USE_SIMD_SSE)
275 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, xScale);
276 result.c[1] = _mm_set_ps(0.0f, 0.0f, yScale, 0.0f);
277 result.c[2] = _mm_set_ps(1.0f, fRange, 0.0f, 0.0f);
278 result.c[3] = _mm_set_ps(0.0f, fTranslate, 0.0f, 0.0f);
279#elif defined(USE_SIMD_APPLE)
280 result.cm.columns[0] = simd_make_float4(xScale, 0.0f, 0.0f, 0.0f);
281 result.cm.columns[1] = simd_make_float4(0.0f, yScale, 0.0f, 0.0f);
282 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, fRange, 1.0f);
283 result.cm.columns[3] = simd_make_float4(0.0f, 0.0f, fTranslate, 0.0f);
284#elif defined(USE_SIMD_NEON)
285 result.c[0] = vdupq_n_f32(0.0f);
286 result.c[1] = vdupq_n_f32(0.0f);
287 result.c[2] = vdupq_n_f32(0.0f);
288 result.c[3] = vdupq_n_f32(0.0f);
290 result.c[0] = vsetq_lane_f32(xScale, result.c[0], 0);
291 result.c[1] = vsetq_lane_f32(yScale, result.c[1], 1);
292 result.c[2] = vsetq_lane_f32(fRange, result.c[2], 2);
293 result.c[2] = vsetq_lane_f32(1.0f, result.c[2], 3);
294 result.c[3] = vsetq_lane_f32(fTranslate, result.c[3], 2);
296 result.
m[0][0] = xScale; result.
m[0][1] = 0.0f; result.
m[0][2] = 0.0f; result.
m[0][3] = 0.0f;
297 result.
m[1][0] = 0.0f; result.
m[1][1] = yScale; result.
m[1][2] = 0.0f; result.
m[1][3] = 0.0f;
298 result.
m[2][0] = 0.0f; result.
m[2][1] = 0.0f; result.
m[2][2] = fRange; result.
m[2][3] = 1.0f;
299 result.
m[3][0] = 0.0f; result.
m[3][1] = 0.0f; result.
m[3][2] = fTranslate; result.
m[3][3] = 0.0f;
307#if defined(USE_SIMD_SSE)
308 result.c[0] = _mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f);
309 result.c[1] = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f);
310 result.c[2] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f);
311 result.c[3] = _mm_set_ps(1.0f, z, y, x);
312#elif defined(USE_SIMD_APPLE)
313 result.cm.columns[0] = simd_make_float4(1.0f, 0.0f, 0.0f, 0.0f);
314 result.cm.columns[1] = simd_make_float4(0.0f, 1.0f, 0.0f, 0.0f);
315 result.cm.columns[2] = simd_make_float4(0.0f, 0.0f, 1.0f, 0.0f);
316 result.cm.columns[3] = simd_make_float4(x, y, z, 1.0f);
317#elif defined(USE_SIMD_NEON)
318 result.c[0] = vdupq_n_f32(0.0f);
319 result.c[1] = vdupq_n_f32(0.0f);
320 result.c[2] = vdupq_n_f32(0.0f);
321 result.c[3] = vdupq_n_f32(0.0f);
323 result.c[0] = vsetq_lane_f32(1.0f, result.c[0], 0);
324 result.c[1] = vsetq_lane_f32(1.0f, result.c[1], 1);
325 result.c[2] = vsetq_lane_f32(1.0f, result.c[2], 2);
327 result.c[3] = vsetq_lane_f32(x, result.c[3], 0);
328 result.c[3] = vsetq_lane_f32(y, result.c[3], 1);
329 result.c[3] = vsetq_lane_f32(z, result.c[3], 2);
330 result.c[3] = vsetq_lane_f32(1.0f, result.c[3], 3);
332 result.
m[0][0] = 1.0f;
333 result.
m[1][1] = 1.0f;
334 result.
m[2][2] = 1.0f;
335 result.
m[3][3] = 1.0f;
355 [[nodiscard]]
float getElement(
const int col,
int row)
const {
356 assert(0 <= col && col < 4 && 0 <= row && row < 4);
357#if defined(USE_SIMD_APPLE)
358 return cm.columns[col][row];
359#elif defined(USE_SIMD_SSE)
360 alignas(16)
float tmp[4];
361 _mm_store_ps(tmp, c[col]);
363#elif defined(USE_SIMD_NEON)
365 case 0:
return vgetq_lane_f32(c[col], 0);
366 case 1:
return vgetq_lane_f32(c[col], 1);
367 case 2:
return vgetq_lane_f32(c[col], 2);
368 case 3:
return vgetq_lane_f32(c[col], 3);
369 default:
return 0.0f;
377 assert(0 <= col && col < 4 && 0 <= row && row < 4);
378#if defined(USE_SIMD_APPLE)
379 cm.columns[col][row] = value;
380#elif defined(USE_SIMD_SSE)
381 alignas(16)
float tmp[4];
382 _mm_store_ps(tmp, c[col]);
384 c[col] = _mm_load_ps(tmp);
385#elif defined(USE_SIMD_NEON)
387 case 0: c[col] = vsetq_lane_f32(value, c[col], 0);
break;
388 case 1: c[col] = vsetq_lane_f32(value, c[col], 1);
break;
389 case 2: c[col] = vsetq_lane_f32(value, c[col], 2);
break;
390 case 3: c[col] = vsetq_lane_f32(value, c[col], 3);
break;
417 static Matrix4 perspective(
float fov,
float aspect,
float zNear,
float zFar,
bool fovIsHorizontal =
false);
419 static Matrix4 frustum(
float left,
float right,
float bottom,
float top,
float zNear,
float zFar);
421 static Matrix4 ortho(
float left,
float right,
float bottom,
float top,
float near,
float far);
440 const float a = nx, b = ny, c = nz;
466#include "matrix4.inl"
467#include "matrix4Inverse.inl"
void setElement(const int col, int row, const float value)
static Matrix4 reflection(float nx, float ny, float nz, float distance)
static Matrix4 frustum(float left, float right, float bottom, float top, float zNear, float zFar)
static Matrix4 identity()
static Matrix4 translation(float x, float y, float z)
Vector3 transformPoint(const Vector3 &v) const
static Matrix4 lookToLH(const Vector3 &eye, const Vector3 &dir, const Vector3 &up)
static Matrix4 orthographicLHReverseZ(const float viewWidth, const float viewHeight, const float nearZ, const float farZ)
Quaternion getRotation() const
Matrix4 & operator=(const Matrix4 &other)
Matrix4 operator*(const Matrix4 &rhs) const
void setColumn(int col, const Vector4 &v)
Matrix4 mulAffine(const Matrix4 &rhs) const
Vector3 operator*(const Vector3 &v) const
Vector4 operator*(const Vector4 &v) const
static Matrix4 perspectiveFovLHReverseZ(const float fovY, const float aspect, const float zNear, const float zFar)
static Matrix4 ortho(float left, float right, float bottom, float top, float near, float far)
Vector3 getTranslation() const
Matrix4 transpose() const
Vector4 getColumn(int col) const
static Matrix4 perspective(float fov, float aspect, float zNear, float zFar, bool fovIsHorizontal=false)
static Matrix4 trs(const Vector3 &t, const Quaternion &r, const Vector3 &s)
Vector3 getPosition() const
float getElement(const int col, int row) const
Matrix4(const Vector4 &col0, const Vector4 &col1, const Vector4 &col2, const Vector4 &col3)
Unit quaternion for rotation representation with SIMD-accelerated slerp and multiply.
3D vector for positions, directions, and normals with multi-backend SIMD acceleration.
4D vector for homogeneous coordinates, color values, and SIMD operations.