VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
skinBatchInstance.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// SkinBatchInstance implementation.
5//
6//
7//
8#include "skinBatchInstance.h"
9
10#include "core/math/matrix4.h"
11
12namespace visutwin::canvas
13{
14 SkinBatchInstance::SkinBatchInstance(std::vector<GraphNode*> nodes)
15 : _nodes(std::move(nodes))
16 {
17 // Pre-allocate palette: 16 floats (float4x4) per bone.
18 _palette.resize(_nodes.size() * 16, 0.0f);
19 }
20
22 {
23 // Copies every node's world transform into _palette (16 floats per node, in node order).
24 // _palette is sized to _nodes.size() * 16 floats by the constructor.
25 // Upstream packs 4x3 matrices (12 floats per bone) to save texture
26 // space. We use float4x4 (16 floats per bone) for simpler Metal
27 // shader code — the buffer approach has no texture dimension constraints.
28 //
29 // DEVIATION: upstream stores transposed rows; we store column-major
30 // float4x4 directly from Matrix4, matching Metal's column-major convention.
31 //
32 // Matrix4 is 64 bytes (4 columns × 4 floats × 4 bytes) on all backends
33 // (scalar float[4][4], SSE __m128[4], NEON float32x4_t[4], Apple
34 // simd_float4x4) — all column-major, contiguous in memory. A single
35 // memcpy per bone is 8-16× faster than 16 getElement() calls, which on
36 // SSE/NEON each do a store-to-temp + scalar extract round-trip.
37 static_assert(sizeof(Matrix4) == 64, "Matrix4 must be 64 bytes for palette memcpy"); // guards the hard-coded 64-byte copy below
38
39 const int count = static_cast<int>(_nodes.size()); // one palette slot per node
40 for (int i = 0; i < count; ++i)
41 {
42 const Matrix4& wt = _nodes[i]->worldTransform(); // NOTE(review): no null check on _nodes[i] — confirm entries are never null
43 std::memcpy(&_palette[i * 16], &wt, 64); // slot i starts at float index i * 16; NOTE(review): <cstring> is not included directly in this file — confirm it arrives transitively
44 }
45 }
46
47} // namespace visutwin::canvas
SkinBatchInstance(std::vector< GraphNode * > nodes)
4x4 column-major transformation matrix with SIMD acceleration.
Definition matrix4.h:31