VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
metalParticleComputePass.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// Metal compute pass for GPU particle advection — implementation.
5//
6// Contains the embedded MSL compute kernel and the CPU-side dispatch logic.
7//
8// Custom shader — no upstream GLSL equivalent exists.
9//
11
12#include "metalGraphicsDevice.h"
13#include "metalTexture.h"
14#include "spdlog/spdlog.h"
15
16namespace visutwin::canvas
17{
18 namespace
19 {
20 // ── Embedded Metal Shading Language ─────────────────────────────
21 //
22 // Compute kernel: advect particles through a 3D velocity field.
23 //
24 // Each thread processes one particle. Integration uses classical
25 // RK4 with hardware trilinear interpolation of the velocity
26 // texture (RGBA32Float: xyz = velocity, w = magnitude).
27 //
28 // Particle lifecycle:
29 // - age += dt each step
30 // - If age >= lifetime → respawn at seedPosition with new lifetime
31 // - If position exits domain → respawn
32 //
33 // The output buffer is suitable for direct rendering as point
34 // primitives (position is at offset 0).
35 //
36 constexpr const char* PARTICLE_COMPUTE_SOURCE = R"(
37#include <metal_stdlib>
38using namespace metal;
39
40// Must match GPUParticle in particleSystem3d.h (48 bytes, 16-byte aligned).
41struct Particle {
42 packed_float3 position; // 12
43 float age; // 4 → 16
44 packed_float3 velocity; // 12
45 float lifetime; // 4 → 32
46 packed_float3 seedPosition; // 12
47 uint flags; // 4 → 48
48};
49
50struct Uniforms {
51 packed_float3 domainMin;
52 float dt;
53 packed_float3 domainMax;
54 uint particleCount;
55 packed_float3 invDomainSize;
56 float time;
57 float speedMin;
58 float speedMax;
59 float fadeStart;
60 float padding;
61};
62
63// Convert world position to [0,1] texture coordinates for the velocity field.
64inline float3 worldToUVW(float3 pos, float3 dMin, float3 invSize)
65{
66 return (pos - dMin) * invSize;
67}
68
69// Sample velocity field at a world-space position.
70// Returns zero if outside domain.
71inline float3 sampleVelocity(float3 worldPos,
72 texture3d<float> field,
73 sampler fieldSampler,
74 float3 dMin, float3 dMax, float3 invSize)
75{
76 float3 uvw = worldToUVW(worldPos, dMin, invSize);
77 // Clamp to [0,1] — out-of-bounds positions get boundary velocity
78 uvw = clamp(uvw, float3(0.0), float3(1.0));
79 return field.sample(fieldSampler, uvw).xyz;
80}
81
82// Simple hash for per-particle pseudo-random lifetime variation.
83inline float hashFloat(uint seed)
84{
85 seed = (seed ^ 61u) ^ (seed >> 16u);
86 seed *= 9u;
87 seed = seed ^ (seed >> 4u);
88 seed *= 0x27d4eb2du;
89 seed = seed ^ (seed >> 15u);
90 return float(seed) / float(0xFFFFFFFFu);
91}
92
93kernel void advectParticles(
94 device Particle* particles [[buffer(0)]],
95 constant Uniforms& uniforms [[buffer(1)]],
96 texture3d<float> velocityField [[texture(0)]],
97 sampler fieldSampler [[sampler(0)]],
98 uint gid [[thread_position_in_grid]])
99{
100 if (gid >= uniforms.particleCount) return;
101
102 Particle p = particles[gid];
103
104 // Skip dead particles
105 if ((p.flags & 1u) == 0u) return;
106
107 const float dt = uniforms.dt;
108 const float3 dMin = float3(uniforms.domainMin);
109 const float3 dMax = float3(uniforms.domainMax);
110 const float3 invSize = float3(uniforms.invDomainSize);
111
112 // ── Age ──────────────────────────────────────────────────────────
113 p.age += dt;
114
115 // ── Respawn if expired ───────────────────────────────────────────
116 if (p.age >= p.lifetime) {
117 p.position = p.seedPosition;
118 p.velocity = packed_float3(0.0);
119 p.age = 0.0;
120 // Vary lifetime using particle index + time as seed
121 float h = hashFloat(gid + as_type<uint>(uniforms.time));
122 float minLife = 2.0; // seconds
123 float maxLife = 8.0;
124 p.lifetime = minLife + h * (maxLife - minLife);
125 particles[gid] = p;
126 return;
127 }
128
129 // ── RK4 advection ────────────────────────────────────────────────
130 float3 pos = float3(p.position);
131
132 float3 k1 = sampleVelocity(pos, velocityField, fieldSampler, dMin, dMax, invSize);
133 float3 k2 = sampleVelocity(pos + 0.5 * dt * k1, velocityField, fieldSampler, dMin, dMax, invSize);
134 float3 k3 = sampleVelocity(pos + 0.5 * dt * k2, velocityField, fieldSampler, dMin, dMax, invSize);
135 float3 k4 = sampleVelocity(pos + dt * k3, velocityField, fieldSampler, dMin, dMax, invSize);
136
137 float3 newPos = pos + (dt / 6.0) * (k1 + 2.0*k2 + 2.0*k3 + k4);
138
139 // ── Boundary check ───────────────────────────────────────────────
140 float3 uvw = worldToUVW(newPos, dMin, invSize);
141 if (any(uvw < float3(0.0)) || any(uvw > float3(1.0))) {
142 // Respawn at seed
143 p.position = p.seedPosition;
144 p.velocity = packed_float3(0.0);
145 p.age = 0.0;
146 float h = hashFloat(gid + as_type<uint>(uniforms.time) + 0x12345678u);
147 p.lifetime = 2.0 + h * 6.0;
148 } else {
149 p.position = packed_float3(newPos);
150 p.velocity = packed_float3(sampleVelocity(newPos, velocityField, fieldSampler, dMin, dMax, invSize));
151 }
152
153 particles[gid] = p;
154}
155)";
156
157 constexpr uint32_t THREADS_PER_GROUP = 256;
158
159 } // anonymous namespace
160
161 // ─── Construction / Destruction ───────────────────────────────────
162
167
169 {
170 if (particleBufferA_) { particleBufferA_->release(); particleBufferA_ = nullptr; }
171 if (particleBufferB_) { particleBufferB_->release(); particleBufferB_ = nullptr; }
172 if (uniformBuffer_) { uniformBuffer_->release(); uniformBuffer_ = nullptr; }
173 if (computePipeline_) { computePipeline_->release(); computePipeline_ = nullptr; }
174 if (fieldSampler_) { fieldSampler_->release(); fieldSampler_ = nullptr; }
175 }
176
177 // ─── Lazy Resource Creation ──────────────────────────────────────
178
179 void MetalParticleComputePass::ensureResources()
180 {
181 if (resourcesReady_) return;
182
183 auto* mtlDevice = device_->raw();
184 if (!mtlDevice) return;
185
186 // ── Compile compute shader ──────────────────────────────────
187 if (!computePipeline_) {
188 NS::Error* error = nullptr;
189 auto* source = NS::String::string(
190 PARTICLE_COMPUTE_SOURCE, NS::UTF8StringEncoding);
191 auto* library = mtlDevice->newLibrary(source, nullptr, &error);
192 if (!library) {
193 spdlog::error("[MetalParticleComputePass] Failed to compile compute shader: {}",
194 error ? error->localizedDescription()->utf8String() : "unknown");
195 return;
196 }
197
198 auto* funcName = NS::String::string("advectParticles", NS::UTF8StringEncoding);
199 auto* function = library->newFunction(funcName);
200 if (!function) {
201 spdlog::error("[MetalParticleComputePass] Entry point 'advectParticles' not found");
202 library->release();
203 return;
204 }
205
206 computePipeline_ = mtlDevice->newComputePipelineState(function, &error);
207 if (!computePipeline_) {
208 spdlog::error("[MetalParticleComputePass] Failed to create pipeline state: {}",
209 error ? error->localizedDescription()->utf8String() : "unknown");
210 }
211
212 function->release();
213 library->release();
214 }
215
216 // ── Uniform buffer (64 bytes) ───────────────────────────────
217 if (!uniformBuffer_) {
218 uniformBuffer_ = mtlDevice->newBuffer(
219 sizeof(ParticleComputeUniforms),
220 MTL::ResourceStorageModeShared);
221 }
222
223 // ── Trilinear sampler for velocity field ────────────────────
224 if (!fieldSampler_) {
225 auto* desc = MTL::SamplerDescriptor::alloc()->init();
226 desc->setMinFilter(MTL::SamplerMinMagFilterLinear);
227 desc->setMagFilter(MTL::SamplerMinMagFilterLinear);
228 desc->setSAddressMode(MTL::SamplerAddressModeClampToEdge);
229 desc->setTAddressMode(MTL::SamplerAddressModeClampToEdge);
230 desc->setRAddressMode(MTL::SamplerAddressModeClampToEdge);
231 fieldSampler_ = mtlDevice->newSamplerState(desc);
232 desc->release();
233 }
234
235 resourcesReady_ = (computePipeline_ && uniformBuffer_ && fieldSampler_);
236 }
237
238 // ─── Buffer Initialization ───────────────────────────────────────
239
241 {
242 auto* mtlDevice = device_->raw();
243 if (!mtlDevice || maxParticles == 0) return;
244
245 // Release old buffers
246 if (particleBufferA_) { particleBufferA_->release(); particleBufferA_ = nullptr; }
247 if (particleBufferB_) { particleBufferB_->release(); particleBufferB_ = nullptr; }
248
249 const size_t bufferSize = static_cast<size_t>(maxParticles) * 48; // sizeof(GPUParticle)
250
251 // Shared storage for CPU upload + GPU compute read/write.
252 // Apple Silicon unified memory makes this zero-copy.
253 particleBufferA_ = mtlDevice->newBuffer(bufferSize, MTL::ResourceStorageModeShared);
254 particleBufferB_ = mtlDevice->newBuffer(bufferSize, MTL::ResourceStorageModeShared);
255
256 if (!particleBufferA_ || !particleBufferB_) {
257 spdlog::error("[MetalParticleComputePass] Failed to allocate particle buffers "
258 "({} particles, {} bytes each)", maxParticles, bufferSize);
259 return;
260 }
261
262 // Zero-fill
263 std::memset(particleBufferA_->contents(), 0, bufferSize);
264 std::memset(particleBufferB_->contents(), 0, bufferSize);
265
266 maxParticles_ = maxParticles;
267 currentBuffer_ = 0;
268 initialized_ = true;
269
270 spdlog::info("[MetalParticleComputePass] Initialized: {} particles, {:.1f} MB per buffer",
271 maxParticles, static_cast<double>(bufferSize) / (1024.0 * 1024.0));
272 }
273
274 // ─── CPU Upload ──────────────────────────────────────────────────
275
276 void MetalParticleComputePass::uploadParticles(const void* data, uint32_t count)
277 {
278 if (!initialized_ || !data || count == 0) return;
279
280 const size_t copySize = static_cast<size_t>(std::min(count, maxParticles_)) * 48;
281 auto* dst = currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
282 std::memcpy(dst->contents(), data, copySize);
283 }
284
285 // ─── GPU Advection ───────────────────────────────────────────────
286
288 const ParticleComputeUniforms& uniforms)
289 {
290 if (!initialized_ || !velocityTexture) return;
291
292 ensureResources();
293 if (!resourcesReady_) return;
294
295 // Upload uniforms
296 std::memcpy(uniformBuffer_->contents(), &uniforms, sizeof(ParticleComputeUniforms));
297
298 // Select buffers: read from current, write to current (in-place).
299 // Metal compute has no read/write hazard within a single dispatch
300 // because each thread writes only its own particle.
301 auto* particleBuffer = currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
302
303 // ── Encode compute command ──────────────────────────────────
304 auto* commandBuffer = device_->_commandQueue->commandBuffer();
305 if (!commandBuffer) {
306 spdlog::warn("[MetalParticleComputePass] Failed to allocate command buffer");
307 return;
308 }
309
310 auto* encoder = commandBuffer->computeCommandEncoder();
311 if (!encoder) {
312 spdlog::warn("[MetalParticleComputePass] Failed to create compute encoder");
313 return;
314 }
315
316 encoder->pushDebugGroup(
317 NS::String::string("ParticleAdvection", NS::UTF8StringEncoding));
318
319 encoder->setComputePipelineState(computePipeline_);
320
321 // Buffer bindings
322 encoder->setBuffer(particleBuffer, 0, 0); // [[buffer(0)]]
323 encoder->setBuffer(uniformBuffer_, 0, 1); // [[buffer(1)]]
324
325 // Velocity field 3D texture
326 auto* hwTexture = dynamic_cast<gpu::MetalTexture*>(velocityTexture->impl());
327 if (hwTexture && hwTexture->raw()) {
328 encoder->setTexture(hwTexture->raw(), 0); // [[texture(0)]]
329 }
330 encoder->setSamplerState(fieldSampler_, 0); // [[sampler(0)]]
331
332 // Dispatch: one thread per particle
333 const uint32_t threadgroups =
334 (uniforms.particleCount + THREADS_PER_GROUP - 1) / THREADS_PER_GROUP;
335 encoder->dispatchThreadgroups(
336 MTL::Size(threadgroups, 1, 1),
337 MTL::Size(THREADS_PER_GROUP, 1, 1));
338
339 encoder->popDebugGroup();
340 encoder->endEncoding();
341 commandBuffer->commit();
342
343 // We don't swap buffers since we advect in-place.
344 // If double-buffering is needed for overlapping frames,
345 // toggle currentBuffer_ = 1 - currentBuffer_ here.
346 }
347
348 // ─── Buffer Access ───────────────────────────────────────────────
349
351 {
352 return currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
353 }
354
355} // namespace visutwin::canvas
uint32_t maxParticles() const
Get the particle count (set during initialize).
void uploadParticles(const void *data, uint32_t count)
void advect(Texture *velocityTexture, const ParticleComputeUniforms &uniforms)
GPU texture resource supporting 2D, cubemap, volume, and array formats with mipmap management.
Definition texture.h:57
gpu::HardwareTexture * impl() const
Definition texture.h:101
Uniform data uploaded to the compute kernel each frame.
uint32_t particleCount
Number of active particles.