14#include "spdlog/spdlog.h"
36 constexpr const char* PARTICLE_COMPUTE_SOURCE = R
"(
37#include <metal_stdlib>
40// Must match GPUParticle in particleSystem3d.h (48 bytes, 16-byte aligned).
42 packed_float3 position; // 12
44 packed_float3 velocity; // 12
45 float lifetime; // 4 → 32
46 packed_float3 seedPosition; // 12
51 packed_float3 domainMin;
53 packed_float3 domainMax;
55 packed_float3 invDomainSize;
63// Convert world position to [0,1] texture coordinates for the velocity field.
// pos     — world-space position to convert.
// dMin    — offset subtracted from pos (the kernel passes uniforms.domainMin).
// invSize — per-axis scale applied after the offset (the kernel passes uniforms.invDomainSize).
// The result is NOT clamped: a position outside the domain yields components
// below 0 or above 1, which is what the kernel's boundary check tests for.
64inline float3 worldToUVW(float3 pos, float3 dMin, float3 invSize)
66 return (pos - dMin) * invSize;
69// Sample velocity field at a world-space position.
70// Positions outside the domain are clamped to the domain edge, so they receive
// the boundary velocity (this function does not return zero for them).
// NOTE(review): dMax is accepted but not referenced by the statements below.
71inline float3 sampleVelocity(float3 worldPos,
72 texture3d<float> field,
74 float3 dMin, float3 dMax, float3 invSize)
76 float3 uvw = worldToUVW(worldPos, dMin, invSize);
77 // Clamp to [0,1] — out-of-bounds positions get boundary velocity
78 uvw = clamp(uvw, float3(0.0), float3(1.0));
// Only the xyz components of the sampled texel are used as the velocity.
79 return field.sample(fieldSampler, uvw).xyz;
82// Simple hash for per-particle pseudo-random lifetime variation.
// Mixes the bits of `seed` with xor/shift steps, then scales the resulting
// uint by 1 / float(0xFFFFFFFF), giving a value in approximately [0, 1].
83inline float hashFloat(uint seed)
85 seed = (seed ^ 61u) ^ (seed >> 16u);
87 seed = seed ^ (seed >> 4u);
89 seed = seed ^ (seed >> 15u);
// Normalise the mixed integer to a float.
90 return float(seed) / float(0xFFFFFFFFu);
// Compute kernel that advances one particle per thread through the velocity field.
// Bindings: buffer(0) = particle array, buffer(1) = uniforms,
//           texture(0) = 3D velocity field, sampler(0) = sampler used to read it.
// gid is the thread's position in the dispatch grid and doubles as the particle index.
93kernel void advectParticles(
94 device Particle* particles [[buffer(0)]],
95 constant Uniforms& uniforms [[buffer(1)]],
96 texture3d<float> velocityField [[texture(0)]],
97 sampler fieldSampler [[sampler(0)]],
98 uint gid [[thread_position_in_grid]])
// The host rounds the grid up to whole threadgroups, so surplus threads exit here.
100 if (gid >= uniforms.particleCount) return;
// Work on a local copy of this thread's particle.
102 Particle p = particles[gid];
104 // Skip dead particles
// Bit 0 of flags is the "alive" bit.
105 if ((p.flags & 1u) == 0u) return;
107 const float dt = uniforms.dt;
108 const float3 dMin = float3(uniforms.domainMin);
109 const float3 dMax = float3(uniforms.domainMax);
110 const float3 invSize = float3(uniforms.invDomainSize);
112 // ── Age ──────────────────────────────────────────────────────────
115 // ── Respawn if expired ───────────────────────────────────────────
// An expired particle restarts from its seed position with zero velocity.
116 if (p.age >= p.lifetime) {
117 p.position = p.seedPosition;
118 p.velocity = packed_float3(0.0);
120 // Vary lifetime using particle index + time as seed
// as_type reinterprets the float's bit pattern as a uint; it is not a numeric conversion.
121 float h = hashFloat(gid + as_type<uint>(uniforms.time));
122 float minLife = 2.0; // seconds
124 p.lifetime = minLife + h * (maxLife - minLife);
129 // ── RK4 advection ────────────────────────────────────────────────
130 float3 pos = float3(p.position);
// Four velocity samples: at the start (k1), at two half-step estimates (k2, k3)
// and at the full-step estimate (k4).
132 float3 k1 = sampleVelocity(pos, velocityField, fieldSampler, dMin, dMax, invSize);
133 float3 k2 = sampleVelocity(pos + 0.5 * dt * k1, velocityField, fieldSampler, dMin, dMax, invSize);
134 float3 k3 = sampleVelocity(pos + 0.5 * dt * k2, velocityField, fieldSampler, dMin, dMax, invSize);
135 float3 k4 = sampleVelocity(pos + dt * k3, velocityField, fieldSampler, dMin, dMax, invSize);
// Combine the samples with weights 1:2:2:1.
137 float3 newPos = pos + (dt / 6.0) * (k1 + 2.0*k2 + 2.0*k3 + k4);
139 // ── Boundary check ───────────────────────────────────────────────
// worldToUVW is unclamped, so any component outside [0,1] means newPos left the domain.
140 float3 uvw = worldToUVW(newPos, dMin, invSize);
141 if (any(uvw < float3(0.0)) || any(uvw > float3(1.0))) {
// Out of domain: restart from the seed position with zero velocity.
143 p.position = p.seedPosition;
144 p.velocity = packed_float3(0.0);
// The constant offset gives this branch a different hash seed from the expiry respawn.
146 float h = hashFloat(gid + as_type<uint>(uniforms.time) + 0x12345678u);
// With h in about [0,1] the new lifetime falls in about [2, 8].
147 p.lifetime = 2.0 + h * 6.0;
// Still inside the domain: accept the step; the stored velocity is the field value at the new position.
149 p.position = packed_float3(newPos);
150 p.velocity = packed_float3(sampleVelocity(newPos, velocityField, fieldSampler, dMin, dMax, invSize));
// Threadgroup width (threads along X) used when dispatching the advectParticles kernel;
// the threadgroup count is computed by rounding particleCount up to a multiple of this.
157 constexpr uint32_t THREADS_PER_GROUP = 256;
// Release every Metal object held by this pass. Each pointer is reset to nullptr
// right after release(), so the null guard skips it if this code is reached again.
// NOTE(review): presumably the destructor body — confirm against the enclosing definition.
170 if (particleBufferA_) { particleBufferA_->release(); particleBufferA_ =
nullptr; }
171 if (particleBufferB_) { particleBufferB_->release(); particleBufferB_ =
nullptr; }
172 if (uniformBuffer_) { uniformBuffer_->release(); uniformBuffer_ =
nullptr; }
173 if (computePipeline_) { computePipeline_->release(); computePipeline_ =
nullptr; }
174 if (fieldSampler_) { fieldSampler_->release(); fieldSampler_ =
nullptr; }
// Lazily creates the three GPU objects the pass needs: the compute pipeline,
// the uniform buffer and the velocity-field sampler. Returns immediately once
// resourcesReady_ is set. Each object is created only if its pointer is still
// null, and resourcesReady_ becomes true only when all three exist, so a later
// call attempts whatever is still missing.
179 void MetalParticleComputePass::ensureResources()
181 if (resourcesReady_)
return;
// Nothing can be created without the underlying Metal device.
183 auto* mtlDevice = device_->
raw();
184 if (!mtlDevice)
return;
// --- Compute pipeline: built at runtime from the embedded shader source ---
187 if (!computePipeline_) {
188 NS::Error* error =
nullptr;
189 auto* source = NS::String::string(
190 PARTICLE_COMPUTE_SOURCE, NS::UTF8StringEncoding);
// Compile the source string into a library (no compile options passed).
191 auto* library = mtlDevice->newLibrary(source,
nullptr, &error);
193 spdlog::error(
"[MetalParticleComputePass] Failed to compile compute shader: {}",
194 error ? error->localizedDescription()->utf8String() :
"unknown");
// Look up the kernel entry point by name; it must match the shader's kernel function.
198 auto* funcName = NS::String::string(
"advectParticles", NS::UTF8StringEncoding);
199 auto* function = library->newFunction(funcName);
201 spdlog::error(
"[MetalParticleComputePass] Entry point 'advectParticles' not found");
206 computePipeline_ = mtlDevice->newComputePipelineState(function, &error);
207 if (!computePipeline_) {
208 spdlog::error(
"[MetalParticleComputePass] Failed to create pipeline state: {}",
209 error ? error->localizedDescription()->utf8String() :
"unknown");
// --- Uniform buffer: sized for exactly one ParticleComputeUniforms ---
217 if (!uniformBuffer_) {
218 uniformBuffer_ = mtlDevice->newBuffer(
219 sizeof(ParticleComputeUniforms),
220 MTL::ResourceStorageModeShared);
// --- Sampler: linear min/mag filtering, clamp-to-edge on all three axes (S, T, R) ---
224 if (!fieldSampler_) {
225 auto* desc = MTL::SamplerDescriptor::alloc()->init();
226 desc->setMinFilter(MTL::SamplerMinMagFilterLinear);
227 desc->setMagFilter(MTL::SamplerMinMagFilterLinear);
228 desc->setSAddressMode(MTL::SamplerAddressModeClampToEdge);
229 desc->setTAddressMode(MTL::SamplerAddressModeClampToEdge);
230 desc->setRAddressMode(MTL::SamplerAddressModeClampToEdge);
231 fieldSampler_ = mtlDevice->newSamplerState(desc);
// Ready only when all three objects were created successfully.
235 resourcesReady_ = (computePipeline_ && uniformBuffer_ && fieldSampler_);
// Allocates the two particle buffers for `maxParticles` particles.
// NOTE(review): presumably the body of initialize(uint32_t maxParticles) — confirm.
242 auto* mtlDevice = device_->raw();
// Drop any previously allocated buffers before allocating replacements.
246 if (particleBufferA_) { particleBufferA_->release(); particleBufferA_ =
nullptr; }
247 if (particleBufferB_) { particleBufferB_->release(); particleBufferB_ =
nullptr; }
// 48 = bytes per particle; the shader-side struct comment states the same size,
// so the two must be kept in sync. The cast to size_t happens before the multiply.
249 const size_t bufferSize =
static_cast<size_t>(
maxParticles) * 48;
// Both buffers use shared storage; their contents() are written from the CPU below.
253 particleBufferA_ = mtlDevice->newBuffer(bufferSize, MTL::ResourceStorageModeShared);
254 particleBufferB_ = mtlDevice->newBuffer(bufferSize, MTL::ResourceStorageModeShared);
256 if (!particleBufferA_ || !particleBufferB_) {
257 spdlog::error(
"[MetalParticleComputePass] Failed to allocate particle buffers "
258 "({} particles, {} bytes each)",
maxParticles, bufferSize);
// Zero-fill both buffers. A zeroed particle has flags == 0, and the
// advectParticles kernel skips particles whose flags bit 0 is clear.
263 std::memset(particleBufferA_->contents(), 0, bufferSize);
264 std::memset(particleBufferB_->contents(), 0, bufferSize);
// Log the per-buffer size converted from bytes to MiB-sized units (1024 * 1024).
270 spdlog::info(
"[MetalParticleComputePass] Initialized: {} particles, {:.1f} MB per buffer",
271 maxParticles,
static_cast<double>(bufferSize) / (1024.0 * 1024.0));
// Copies caller-provided particle data into the currently selected particle buffer.
// NOTE(review): presumably the body of uploadParticles(const void* data, uint32_t count) — confirm.
// Does nothing when the pass is not initialized, data is null, or count is zero.
278 if (!initialized_ || !data || count == 0)
return;
// Copy at most maxParticles_ particles (48 bytes each); any excess input is ignored.
280 const size_t copySize =
static_cast<size_t>(std::min(count, maxParticles_)) * 48;
// currentBuffer_ == 0 selects buffer A, any other value selects buffer B.
281 auto* dst = currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
282 std::memcpy(dst->contents(), data, copySize);
// Encodes and submits one advectParticles compute dispatch over the current particle buffer.
// NOTE(review): presumably the body of advect(Texture* velocityTexture,
// const ParticleComputeUniforms& uniforms) — confirm.
// Bails out when the pass is not initialized or no velocity texture was supplied.
290 if (!initialized_ || !velocityTexture)
return;
// Bails out when the pipeline, uniform buffer or sampler is unavailable.
293 if (!resourcesReady_)
return;
// currentBuffer_ == 0 selects buffer A, any other value selects buffer B.
301 auto* particleBuffer = currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
304 auto* commandBuffer = device_->_commandQueue->commandBuffer();
305 if (!commandBuffer) {
306 spdlog::warn(
"[MetalParticleComputePass] Failed to allocate command buffer");
310 auto* encoder = commandBuffer->computeCommandEncoder();
312 spdlog::warn(
"[MetalParticleComputePass] Failed to create compute encoder");
// Debug group label for the encoded commands (popped before endEncoding).
316 encoder->pushDebugGroup(
317 NS::String::string(
"ParticleAdvection", NS::UTF8StringEncoding));
319 encoder->setComputePipelineState(computePipeline_);
// Binding indices mirror the kernel signature: buffer(0) = particles, buffer(1) = uniforms.
322 encoder->setBuffer(particleBuffer, 0, 0);
323 encoder->setBuffer(uniformBuffer_, 0, 1);
// The velocity field goes to texture(0), bound only when a native texture object is available.
327 if (hwTexture && hwTexture->raw()) {
328 encoder->setTexture(hwTexture->raw(), 0);
// The sampler goes to sampler(0).
330 encoder->setSamplerState(fieldSampler_, 0);
// Ceiling division: enough threadgroups to cover every particle. The kernel's
// gid >= particleCount check discards the surplus threads in the last group.
333 const uint32_t threadgroups =
334 (uniforms.
particleCount + THREADS_PER_GROUP - 1) / THREADS_PER_GROUP;
335 encoder->dispatchThreadgroups(
336 MTL::Size(threadgroups, 1, 1),
337 MTL::Size(THREADS_PER_GROUP, 1, 1));
339 encoder->popDebugGroup();
340 encoder->endEncoding();
// Submit the work; no wait for completion is visible in this code.
341 commandBuffer->commit();
// Buffer A when currentBuffer_ is 0, otherwise buffer B.
352 return currentBuffer_ == 0 ? particleBufferA_ : particleBufferB_;
uint32_t maxParticles() const
Get the particle count (set during initialize).
MTL::Buffer * currentParticleBuffer() const
void uploadParticles(const void *data, uint32_t count)
MetalParticleComputePass(MetalGraphicsDevice *device)
void advect(Texture *velocityTexture, const ParticleComputeUniforms &uniforms)
void initialize(uint32_t maxParticles)
~MetalParticleComputePass()
GPU texture resource supporting 2D, cubemap, volume, and array formats with mipmap management.
gpu::HardwareTexture * impl() const
Uniform data uploaded to the compute kernel each frame.
uint32_t particleCount
Number of active particles.