// Teardown fragment: every held object is released only if its pointer is
// non-null, and the pointer is cleared right after, so running these
// statements a second time does nothing.
// Compute pipeline states go first ...
134 if (cullPipeline_) { cullPipeline_->release(); cullPipeline_ =
nullptr; }
135 if (writeArgsPipeline_) { writeArgsPipeline_->release(); writeArgsPipeline_ =
nullptr; }
// ... followed by the four buffers (compacted output, indirect args, counter, uniforms).
136 if (compactedBuffer_) { compactedBuffer_->release(); compactedBuffer_ =
nullptr; }
137 if (indirectArgsBuffer_){ indirectArgsBuffer_->release(); indirectArgsBuffer_ =
nullptr; }
138 if (counterBuffer_) { counterBuffer_->release(); counterBuffer_ =
nullptr; }
139 if (uniformBuffer_) { uniformBuffer_->release(); uniformBuffer_ =
nullptr; }
// Shader entry-point name "instanceCull" as a UTF-8 NS::String.
// Presumably used to look up the cull kernel on a line not shown here — confirm.
164 auto* funcName = NS::String::string(
"instanceCull", NS::UTF8StringEncoding);
// Shader entry-point name "writeIndirectArgs" as a UTF-8 NS::String.
// Presumably used to look up the args-writing kernel on a line not shown here — confirm.
183 auto* funcName = NS::String::string(
"writeIndirectArgs", NS::UTF8StringEncoding);
// Counter buffer: exactly one uint32_t, allocated in shared storage mode.
// The dispatch code in this file writes it from the CPU through contents().
205 counterBuffer_ = mtlDevice->newBuffer(
sizeof(uint32_t), MTL::ResourceStorageModeShared);
// Uniform buffer: sized to hold one InstanceCullParams, also in shared storage mode.
214 uniformBuffer_ = mtlDevice->newBuffer(
sizeof(InstanceCullParams), MTL::ResourceStorageModeShared);
// Reservation of the compacted instance buffer.
// Fast path: a buffer already exists and the request does not exceed the
// recorded capacity (maxInstances_), so nothing needs to be reallocated.
243 if (maxInstances <= maxInstances_ && compactedBuffer_)
return;
// Fetch the underlying device pointer; silently give up if there is none.
245 auto* mtlDevice = device_->raw();
246 if (!mtlDevice)
return;
// Drop the previous buffer before allocating the replacement. The visible
// lines do not copy the old contents into the new buffer.
248 if (compactedBuffer_) {
249 compactedBuffer_->release();
250 compactedBuffer_ =
nullptr;
// Byte size = instance count * INSTANCE_DATA_SIZE. The count is widened to
// size_t before the multiplication.
253 const size_t bufferSize =
static_cast<size_t>(maxInstances) * INSTANCE_DATA_SIZE;
254 compactedBuffer_ = mtlDevice->newBuffer(bufferSize, MTL::ResourceStorageModeShared);
// Allocation failure is reported at error level with the requested instance
// count and the size in KB.
// NOTE(review): the rest of this branch is on lines not shown; presumably it
// returns before maxInstances_ is updated — confirm.
255 if (!compactedBuffer_) {
256 spdlog::error(
"[MetalInstanceCullPass] Failed to allocate compacted buffer ({} instances, {:.1f} KB)",
257 maxInstances,
static_cast<double>(bufferSize) / 1024.0);
// Record the new capacity and log the reservation at debug level.
262 maxInstances_ = maxInstances;
263 spdlog::debug(
"[MetalInstanceCullPass] Reserved compacted buffer for {} instances ({:.1f} KB)",
264 maxInstances,
static_cast<double>(bufferSize) / 1024.0);
// Early-outs: do nothing unless resources are flagged ready and a compacted
// buffer has been reserved.
274 if (!resourcesReady_)
return;
278 if (!compactedBuffer_)
return;
// Overwrite the counter buffer's single uint32_t with the value of `zero`
// (declared on a line not shown) from the CPU side.
285 std::memcpy(counterBuffer_->contents(), &zero,
sizeof(uint32_t));
// One command buffer carries both compute passes encoded below.
290 auto* commandBuffer = device_->_commandQueue->commandBuffer();
291 if (!commandBuffer) {
292 spdlog::warn(
"[MetalInstanceCullPass] Failed to create command buffer");
// ---- Pass 1: cull ----
298 auto* encoder = commandBuffer->computeCommandEncoder();
// Warning for a failed encoder creation; the guarding condition is on a line not shown.
300 spdlog::warn(
"[MetalInstanceCullPass] Failed to create compute encoder for cull");
304 encoder->pushDebugGroup(
305 NS::String::string(
"InstanceCull", NS::UTF8StringEncoding));
// Buffer bindings, all at offset 0:
//   index 0 = uniformBuffer_, 1 = inputBuffer, 2 = compactedBuffer_, 3 = counterBuffer_.
307 encoder->setComputePipelineState(cullPipeline_);
308 encoder->setBuffer(uniformBuffer_, 0, 0);
309 encoder->setBuffer(inputBuffer, 0, 1);
310 encoder->setBuffer(compactedBuffer_, 0, 2);
311 encoder->setBuffer(counterBuffer_, 0, 3);
// Ceiling division: enough threadgroups of THREADGROUP_SIZE threads to cover
// every instance, including a partially filled last group.
// NOTE(review): this evaluates to 0 when params.instanceCount is 0; confirm an
// earlier guard (not visible here) prevents a zero-sized dispatch.
313 const uint32_t threadgroups = (params.
instanceCount + THREADGROUP_SIZE - 1) / THREADGROUP_SIZE;
314 encoder->dispatchThreadgroups(
315 MTL::Size(threadgroups, 1, 1),
316 MTL::Size(THREADGROUP_SIZE, 1, 1));
318 encoder->popDebugGroup();
319 encoder->endEncoding();
// ---- Pass 2: write indirect args ----
// Encoded on the same command buffer after the cull pass.
324 auto* encoder = commandBuffer->computeCommandEncoder();
// Warning for a failed encoder creation; the guarding condition is on a line not shown.
326 spdlog::warn(
"[MetalInstanceCullPass] Failed to create compute encoder for writeArgs");
330 encoder->pushDebugGroup(
331 NS::String::string(
"WriteIndirectArgs", NS::UTF8StringEncoding));
// Buffer bindings, all at offset 0:
//   index 0 = uniformBuffer_, 3 = counterBuffer_, 4 = indirectArgsBuffer_.
333 encoder->setComputePipelineState(writeArgsPipeline_);
334 encoder->setBuffer(uniformBuffer_, 0, 0);
335 encoder->setBuffer(counterBuffer_, 0, 3);
336 encoder->setBuffer(indirectArgsBuffer_, 0, 4);
// The dispatch size arguments are on lines not shown.
338 encoder->dispatchThreadgroups(
342 encoder->popDebugGroup();
343 encoder->endEncoding();
// Submit the work, then block the calling thread until the command buffer completes.
348 commandBuffer->commit();
349 commandBuffer->waitUntilCompleted();
// Parameters (the function name is on a line not shown):
//   m         - matrix as a flat float array; elements 0..15 are read.
//   outPlanes - output, six planes of four coefficients each, written in place.
// Each plane combines the element set {m[3], m[7], m[11], m[15]} with one of
// three other element sets, once added and once subtracted.
// NOTE(review): with m read as column-major, this is row 3 +/- row 0/1/2, which
// presumably yields left, right, bottom, top, near, far in that order — confirm
// against the matrix layout the callers pass.
355 const float* m,
float outPlanes[6][4])
// Plane 0: {m[3], m[7], m[11], m[15]} + {m[0], m[4], m[8], m[12]}
367 outPlanes[0][0] = m[3] + m[0];
368 outPlanes[0][1] = m[7] + m[4];
369 outPlanes[0][2] = m[11] + m[8];
370 outPlanes[0][3] = m[15] + m[12];
// Plane 1: same two element sets, subtracted
373 outPlanes[1][0] = m[3] - m[0];
374 outPlanes[1][1] = m[7] - m[4];
375 outPlanes[1][2] = m[11] - m[8];
376 outPlanes[1][3] = m[15] - m[12];
// Plane 2: {m[3], m[7], m[11], m[15]} + {m[1], m[5], m[9], m[13]}
379 outPlanes[2][0] = m[3] + m[1];
380 outPlanes[2][1] = m[7] + m[5];
381 outPlanes[2][2] = m[11] + m[9];
382 outPlanes[2][3] = m[15] + m[13];
// Plane 3: same two element sets, subtracted
385 outPlanes[3][0] = m[3] - m[1];
386 outPlanes[3][1] = m[7] - m[5];
387 outPlanes[3][2] = m[11] - m[9];
388 outPlanes[3][3] = m[15] - m[13];
// Plane 4: {m[3], m[7], m[11], m[15]} + {m[2], m[6], m[10], m[14]}
// NOTE(review): this "sum" form corresponds to a clip-space depth range of
// [-1, 1]. If the projection targets a [0, 1] depth range, the near plane would
// be {m[2], m[6], m[10], m[14]} alone — confirm which convention is in use.
391 outPlanes[4][0] = m[3] + m[2];
392 outPlanes[4][1] = m[7] + m[6];
393 outPlanes[4][2] = m[11] + m[10];
394 outPlanes[4][3] = m[15] + m[14];
// Plane 5: same two element sets, subtracted
397 outPlanes[5][0] = m[3] - m[2];
398 outPlanes[5][1] = m[7] - m[6];
399 outPlanes[5][2] = m[11] - m[10];
400 outPlanes[5][3] = m[15] - m[14];
// Normalise each plane: divide all four coefficients by the Euclidean length
// of the first three, so those three form a unit vector and the fourth is
// scaled by the same factor.
403 for (
int i = 0; i < 6; ++i) {
404 const float len = std::sqrt(
405 outPlanes[i][0] * outPlanes[i][0] +
406 outPlanes[i][1] * outPlanes[i][1] +
407 outPlanes[i][2] * outPlanes[i][2]);
// NOTE(review): the line between the length computation and this division is
// not visible; confirm it guards against len == 0 (a degenerate matrix would
// otherwise produce inf/NaN coefficients).
409 const float invLen = 1.0f / len;
410 outPlanes[i][0] *= invLen;
411 outPlanes[i][1] *= invLen;
412 outPlanes[i][2] *= invLen;
413 outPlanes[i][3] *= invLen;