VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
renderer.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// Created by Arnis Lektauers on 11.09.2025.
5//
6#include "renderer.h"
7
8#include <algorithm>
9#include <array>
10#include <chrono>
11#include <cmath>
12#include <cstring>
13#include <numbers>
14
15#include "core/objectPool.h"
16#include "core/math/color.h"
17#include "core/math/matrix4.h"
18#include "core/math/vector3.h"
19#include "framework/entity.h"
23#include "scene/frustumUtils.h"
24#include "scene/light.h"
26#include "scene/graphNode.h"
30#include "lightingValidation.h"
32#include "spdlog/spdlog.h"
33
34namespace visutwin::canvas
35{
36 namespace
37 {
// Sixteen precomputed 2D sample offsets, each component in (0, 1).
// The values match the first 16 terms of the Halton sequence (base 2 for
// element [0], base 3 for element [1]). Indexed by render version to pick the
// per-frame sub-pixel projection jitter.
constexpr std::array<std::array<float, 2>, 16> haltonSequence = {{
    {0.5f, 0.333333f},     {0.25f, 0.666667f},
    {0.75f, 0.111111f},    {0.125f, 0.444444f},
    {0.625f, 0.777778f},   {0.375f, 0.222222f},
    {0.875f, 0.555556f},   {0.0625f, 0.888889f},
    {0.5625f, 0.037037f},  {0.3125f, 0.370370f},
    {0.8125f, 0.703704f},  {0.1875f, 0.148148f},
    {0.6875f, 0.481481f},  {0.4375f, 0.814815f},
    {0.9375f, 0.259259f},  {0.03125f, 0.592593f}
}};
56
57 struct ForwardDrawEntry
58 {
59 MeshInstance* meshInstance = nullptr;
60 Material* material = nullptr;
61 // Raw pointers — buffers are kept alive by the Mesh objects (which are kept alive
62 // by MeshInstances in the same draw entry). Using raw pointers avoids atomic
63 // refcount increments/decrements per draw entry (~200-2000 ops/frame savings).
64 std::shared_ptr<VertexBuffer> vertexBuffer;
65 std::shared_ptr<IndexBuffer> indexBuffer;
66 Primitive primitive;
67 uint64_t sortKey = 0;
68 float distanceToCameraSq = 0.0f;
69 };
70
71 struct LightDispatchEntry
72 {
73 GpuLightData light;
74 uint32_t mask = MASK_AFFECT_DYNAMIC;
75 };
76
77 uint64_t makeOpaqueSortKey(const MeshInstance* meshInstance)
78 {
79 //uses material / shader variants in opaque sort keys.
80 const auto material = meshInstance ? meshInstance->material() : nullptr;
81 const auto materialKey = material ? material->sortKey()
82 : static_cast<uint64_t>(reinterpret_cast<uintptr_t>(material)) >> 4;
83 const auto meshKey = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(meshInstance ? meshInstance->mesh() : nullptr)) >> 4;
84 return (materialKey << 32) ^ (meshKey & 0xffffffffu);
85 }
86
87 // Material's base cull mode — reads the parameter map (unordered_map lookup).
88 // This is the expensive part that should be cached per material.
89 CullMode resolveMaterialCullMode(const Material* material)
90 {
91 auto readIntParameter = [](const Material::ParameterValue* value, int& out) -> bool {
92 if (!value) {
93 return false;
94 }
95 if (const auto* v = std::get_if<int32_t>(value)) {
96 out = static_cast<int>(*v);
97 return true;
98 }
99 if (const auto* v = std::get_if<uint32_t>(value)) {
100 out = static_cast<int>(*v);
101 return true;
102 }
103 if (const auto* v = std::get_if<float>(value)) {
104 out = static_cast<int>(*v);
105 return true;
106 }
107 if (const auto* v = std::get_if<bool>(value)) {
108 out = *v ? 1 : 0;
109 return true;
110 }
111 return false;
112 };
113
114 CullMode mode = material ? material->cullMode() : CullMode::CULLFACE_BACK;
115 if (material) {
116 int cullModeValue = static_cast<int>(mode);
117 const auto* cullModeParam = material->parameter("material_cullMode");
118 if (!cullModeParam) {
119 cullModeParam = material->parameter("cullMode");
120 }
121 if (readIntParameter(cullModeParam, cullModeValue)) {
122 if (cullModeValue >= static_cast<int>(CullMode::CULLFACE_NONE) &&
123 cullModeValue <= static_cast<int>(CullMode::CULLFACE_FRONTANDBACK)) {
124 mode = static_cast<CullMode>(cullModeValue);
125 }
126 }
127 }
128 return mode;
129 }
130
131 // Node-scale flip — trivial per-draw float check, not worth caching.
132 CullMode applyNodeScaleFlip(const CullMode mode, GraphNode* node)
133 {
134 if ((mode == CullMode::CULLFACE_BACK || mode == CullMode::CULLFACE_FRONT) && node) {
135 if (node->worldScaleSign() < 0.0f) {
137 }
138 }
139 return mode;
140 }
141
142 // Combined convenience wrapper (used where caching is not needed).
143 CullMode resolveCullMode(const Material* material, GraphNode* node)
144 {
145 return applyNodeScaleFlip(resolveMaterialCullMode(material), node);
146 }
147 }
148
149 Renderer::Renderer(const std::shared_ptr<GraphicsDevice>& device, const std::shared_ptr<Scene>& scene) : _device(device), _scene(scene)
150 {
151 // DEVIATION: startup self-test guards recent attenuation/falloff regressions in this port.
153
154 _lightTextureAtlas = std::make_unique<LightTextureAtlas>(device);
155
156 _shadowRenderer = std::make_unique<ShadowRenderer>(this, _lightTextureAtlas.get());
157
158 _shadowRendererLocal = std::make_unique<ShadowRendererLocal>(this, _shadowRenderer.get());
159 _shadowRendererDirectional = std::make_unique<ShadowRendererDirectional>(device, this, _shadowRenderer.get());
160
161 if (scene->clusteredLightingEnabled())
162 {
163 _renderPassUpdateClustered = std::make_unique<RenderPassUpdateClustered>(
164 device, this, _shadowRenderer.get(), _shadowRendererLocal.get(), _lightTextureAtlas.get()
165 );
166 }
167
168 }
169
171 {
173
174 if (!camera || !_shadowRendererDirectional) {
175 return;
176 }
177
178 std::vector<Light*> dirShadowLights;
179
180 for (auto* lightComponent : LightComponent::instances()) {
181 if (!lightComponent || !lightComponent->enabled()) {
182 continue;
183 }
184 if (lightComponent->type() != LightType::LIGHTTYPE_DIRECTIONAL || !lightComponent->castShadows()) {
185 continue;
186 }
187
188 Light* sceneLight = lightComponent->light();
189 if (!sceneLight) {
190 continue;
191 }
192
193 // Allocate shadow map if not yet created.
194 if (!sceneLight->shadowMap()) {
195 auto shadowMap = ShadowMap::create(_device.get(), sceneLight);
196 if (shadowMap) {
197 // Store the owned ShadowMap on the Renderer (keep alive), set raw pointer on Light.
198 sceneLight->setShadowMap(shadowMap.get());
199 _ownedShadowMaps.push_back(std::move(shadowMap));
200 }
201 }
202
203 if (!sceneLight->shadowMap()) {
204 continue;
205 }
206
207 // Set up the shadow camera (position, projection, snap).
208 _shadowRendererDirectional->cull(sceneLight, camera);
209
210 dirShadowLights.push_back(sceneLight);
211 }
212
213 if (!dirShadowLights.empty()) {
214 _cameraDirShadowLights[camera] = std::move(dirShadowLights);
215 }
216 }
217
218 void Renderer::renderForwardLayer(Camera* camera, RenderTarget* renderTarget, Layer* layer, bool transparent)
219 {
220 if (!camera || !layer || !_device) {
221 return;
222 }
223
224 const auto sortStart = std::chrono::high_resolution_clock::now();
225
226 auto programLibrary = getProgramLibrary(_device);
227 if (!programLibrary) {
228 spdlog::error("ProgramLibrary is not initialized. Forward rendering requires ProgramLibrary.");
229 return;
230 }
231
232 // tell ProgramLibrary whether a skybox cubemap is available
233 // so skybox shaders compile with VT_FEATURE_SKY_CUBEMAP.
234 programLibrary->setSkyCubemapAvailable(_scene && _scene->skybox() != nullptr);
235
236 // when camera is in depth pass mode, compile shaders
237 // with VT_FEATURE_PLANAR_REFLECTION_DEPTH_PASS to override fragment output
238 // with distance-from-reflection-plane (setShaderPass).
239 programLibrary->setPlanarReflectionDepthPass(camera && camera->planarReflectionDepthPass());
240
241 // Tell ProgramLibrary whether any local light casts shadows AND has
242 // an allocated shadow map. Only enable VT_FEATURE_LOCAL_SHADOWS / VT_FEATURE_OMNI_SHADOWS
243 // when actual shadow textures exist, so the shader doesn't compile with
244 // depth2d / depthcube parameters that would be nil at runtime.
245 {
246 bool hasLocalShadows = false;
247 bool hasOmniShadows = false;
248 for (const auto* lc : LightComponent::instances()) {
249 if (lc && lc->enabled() && lc->castShadows() &&
250 lc->type() != LightType::LIGHTTYPE_DIRECTIONAL) {
251 Light* sceneLight = lc->light();
252 if (sceneLight && sceneLight->shadowMap()) {
253 if (lc->type() == LightType::LIGHTTYPE_OMNI) {
254 hasOmniShadows = true;
255 } else {
256 hasLocalShadows = true;
257 }
258 }
259 }
260 }
261 programLibrary->setLocalShadowsEnabled(hasLocalShadows);
262 programLibrary->setOmniShadowsEnabled(hasOmniShadows);
263 }
264
265 // Area lights: enable when any LightComponent has LIGHTTYPE_AREA_RECT.
266 {
267 bool hasAreaLights = false;
268 for (const auto* lc : LightComponent::instances()) {
269 if (lc && lc->enabled() && lc->type() == LightType::LIGHTTYPE_AREA_RECT) {
270 hasAreaLights = true;
271 break;
272 }
273 }
274 programLibrary->setAreaLightsEnabled(hasAreaLights);
275 }
276
277 // Clustered lighting: when enabled on the scene, compile forward shaders
278 // with VT_FEATURE_LIGHT_CLUSTERING so the fragment shader samples the cluster grid.
279 const bool clusteredEnabled = _scene && _scene->clusteredLightingEnabled();
280 programLibrary->setClusteredLightingEnabled(clusteredEnabled);
281
282 // SSAO per-material: when the device has a forward SSAO texture, compile
283 // forward shaders with VT_FEATURE_SSAO so the fragment shader modulates
284 // ambient occlusion by sampling the SSAO texture at screen-space UV.
285 programLibrary->setSsaoEnabled(_device->ssaoForwardTexture() != nullptr);
286
287 // Atmosphere scattering: when enabled on the scene, compile skybox shaders
288 // with VT_FEATURE_ATMOSPHERE and push atmosphere uniforms to the device.
289 const bool atmosphereEnabled = _scene && _scene->atmosphereEnabled();
290 programLibrary->setAtmosphereEnabled(atmosphereEnabled);
291 _device->setAtmosphereEnabled(atmosphereEnabled);
292 if (atmosphereEnabled) {
293 _device->setAtmosphereUniforms(_scene->atmosphereUniformData(), _scene->atmosphereUniformSize());
294 }
295
296 // Lazily create WorldClusters when clustering is first enabled.
297 if (clusteredEnabled && !_worldClusters) {
298 _worldClusters = std::make_unique<WorldClusters>();
299 }
300
301 const auto defaultMaterial = getDefaultMaterial(_device);
302
303 auto* cameraNode = camera->node().get();
304 const auto cameraPosition = cameraNode ? cameraNode->position() : Vector3{};
305 const auto viewMatrix = cameraNode ? cameraNode->worldTransform().inverse() : Matrix4::identity();
306 const auto activeTarget = renderTarget ? renderTarget : camera->renderTarget().get();
307 const int targetWidth = std::max(activeTarget ? activeTarget->width() : _device->size().first, 1);
308 const int targetHeight = std::max(activeTarget ? activeTarget->height() : _device->size().second, 1);
309
310 const auto clamp01 = [](const float v) {
311 return std::clamp(v, 0.0f, 1.0f);
312 };
313 const Vector4 rect = camera->rect();
314 const float rectXNorm = clamp01(rect.getX());
315 const float rectYNorm = clamp01(rect.getY());
316 const float rectWNorm = clamp01(rect.getZ());
317 const float rectHNorm = clamp01(rect.getW());
318 const float rectTopNorm = clamp01(rectYNorm + rectHNorm);
319
320 int viewportX = static_cast<int>(rectXNorm * static_cast<float>(targetWidth));
321 // Upstream rect origin is bottom-left. Metal viewport/scissor origin is top-left.
322 int viewportY = targetHeight - static_cast<int>(rectTopNorm * static_cast<float>(targetHeight));
323 viewportX = std::clamp(viewportX, 0, std::max(targetWidth - 1, 0));
324 viewportY = std::clamp(viewportY, 0, std::max(targetHeight - 1, 0));
325 int viewportW = std::max(1, static_cast<int>(rectWNorm * static_cast<float>(targetWidth)));
326 int viewportH = std::max(1, static_cast<int>(rectHNorm * static_cast<float>(targetHeight)));
327 viewportW = std::clamp(viewportW, 1, targetWidth - viewportX);
328 viewportH = std::clamp(viewportH, 1, targetHeight - viewportY);
329
330 const Vector4 scissorRect = camera->scissorRect();
331 const float sxNorm = clamp01(scissorRect.getX());
332 const float syNorm = clamp01(scissorRect.getY());
333 const float swNorm = clamp01(scissorRect.getZ());
334 const float shNorm = clamp01(scissorRect.getW());
335 const float sTopNorm = clamp01(syNorm + shNorm);
336 int scissorX = static_cast<int>(sxNorm * static_cast<float>(targetWidth));
337 int scissorY = targetHeight - static_cast<int>(sTopNorm * static_cast<float>(targetHeight));
338 scissorX = std::clamp(scissorX, 0, std::max(targetWidth - 1, 0));
339 scissorY = std::clamp(scissorY, 0, std::max(targetHeight - 1, 0));
340 int scissorW = std::max(1, static_cast<int>(swNorm * static_cast<float>(targetWidth)));
341 int scissorH = std::max(1, static_cast<int>(shNorm * static_cast<float>(targetHeight)));
342 scissorW = std::clamp(scissorW, 1, targetWidth - scissorX);
343 scissorH = std::clamp(scissorH, 1, targetHeight - scissorY);
344
345 // ASPECT_AUTO cameras use viewport size, not full target size.
347 camera->setAspectRatio(static_cast<float>(viewportW) / static_cast<float>(viewportH));
348 }
349
350 const CameraComponent* cameraComponent = nullptr;
351 for (const auto* candidate : CameraComponent::instances()) {
352 if (candidate && candidate->camera() == camera) {
353 cameraComponent = candidate;
354 break;
355 }
356 }
357
358 Matrix4 projMatrix = camera->projectionMatrix();
359 float jitterX = 0.0f;
360 float jitterY = 0.0f;
361 const float jitter = std::max(camera->jitter(), 0.0f);
362 if (jitter > 0.0f) {
363 const auto& offset = haltonSequence[static_cast<size_t>(_device->renderVersion() % haltonSequence.size())];
364 jitterX = jitter * (offset[0] * 2.0f - 1.0f) / static_cast<float>(viewportW);
365 jitterY = jitter * (offset[1] * 2.0f - 1.0f) / static_cast<float>(viewportH);
366 projMatrix.setElement(2, 0, jitterX);
367 projMatrix.setElement(2, 1, jitterY);
368 }
369 const auto viewProjection = projMatrix * viewMatrix;
370 camera->storeShaderMatrices(viewProjection, jitterX, jitterY, _device->renderVersion());
371
372 // apply per-camera rect on active render target.
373 const float oldVx = _device->vx();
374 const float oldVy = _device->vy();
375 const float oldVw = _device->vw();
376 const float oldVh = _device->vh();
377 const int oldSx = _device->sx();
378 const int oldSy = _device->sy();
379 const int oldSw = _device->sw();
380 const int oldSh = _device->sh();
381
382 _device->setViewport(
383 static_cast<float>(viewportX),
384 static_cast<float>(viewportY),
385 static_cast<float>(viewportW),
386 static_cast<float>(viewportH)
387 );
388 _device->setScissor(scissorX, scissorY, scissorW, scissorH);
389 // DEVIATION: pooled frame-local query objects reduce allocator churn in this native port.
390 static thread_local ObjectPool<ForwardDrawEntry> drawEntryPool(256);
391 drawEntryPool.freeAll();
392 std::vector<ForwardDrawEntry*> drawEntries;
393 drawEntries.reserve(256);
394
395 const auto appendMeshInstance = [&](MeshInstance* meshInstance) {
396 if (!meshInstance || !meshInstance->visible()) {
397 return;
398 }
399
400 auto* mesh = meshInstance->mesh();
401 if (!mesh) {
402 return;
403 }
404
405 auto vertexBuffer = mesh->getVertexBuffer();
406 if (!vertexBuffer) {
407 return;
408 }
409
410 auto* entry = drawEntryPool.allocate();
411 entry->meshInstance = meshInstance;
412 entry->material = meshInstance->material() ? meshInstance->material() : defaultMaterial.get();
413 if (!entry->material) {
414 return;
415 }
416 if (entry->material->transparent() != transparent) {
417 return;
418 }
419
420 const bool isSkyboxMaterial = entry->material->isSkybox();
421 const auto worldBounds = meshInstance->aabb();
422 if (!isSkyboxMaterial && !isVisibleInCameraFrustum(camera, cameraNode, worldBounds)) {
423 _numDrawCallsCulled++;
424 return;
425 }
426
427 entry->vertexBuffer = vertexBuffer;
428 entry->indexBuffer = mesh->getIndexBuffer();
429 entry->primitive = mesh->getPrimitive();
430 entry->sortKey = makeOpaqueSortKey(meshInstance);
431
432 auto* node = meshInstance->node();
433 if (node && !isSkyboxMaterial) {
434 const auto delta = worldBounds.center() - cameraPosition;
435 entry->distanceToCameraSq = delta.lengthSquared();
436 } else {
437 entry->distanceToCameraSq = 0.0f;
438 }
439
440 drawEntries.push_back(entry);
441 };
442
443 for (auto* renderComponent : RenderComponent::instances()) {
444 if (!renderComponent || !renderComponent->enabled()) {
445 continue;
446 }
447
448 // Also check that the owning entity (and its entire hierarchy) is enabled.
449 // Component::enabled() only returns the component's own flag — it does not
450 // reflect the parent entity's setEnabled(false) state.
451 if (renderComponent->entity() && !renderComponent->entity()->enabled()) {
452 continue;
453 }
454
455 const auto& componentLayers = renderComponent->layers();
456 if (std::find(componentLayers.begin(), componentLayers.end(), layer->id()) == componentLayers.end()) {
457 continue;
458 }
459
460 for (auto* meshInstance : renderComponent->meshInstances()) {
461 appendMeshInstance(meshInstance);
462 }
463 }
464
465 for (auto* meshInstance : layer->meshInstances()) {
466 appendMeshInstance(meshInstance);
467 }
468
469 if (transparent) {
470 // transparent sublayer is sorted back-to-front.
471 std::stable_sort(drawEntries.begin(), drawEntries.end(),
472 [](const ForwardDrawEntry* a, const ForwardDrawEntry* b) {
473 if (a->distanceToCameraSq == b->distanceToCameraSq) {
474 return a->sortKey < b->sortKey;
475 }
476 return a->distanceToCameraSq > b->distanceToCameraSq;
477 });
478 } else {
479 // opaque sublayer prioritizes material/mesh sort, then front-to-back.
480 std::stable_sort(drawEntries.begin(), drawEntries.end(),
481 [](const ForwardDrawEntry* a, const ForwardDrawEntry* b) {
482 if (a->sortKey != b->sortKey) {
483 return a->sortKey < b->sortKey;
484 }
485 return a->distanceToCameraSq < b->distanceToCameraSq;
486 });
487 }
488
489 const auto sortEnd = std::chrono::high_resolution_clock::now();
490 _sortTime += static_cast<int>(std::chrono::duration_cast<std::chrono::milliseconds>(sortEnd - sortStart).count());
491
492 // Intentional temporary deviation from JS:
493 // this path now binds core material uniforms/textures (including Material::setParameter overrides)
494 // and forward shader variants, while full parameter scope is still being ported.
495 const auto ambientColor = _scene ? _scene->ambientLight() : Color(0.0f, 0.0f, 0.0f, 1.0f);
496 const auto fogParams = _scene ? _scene->fog() : FogParams{};
497 std::vector<LightDispatchEntry> directionalLights;
498 std::vector<LightDispatchEntry> localLights;
499 directionalLights.reserve(4);
500 localLights.reserve(8);
501 ShadowParams shadowParams{};
502
503 auto toRadians = [](const float degrees) {
504 return degrees * (std::numbers::pi_v<float> / 180.0f);
505 };
506
507 auto makeGpuLight = [&](const LightComponent* lightComponent, GpuLightData& lightData) {
508 if (!lightComponent) {
509 return;
510 }
511
512 switch (lightComponent->type()) {
513 case LightType::LIGHTTYPE_DIRECTIONAL:
514 lightData.type = GpuLightType::Directional;
515 break;
516 case LightType::LIGHTTYPE_SPOT:
517 lightData.type = GpuLightType::Spot;
518 break;
519 case LightType::LIGHTTYPE_AREA_RECT:
520 lightData.type = GpuLightType::AreaRect;
521 lightData.areaHalfWidth = lightComponent->areaWidth() * 0.5f;
522 lightData.areaHalfHeight = lightComponent->areaHeight() * 0.5f;
523 {
524 // Right vector from entity's world transform X axis.
525 const auto& wt = lightComponent->entity()->worldTransform();
526 Vector3 right(wt.getElement(0, 0), wt.getElement(1, 0), wt.getElement(2, 0));
527 if (right.lengthSquared() > 1e-8f) {
528 lightData.areaRight = right.normalized();
529 }
530 }
531 break;
532 case LightType::LIGHTTYPE_OMNI:
533 case LightType::LIGHTTYPE_POINT:
534 default:
535 lightData.type = GpuLightType::Point;
536 break;
537 }
538
539 lightData.position = lightComponent->position();
540 lightData.direction = lightComponent->direction();
541 if (lightData.direction.lengthSquared() > 1e-8f) {
542 lightData.direction = lightData.direction.normalized();
543 } else {
544 lightData.direction = Vector3(0.0f, -1.0f, 0.0f);
545 }
546 lightData.color = lightComponent->color();
547 lightData.intensity = std::max(lightComponent->intensity(), 0.0f);
548 lightData.range = std::max(lightComponent->range(), 1e-4f);
549 lightData.innerConeCos = std::cos(toRadians(std::max(lightComponent->innerConeAngle(), 0.0f) * 0.5f));
550 lightData.outerConeCos = std::cos(toRadians(std::max(lightComponent->outerConeAngle(), 0.0f) * 0.5f));
551 if (lightData.innerConeCos < lightData.outerConeCos) {
552 lightData.innerConeCos = lightData.outerConeCos;
553 }
554 lightData.falloffModeLinear = lightComponent->falloffMode() == LightFalloff::LIGHTFALLOFF_LINEAR;
555 lightData.castShadows = lightComponent->castShadows();
556 };
557
558 for (const auto* lightComponent : LightComponent::instances()) {
559 if (!lightComponent || !lightComponent->enabled()) {
560 continue;
561 }
562 if (layer && !lightComponent->rendersLayer(layer->id())) {
563 continue;
564 }
565 if (cameraComponent && layer && !cameraComponent->rendersLayer(layer->id())) {
566 continue;
567 }
568
569 GpuLightData lightData{};
570 makeGpuLight(lightComponent, lightData);
571 if (lightData.intensity <= 0.0f) {
572 continue;
573 }
574
575 if (lightData.castShadows && !shadowParams.enabled &&
576 lightData.type == GpuLightType::Directional) {
577 shadowParams.enabled = true;
578 shadowParams.normalBias = lightComponent->shadowNormalBias();
579 shadowParams.strength = lightComponent->shadowStrength();
580
581 // Wire actual shadow map and cascade data from scene Light object.
582 Light* sceneLight = lightComponent->light();
583 if (sceneLight && sceneLight->shadowMap()) {
584 shadowParams.shadowMap = sceneLight->shadowMap()->shadowTexture();
585
586 // CSM: copy the full matrix palette and cascade distances.
587 shadowParams.numCascades = sceneLight->numCascades();
588 shadowParams.cascadeBlend = sceneLight->cascadeBlend();
589 std::memcpy(shadowParams.shadowMatrixPalette,
590 sceneLight->shadowMatrixPalette().data(),
591 sizeof(shadowParams.shadowMatrixPalette));
592 std::memcpy(shadowParams.shadowCascadeDistances,
593 sceneLight->shadowCascadeDistances().data(),
594 sizeof(shadowParams.shadowCascadeDistances));
595
596 // Keep single VP matrix for cascade 0 (backward compat).
597 LightRenderData* rd = sceneLight->getRenderData(camera, 0);
598 if (rd && rd->shadowCamera && rd->shadowCamera->node()) {
599 shadowParams.viewProjection = rd->shadowCamera->projectionMatrix()
600 * rd->shadowCamera->node()->worldTransform().inverse();
601
602 // fixed small shader-side depth bias for directional
603 // shadow sampling. The real bias work is done by hardware polygon offset
604 // (depthBias) during the shadow render pass, which is slope-aware.
605 //"saturate(z) - 0.0001".
606 shadowParams.bias = 0.0001f;
607 }
608 }
609 }
610
611 // Wire local light shadow data (spot/point).
612 // Assign shadow map index and populate ShadowParams.localShadows.
613 // Omni lights use cubemap depth textures; spot lights use 2D textures.
614 if (lightData.castShadows && lightData.type != GpuLightType::Directional) {
615 Light* sceneLight = lightComponent->light();
616 if (sceneLight && sceneLight->shadowMap() &&
617 shadowParams.localShadowCount < ShadowParams::kMaxLocalShadows) {
618
619 const int shadowIdx = shadowParams.localShadowCount;
620 lightData.shadowMapIndex = shadowIdx;
621
622 const bool isOmni = (sceneLight->type() == LightType::LIGHTTYPE_OMNI);
623 auto& ls = shadowParams.localShadows[shadowIdx];
624 ls.shadowMap = sceneLight->shadowMap()->shadowTexture();
625 ls.isOmni = isOmni;
626
627 if (isOmni) {
628 // For omni lights, pack the far clip (range) into VP[0][0] so the
629 // uniform binder can extract it for the cubemap depth comparison.
630 Matrix4 rangePack = Matrix4::identity();
631 rangePack.setElement(0, 0, sceneLight->range());
632 ls.viewProjection = rangePack;
633 } else {
634 ls.viewProjection = sceneLight->shadowViewProjection();
635 }
636 ls.bias = sceneLight->shadowBias();
637 ls.normalBias = sceneLight->normalBias();
638 ls.intensity = sceneLight->shadowIntensity();
639
640 shadowParams.localShadowCount++;
641 } else {
642 // No shadow slot available — clear castShadows so the shader
643 // doesn't attempt to sample a non-existent shadow map.
644 lightData.castShadows = false;
645 }
646 }
647
648 LightDispatchEntry dispatchEntry{};
649 dispatchEntry.light = lightData;
650 dispatchEntry.mask = lightComponent->mask();
651
652 if (dispatchEntry.light.type == GpuLightType::Directional) {
653 directionalLights.push_back(dispatchEntry);
654 } else {
655 localLights.push_back(dispatchEntry);
656 }
657 }
658
659 // Environment uniforms are constant across the entire layer (depend only on
660 // _scene, not on per-draw state). Hoisted out of the per-draw loop to avoid
661 // redundant calls — setEnvironmentUniforms writes to _lightingUniforms fields
662 // and sets texture pointers that are the same for every draw in the layer.
663 {
664 Vector3 skyDomeCenter(0, 0, 0);
665 bool isDome = false;
666 if (_scene && _scene->sky() && _scene->sky()->type() != SKYTYPE_INFINITE && _scene->sky()->type() != SKYTYPE_ATMOSPHERE) {
667 skyDomeCenter = _scene->sky()->centerWorldPos();
668 isDome = true;
669 }
670 _device->setEnvironmentUniforms(_scene ? _scene->envAtlas() : nullptr,
671 _scene ? _scene->skyboxIntensity() : 1.0f,
672 static_cast<float>(_scene ? _scene->skyboxMip() : 0),
673 skyDomeCenter, isDome,
674 _scene ? _scene->skybox() : nullptr);
675 }
676
677 // --- Clustered lighting: feed local lights into WorldClusters ---
678 if (clusteredEnabled && _worldClusters) {
679 // Convert local light dispatch entries to WorldClusters input format.
680 std::vector<ClusterLightData> clusterLocalLights;
681 clusterLocalLights.reserve(localLights.size());
682
683 for (const auto& dispatchEntry : localLights) {
684 const auto& ld = dispatchEntry.light;
685 // Area rect lights are not clustered — they go through the main 8-light array.
686 if (ld.type == GpuLightType::AreaRect) continue;
687 ClusterLightData lcd;
688 lcd.position = ld.position;
689 lcd.direction = ld.direction;
690 lcd.color = ld.color;
691 lcd.intensity = ld.intensity;
692 lcd.range = ld.range;
693 lcd.innerConeAngle = std::acos(std::clamp(ld.innerConeCos, -1.0f, 1.0f))
694 * (360.0f / std::numbers::pi_v<float>); // radians half-angle → degrees full-angle
695 lcd.outerConeAngle = std::acos(std::clamp(ld.outerConeCos, -1.0f, 1.0f))
696 * (360.0f / std::numbers::pi_v<float>);
697 lcd.isSpot = (ld.type == GpuLightType::Spot);
698 lcd.falloffModeLinear = ld.falloffModeLinear;
699 clusterLocalLights.push_back(lcd);
700 }
701
702 // Compute camera frustum AABB for cluster grid bounds.
703 // Use camera position ± reasonable range as a simple approximation.
704 // A full frustum AABB would require unprojecting corners, but camera
705 // position ± max light range is sufficient for the grid to cover all lights.
706 BoundingBox cameraBounds(cameraPosition, Vector3(50.0f, 50.0f, 50.0f));
707
708 _worldClusters->update(clusterLocalLights, cameraBounds);
709
710 // Bind cluster GPU buffers.
711 if (_worldClusters->lightCount() > 0) {
712 _device->setClusterBuffers(
713 _worldClusters->lightData(), _worldClusters->lightDataSize(),
714 _worldClusters->cellData(), _worldClusters->cellDataSize());
715
716 // Pack cluster grid params into LightingUniforms.
717 const auto& bMin = _worldClusters->boundsMin();
718 const auto bRange = _worldClusters->boundsRange();
719 const auto cellsBySize = _worldClusters->cellsCountByBoundsSize();
720 const auto& cfg = _worldClusters->config();
721
722 const float boundsMinArr[3] = {bMin.getX(), bMin.getY(), bMin.getZ()};
723 const float boundsRangeArr[3] = {bRange.getX(), bRange.getY(), bRange.getZ()};
724 const float cellsBySizeArr[3] = {cellsBySize.getX(), cellsBySize.getY(), cellsBySize.getZ()};
725
726 _device->setClusterGridParams(boundsMinArr, boundsRangeArr, cellsBySizeArr,
727 cfg.cellsX, cfg.cellsY, cfg.cellsZ, cfg.maxLightsPerCell,
728 _worldClusters->lightCount());
729 }
730 }
731
732 // --- Phase 4: pre-compute filtered light list for the common mask ---
733 // 95%+ of mesh instances use MASK_AFFECT_DYNAMIC (default). Pre-filter the
734 // light list for this mask once, then reuse it across all draws with the
735 // same mask. Only re-filter when a draw has a different mask.
736 std::vector<GpuLightData> cachedGpuLights;
737 cachedGpuLights.reserve(8);
738 uint32_t cachedLightMask = MASK_AFFECT_DYNAMIC;
739
740 auto buildFilteredLights = [&](uint32_t mask, std::vector<GpuLightData>& out) {
741 out.clear();
742 // Directional lights always go into LightingData.lights[].
743 for (const auto& dispatchEntry : directionalLights) {
744 if ((dispatchEntry.mask & mask) == 0u) continue;
745 out.push_back(dispatchEntry.light);
746 if (out.size() >= 8) break;
747 }
748 // Area rect lights always go into the main 8-light array (not clustered).
749 // They must be added before the clustering guard so they're always present.
750 for (const auto& dispatchEntry : localLights) {
751 if (out.size() >= 8) break;
752 if (dispatchEntry.light.type != GpuLightType::AreaRect) continue;
753 if ((dispatchEntry.mask & mask) == 0u) continue;
754 out.push_back(dispatchEntry.light);
755 }
756 // When clustering is enabled, non-area local lights are handled by the
757 // cluster grid (fragment shader samples buffer slots 7/8).
758 if (!clusteredEnabled) {
759 for (const auto& dispatchEntry : localLights) {
760 if (out.size() >= 8) break;
761 if ((dispatchEntry.mask & mask) == 0u) continue;
762 if (dispatchEntry.light.type == GpuLightType::AreaRect) continue; // already added above
763 out.push_back(dispatchEntry.light);
764 }
765 }
766 };
767
768 // Pre-build for the default mask (covers 95%+ of draws).
769 buildFilteredLights(MASK_AFFECT_DYNAMIC, cachedGpuLights);
770
771 // Phase 4: cull mode cache — skip material parameter map lookups for same material.
772 const Material* lastCullMaterial = nullptr;
773 CullMode cachedCullMode = CullMode::CULLFACE_BACK;
774
775 for (const auto* entry : drawEntries) {
776 const Material* boundMaterial = entry->material ? entry->material : defaultMaterial.get();
777 const bool isDynBatch = entry->meshInstance && entry->meshInstance->isDynamicBatch();
778 programLibrary->bindMaterial(_device, boundMaterial, transparent, isDynBatch);
779
780 // Phase 4: reuse cached light list when mask matches (zero allocation per draw).
781 const uint32_t drawLightMask = (entry->meshInstance ? entry->meshInstance->mask() : MASK_AFFECT_DYNAMIC);
782 if (drawLightMask != cachedLightMask) {
783 buildFilteredLights(drawLightMask, cachedGpuLights);
784 cachedLightMask = drawLightMask;
785 }
786
787 //controls SHADERDEF_NOSHADOW.
788 // When a mesh instance has receiveShadow=false, suppress shadow params for this draw.
789 const bool drawReceivesShadow = (!entry->meshInstance || entry->meshInstance->receiveShadow());
790 if (drawReceivesShadow) {
791 _device->setLightingUniforms(ambientColor, cachedGpuLights, cameraPosition, true,
792 (_scene ? _scene->exposure() : 1.0f), fogParams, shadowParams,
793 (_scene ? _scene->toneMapping() : 0));
794 } else {
795 ShadowParams noShadow;
796 noShadow.enabled = false;
797 _device->setLightingUniforms(ambientColor, cachedGpuLights, cameraPosition, true,
798 (_scene ? _scene->exposure() : 1.0f), fogParams, noShadow,
799 (_scene ? _scene->toneMapping() : 0));
800 }
801
802 // Phase 4: cache material's base cull mode (skip parameter map lookups),
803 // then apply node-scale flip per draw (trivial float check).
804 if (boundMaterial != lastCullMaterial) {
805 cachedCullMode = resolveMaterialCullMode(boundMaterial);
806 lastCullMaterial = boundMaterial;
807 }
808 const auto cullMode = applyNodeScaleFlip(cachedCullMode,
809 entry->meshInstance ? entry->meshInstance->node() : nullptr);
810 _device->setCullMode(cullMode);
811
812 _device->setVertexBuffer(entry->vertexBuffer, 0);
813
814 // Hardware instancing: bind instance buffer at slot 5, pass instanceCount to draw.
815 //checks meshInstance.instancingData before draw.
816 const auto& instData = entry->meshInstance ? entry->meshInstance->instancingData() : MeshInstance::InstancingData{};
817
818 if (instData.indirectArgsBuffer && instData.indirectSlot >= 0 && instData.compactedVertexBuffer) {
819 // GPU-culled indirect instancing (Phase 3):
820 // Bind the compacted buffer (visible instances only) at slot 5.
821 // Instance count comes from the GPU via indirect draw arguments.
822 _device->setVertexBuffer(instData.compactedVertexBuffer, 5);
823 _device->setIndirectDrawBuffer(instData.indirectArgsBuffer);
824 // Identity model matrix — each instance carries its own transform via stage_in.
825 _device->setTransformUniforms(viewProjection, Matrix4::identity());
826 _device->draw(entry->primitive, entry->indexBuffer, 0, instData.indirectSlot, true, true);
827 } else if (instData.vertexBuffer && instData.count > 0) {
828 // Direct instancing (Phase 2): all instances drawn, CPU-provided count.
829 _device->setVertexBuffer(instData.vertexBuffer, 5);
830
831 Matrix4 modelMatrix;
832 if (boundMaterial && boundMaterial->isSkybox()) {
833 if (_scene && _scene->sky() && _scene->sky()->type() != SKYTYPE_INFINITE && _scene->sky()->type() != SKYTYPE_ATMOSPHERE) {
834 modelMatrix = entry->meshInstance && entry->meshInstance->node()
835 ? entry->meshInstance->node()->worldTransform()
836 : Matrix4::identity();
837 } else {
838 modelMatrix = Matrix4::translation(cameraPosition.getX(), cameraPosition.getY(), cameraPosition.getZ());
839 }
840 } else {
841 modelMatrix = Matrix4::identity();
842 }
843 _device->setTransformUniforms(viewProjection, modelMatrix);
844 _device->draw(entry->primitive, entry->indexBuffer, instData.count, -1, true, true);
845 } else if (isDynBatch) {
846 // Dynamic batch draw: bind palette, use identity model matrix.
847 //— dynamic batches use SkinBatchInstance
848 // with a per-frame matrix palette. The vertex shader indexes into the palette
849 // using a per-vertex bone index.
850 auto* sbi = entry->meshInstance->skinBatchInstance();
851 if (sbi) {
852 _device->setDynamicBatchPalette(sbi->paletteData(), sbi->paletteSizeBytes());
853 }
854 _device->setTransformUniforms(viewProjection, Matrix4::identity());
855 _device->draw(entry->primitive, entry->indexBuffer, 1, -1, true, true);
856 } else {
857 // Non-instanced draw.
858 Matrix4 modelMatrix;
859 if (boundMaterial && boundMaterial->isSkybox()) {
860 if (_scene && _scene->sky() && _scene->sky()->type() != SKYTYPE_INFINITE && _scene->sky()->type() != SKYTYPE_ATMOSPHERE) {
861 modelMatrix = entry->meshInstance && entry->meshInstance->node()
862 ? entry->meshInstance->node()->worldTransform()
863 : Matrix4::identity();
864 } else {
865 modelMatrix = Matrix4::translation(cameraPosition.getX(), cameraPosition.getY(), cameraPosition.getZ());
866 }
867 } else {
868 modelMatrix = (entry->meshInstance && entry->meshInstance->node())
869 ? entry->meshInstance->node()->worldTransform()
870 : Matrix4::identity();
871 }
872 _device->setTransformUniforms(viewProjection, modelMatrix);
873 _device->draw(entry->primitive, entry->indexBuffer, 1, -1, true, true);
874 }
875 _forwardDrawCalls++;
876 }
877
878 // Restore global viewport/scissor after this camera-layer pass.
879 _device->setViewport(oldVx, oldVy, oldVw, oldVh);
880 _device->setScissor(oldSx, oldSy, oldSw, oldSh);
881
882 }
883}
static const std::vector< CameraComponent * > & instances()
Perspective or orthographic camera with projection matrix, jitter (TAA), and render target binding.
Definition camera.h:40
AspectRatioMode aspectRatioMode() const
Definition camera.h:50
float jitter() const
Definition camera.h:76
const Vector4 & rect() const
Definition camera.h:94
void setAspectRatio(float value)
Definition camera.cpp:25
bool planarReflectionDepthPass() const
Definition camera.h:220
const Matrix4 & projectionMatrix()
Definition camera.h:65
const std::unique_ptr< GraphNode > & node() const
Definition camera.h:102
const std::shared_ptr< RenderTarget > & renderTarget() const
Definition camera.h:86
const Vector4 & scissorRect() const
Definition camera.h:99
void storeShaderMatrices(const Matrix4 &viewProjection, float jitterX, float jitterY, int renderVersion)
Definition camera.cpp:58
Hierarchical scene graph node with local/world transforms and parent-child relationships.
Definition graphNode.h:28
int id() const
Definition layer.h:26
const std::vector< MeshInstance * > & meshInstances() const
Definition layer.h:52
static const std::vector< LightComponent * > & instances()
Directional, point, spot, or area light with shadow mapping and cookie projection.
Definition light.h:54
ShadowMap * shadowMap() const
Definition light.h:99
void setShadowMap(ShadowMap *value)
Definition light.h:100
Base class for GPU materials — owns uniform data, texture bindings, blend/depth state,...
Definition material.h:143
std::variant< float, int32_t, uint32_t, bool, Color, Vector2, Vector3, Vector4, Matrix4, Texture * > ParameterValue
Definition material.h:145
Renderable instance of a Mesh with its own material, transform node, and optional GPU instancing.
static const std::vector< RenderComponent * > & instances()
std::shared_ptr< Scene > _scene
Definition renderer.h:43
std::shared_ptr< RenderPassUpdateClustered > _renderPassUpdateClustered
Definition renderer.h:45
void renderForwardLayer(Camera *camera, RenderTarget *renderTarget, Layer *layer, bool transparent)
Definition renderer.cpp:218
std::unordered_map< Camera *, std::vector< Light * > > _cameraDirShadowLights
Definition renderer.h:57
std::unique_ptr< ShadowRendererLocal > _shadowRendererLocal
Definition renderer.h:47
std::shared_ptr< GraphicsDevice > _device
Definition renderer.h:41
void cullShadowmaps(Camera *camera)
Definition renderer.cpp:170
std::vector< std::unique_ptr< ShadowMap > > _ownedShadowMaps
Definition renderer.h:62
Renderer(const std::shared_ptr< GraphicsDevice > &device, const std::shared_ptr< Scene > &scene)
Definition renderer.cpp:149
static std::unique_ptr< ShadowMap > create(GraphicsDevice *device, Light *light)
Definition shadowMap.cpp:15
std::shared_ptr< ProgramLibrary > getProgramLibrary(const std::shared_ptr< GraphicsDevice > &device)
std::shared_ptr< Material > getDefaultMaterial(const std::shared_ptr< GraphicsDevice > &device)
Definition material.cpp:382
bool isVisibleInCameraFrustum(Camera *camera, GraphNode *cameraNode, const BoundingBox &bounds)
constexpr uint32_t MASK_AFFECT_DYNAMIC
Definition constants.h:31
4x4 column-major transformation matrix with SIMD acceleration.
Definition matrix4.h:31
void setElement(const int col, int row, const float value)
Definition matrix4.h:376
static Matrix4 identity()
Definition matrix4.h:108
3D vector for positions, directions, and normals with multi-backend SIMD acceleration.
Definition vector3.h:29
4D vector for homogeneous coordinates, color values, and SIMD operations.
Definition vector4.h:20
float getX() const
Definition vector4.h:85
float getY() const
Definition vector4.h:98