VisuTwin Canvas
C++ 3D Engine — Metal Backend
Loading...
Searching...
No Matches
metalSsaoPass.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2// Copyright 2025-2026 Arnis Lektauers
3//
4// SSAO (Screen-Space Ambient Occlusion) pass implementation.
5// Shader ported from upstream scene/shader-lib/glsl/chunks/render-pass/frag/ssao.js
6//
7#include "metalSsaoPass.h"
8
9#include "metalComposePass.h"
10#include "metalGraphicsDevice.h"
11#include "metalRenderPipeline.h"
12#include "metalTexture.h"
13#include "metalVertexBuffer.h"
22#include "spdlog/spdlog.h"
23
24namespace visutwin::canvas
25{
26 namespace
27 {
28 // SSAO (GLSL to Metal).
29 // Based on 'Scalable Ambient Obscurance' by Morgan McGuire, adapted by Naughty Dog.
 //
 // Complete Metal Shading Language source for the SSAO pass, kept as a raw string literal.
 // ensureResources() hands it to createShader() with the entry points "ssaoVertex" and
 // "ssaoFragment".
 //
 // Vertex stage: reads ComposeVertexIn (attributes 0-4) but only forwards position and uv0.
 // Fragment stage bindings:
 //   texture(0) - depth2d<float> scene depth
 //   sampler(0) - sampler used for every depth fetch
 //   buffer(5)  - SsaoUniforms
 // The host-side SsaoUniforms struct in execute() must mirror the field order and alignment
 // of the SsaoUniforms struct declared in this source.
 //
 // Output: float4(ao, ao, ao, 1.0) where ao = pow(max(0, 1 - occlusion * intensity), power).
 //
 // Note: SsaoUniforms::spiralTurns, SsaoUniforms::maxLevel and the constant kLog2LodRate are
 // declared in the shader text but are not read by any function in it.
30 constexpr const char* SSAO_SOURCE = R"(
31#include <metal_stdlib>
32using namespace metal;
33
34struct ComposeVertexIn {
35 float3 position [[attribute(0)]];
36 float3 normal [[attribute(1)]];
37 float2 uv0 [[attribute(2)]];
38 float4 tangent [[attribute(3)]];
39 float2 uv1 [[attribute(4)]];
40};
41
42struct SsaoVarying {
43 float4 position [[position]];
44 float2 uv;
45};
46
47struct SsaoUniforms {
48 float aspect;
49 float2 invResolution;
50 float2 sampleCount; // x=count, y=1/count
51 float spiralTurns;
52 float2 angleIncCosSin;
53 float maxLevel;
54 float invRadiusSquared;
55 float minHorizonAngleSineSquared;
56 float bias;
57 float peak2;
58 float intensity;
59 float power;
60 float projectionScaleRadius;
61 float randomize;
62 float cameraNear;
63 float cameraFar;
64};
65
66vertex SsaoVarying ssaoVertex(ComposeVertexIn in [[stage_in]])
67{
68 SsaoVarying out;
69 out.position = float4(in.position, 1.0);
70 out.uv = in.uv0;
71 return out;
72}
73
74static inline float getLinearDepth(float rawDepth, float cameraNear, float cameraFar)
75{
76 // Standard depth [0,1]: near=0, far=1 (vertex shader maps via clip.z = 0.5*(clip.z + clip.w)).
77 // Returns positive linear view-space distance from camera.
78 return (cameraNear * cameraFar) / (cameraFar - rawDepth * (cameraFar - cameraNear));
79}
80
81static constant float kLog2LodRate = 3.0;
82
83// Random number between 0 and 1 using interleaved gradient noise
84static inline float random(float2 fragCoord)
85{
86 const float3 m = float3(0.06711056, 0.00583715, 52.9829189);
87 return fract(m.z * fract(dot(fragCoord, m.xy)));
88}
89
90static inline float3 computeViewSpacePositionFromDepth(float2 uv, float linearDepth, float aspect)
91{
92 return float3((0.5 - uv) * float2(aspect, 1.0) * linearDepth, linearDepth);
93}
94
95static inline float3 faceNormal(float3 dpdx, float3 dpdy)
96{
97 return normalize(cross(dpdx, dpdy));
98}
99
100// Compute normals directly from the depth texture (full resolution normals)
101static inline float3 computeViewSpaceNormal(float3 position, float2 uv, float2 invResolution,
102 float aspect, depth2d<float> depthTexture, sampler linearSampler, float cameraNear, float cameraFar)
103{
104 float2 uvdx = uv + float2(invResolution.x, 0.0);
105 float2 uvdy = uv + float2(0.0, invResolution.y);
106 float depthDx = depthTexture.sample(linearSampler, uvdx);
107 float depthDy = depthTexture.sample(linearSampler, uvdy);
108 float3 px = computeViewSpacePositionFromDepth(uvdx, getLinearDepth(depthDx, cameraNear, cameraFar), aspect);
109 float3 py = computeViewSpacePositionFromDepth(uvdy, getLinearDepth(depthDy, cameraNear, cameraFar), aspect);
110 float3 dpdx = px - position;
111 float3 dpdy = py - position;
112 return faceNormal(dpdx, dpdy);
113}
114
115// Spiral tap position (fast path)
116static inline float2 startPosition(float noise)
117{
118 float angle = ((2.0 * M_PI_F) * 2.4) * noise;
119 return float2(cos(angle), sin(angle));
120}
121
122static inline float3 tapLocationFast(float i, float2 p, float noise, float invSampleCount)
123{
124 float radius = (i + noise + 0.5) * invSampleCount;
125 return float3(p, radius * radius);
126}
127
128static inline float2x2 tapAngleStep(float2 angleIncCosSin)
129{
130 return float2x2(angleIncCosSin.x, angleIncCosSin.y, -angleIncCosSin.y, angleIncCosSin.x);
131}
132
133static inline void computeAmbientOcclusionSAO(
134 thread float& occlusion, float i, float ssDiskRadius,
135 float2 uv, float3 origin, float3 normal,
136 float2 tapPosition, float noise, float invSampleCount,
137 float2 invResolution, float invRadiusSquared, float minHorizonAngleSineSquared,
138 float bias, float peak2, float aspect,
139 depth2d<float> depthTexture, sampler linearSampler, float cameraNear, float cameraFar)
140{
141 float3 tap = tapLocationFast(i, tapPosition, noise, invSampleCount);
142
143 float ssRadius = max(1.0, tap.z * ssDiskRadius); // at least 1 pixel screen-space radius
144
145 float2 uvSamplePos = uv + float2(ssRadius * tap.xy) * invResolution;
146
147 float occlusionDepth = getLinearDepth(depthTexture.sample(linearSampler, uvSamplePos), cameraNear, cameraFar);
148 float3 p = computeViewSpacePositionFromDepth(uvSamplePos, occlusionDepth, aspect);
149
150 // now we have the sample, compute AO
151 float3 v = p - origin; // sample vector
152 float vv = dot(v, v); // squared distance
153 float vn = dot(v, normal); // distance * cos(v, normal)
154
155 // discard samples that are outside of the radius
156 float w = max(0.0, 1.0 - vv * invRadiusSquared);
157 w = w * w;
158
159 // discard samples that are too close to the horizon
160 w *= step(vv * minHorizonAngleSineSquared, vn * vn);
161
162 occlusion += w * max(0.0, vn + origin.z * bias) / (vv + peak2);
163}
164
165static inline float scalableAmbientObscurance(
166 float2 uv, float3 origin, float3 normal, float2 fragCoord,
167 float2 sampleCount, float2 angleIncCosSin, float projectionScaleRadius,
168 float2 invResolution, float invRadiusSquared, float minHorizonAngleSineSquared,
169 float bias, float peak2, float aspect, float randomizeValue,
170 depth2d<float> depthTexture, sampler linearSampler, float cameraNear, float cameraFar)
171{
172 float noise = random(fragCoord) + randomizeValue;
173 float2 tapPos = startPosition(noise);
174 float2x2 angleStep = tapAngleStep(angleIncCosSin);
175
176 // Choose the screen-space sample radius proportional to the projected area of the sphere
177 // DEVIATION: upstream uses -(projInfo.z / position.z) with negative Z (OpenGL -Z convention).
178 // Our view-space Z is positive (linearDepth), so we use positive division directly.
179 float ssDiskRadius = projectionScaleRadius / origin.z;
180
181 float occlusion = 0.0;
182 for (float i = 0.0; i < sampleCount.x; i += 1.0) {
183 computeAmbientOcclusionSAO(occlusion, i, ssDiskRadius, uv, origin, normal, tapPos, noise,
184 sampleCount.y, invResolution, invRadiusSquared, minHorizonAngleSineSquared,
185 bias, peak2, aspect, depthTexture, linearSampler, cameraNear, cameraFar);
186 tapPos = angleStep * tapPos;
187 }
188 return occlusion;
189}
190
191fragment float4 ssaoFragment(
192 SsaoVarying in [[stage_in]],
193 depth2d<float> depthTexture [[texture(0)]],
194 sampler linearSampler [[sampler(0)]],
195 constant SsaoUniforms& uniforms [[buffer(5)]])
196{
197 const float2 uv = clamp(in.uv, float2(0.0), float2(1.0));
198
199 float rawDepth = depthTexture.sample(linearSampler, uv);
200 float depth = getLinearDepth(rawDepth, uniforms.cameraNear, uniforms.cameraFar);
201 float3 origin = computeViewSpacePositionFromDepth(uv, depth, uniforms.aspect);
202 // DEVIATION: upstream reconstructs positions with negative Z (depth = -getLinearScreenDepth),
203 // so cross(dpdx, dpdy) naturally yields normals pointing towards the camera (-Z).
204 // Our Metal path uses positive depth (distance from camera), so the cross product
205 // produces normals pointing away (+Z). Negate to match upstream convention.
206 float3 normal = -computeViewSpaceNormal(origin, uv, uniforms.invResolution, uniforms.aspect,
207 depthTexture, linearSampler, uniforms.cameraNear, uniforms.cameraFar);
208
209 float occlusion = 0.0;
210 if (uniforms.intensity > 0.0) {
211 occlusion = scalableAmbientObscurance(uv, origin, normal, in.position.xy,
212 uniforms.sampleCount, uniforms.angleIncCosSin, uniforms.projectionScaleRadius,
213 uniforms.invResolution, uniforms.invRadiusSquared, uniforms.minHorizonAngleSineSquared,
214 uniforms.bias, uniforms.peak2, uniforms.aspect, uniforms.randomize,
215 depthTexture, linearSampler, uniforms.cameraNear, uniforms.cameraFar);
216 }
217
218 // occlusion to visibility
219 float ao = max(0.0, 1.0 - occlusion * uniforms.intensity);
220 ao = pow(ao, uniforms.power);
221
222 return float4(ao, ao, ao, 1.0);
223}
224)";
225 }
226
228 : _device(device), _composePass(composePass)
229 {
230 }
231
233 {
234 if (_depthStencilState) {
235 _depthStencilState->release();
236 _depthStencilState = nullptr;
237 }
238 }
239
240 void MetalSsaoPass::ensureResources()
241 {
242 // Ensure the compose pass's shared vertex buffer/format are created first
243 _composePass->ensureResources();
244
245 if (_shader && _composePass->vertexBuffer() && _composePass->vertexFormat() &&
246 _blendState && _depthState && _depthStencilState) {
247 return;
248 }
249
250 if (!_shader) {
251 ShaderDefinition definition;
252 definition.name = "SsaoPass";
253 definition.vshader = "ssaoVertex";
254 definition.fshader = "ssaoFragment";
255 _shader = createShader(_device, definition, SSAO_SOURCE);
256 }
257
258 if (!_blendState) {
259 _blendState = std::make_shared<BlendState>();
260 }
261 if (!_depthState) {
262 _depthState = std::make_shared<DepthState>();
263 }
264 if (!_depthStencilState && _device->raw()) {
265 auto* depthDesc = MTL::DepthStencilDescriptor::alloc()->init();
266 depthDesc->setDepthCompareFunction(MTL::CompareFunctionAlways);
267 depthDesc->setDepthWriteEnabled(false);
268 _depthStencilState = _device->raw()->newDepthStencilState(depthDesc);
269 depthDesc->release();
270 }
271 }
272
273 void MetalSsaoPass::execute(MTL::RenderCommandEncoder* encoder,
274 const SsaoPassParams& params,
275 MetalRenderPipeline* pipeline, const std::shared_ptr<RenderTarget>& renderTarget,
276 const std::vector<std::shared_ptr<MetalBindGroupFormat>>& bindGroupFormats,
277 MTL::SamplerState* defaultSampler, MTL::DepthStencilState* defaultDepthStencilState)
278 {
279 if (!encoder || !params.depthTexture) {
280 return;
281 }
282
283 ensureResources();
284 if (!_shader || !_composePass->vertexBuffer() || !_composePass->vertexFormat() || !_blendState || !_depthState) {
285 spdlog::warn("[executeSsaoPass] missing SSAO resources");
286 return;
287 }
288
289 Primitive primitive;
290 primitive.type = PRIMITIVE_TRIANGLES;
291 primitive.base = 0;
292 primitive.count = 3;
293 primitive.indexed = false;
294
295 auto pipelineState = pipeline->get(primitive, _composePass->vertexFormat(), nullptr, -1, _shader, renderTarget,
296 bindGroupFormats, _blendState, _depthState, CullMode::CULLFACE_NONE, false, nullptr, nullptr);
297 if (!pipelineState) {
298 spdlog::warn("[executeSsaoPass] failed to get pipeline state");
299 return;
300 }
301
302 auto* vb = dynamic_cast<MetalVertexBuffer*>(_composePass->vertexBuffer().get());
303 if (!vb || !vb->raw()) {
304 spdlog::warn("[executeSsaoPass] missing vertex buffer");
305 return;
306 }
307
308 encoder->setRenderPipelineState(pipelineState);
309 encoder->setCullMode(MTL::CullModeNone);
310 encoder->setDepthStencilState(_depthStencilState ? _depthStencilState : defaultDepthStencilState);
311 encoder->setVertexBuffer(vb->raw(), 0, 0);
312
313 auto* depthHw = dynamic_cast<gpu::MetalTexture*>(params.depthTexture->impl());
314 encoder->setFragmentTexture(depthHw ? depthHw->raw() : nullptr, 0);
315 if (defaultSampler) {
316 encoder->setFragmentSamplerState(defaultSampler, 0);
317 }
318
319 // IMPORTANT: This struct must match the Metal shader's SsaoUniforms layout exactly.
320 // Metal float2 has 8-byte alignment, so padding is needed after scalar floats that
321 // precede float2 members (aspect before invResolution, spiralTurns before angleIncCosSin).
322 struct alignas(16) SsaoUniforms
323 {
324 float aspect; // offset 0
325 float _pad0; // offset 4 (align invResolution to 8-byte boundary)
326 float invResolution[2]; // offset 8 (matches Metal float2)
327 float sampleCount[2]; // offset 16 (matches Metal float2)
328 float spiralTurns; // offset 24
329 float _pad1; // offset 28 (align angleIncCosSin to 8-byte boundary)
330 float angleIncCosSin[2]; // offset 32 (matches Metal float2)
331 float maxLevel; // offset 40
332 float invRadiusSquared; // offset 44
333 float minHorizonAngleSineSquared; // offset 48
334 float bias; // offset 52
335 float peak2; // offset 56
336 float intensity; // offset 60
337 float power; // offset 64
338 float projectionScaleRadius; // offset 68
339 float randomize; // offset 72
340 float cameraNear; // offset 76
341 float cameraFar; // offset 80
342 } uniforms{};
343
344 uniforms.aspect = params.aspect;
345 uniforms._pad0 = 0.0f;
346 uniforms.invResolution[0] = params.invResolutionX;
347 uniforms.invResolution[1] = params.invResolutionY;
348 uniforms.sampleCount[0] = static_cast<float>(params.sampleCount);
349 uniforms.sampleCount[1] = 1.0f / static_cast<float>(params.sampleCount);
350 uniforms.spiralTurns = params.spiralTurns;
351 uniforms._pad1 = 0.0f;
352 uniforms.angleIncCosSin[0] = params.angleIncCos;
353 uniforms.angleIncCosSin[1] = params.angleIncSin;
354 uniforms.maxLevel = 0.0f;
355 uniforms.invRadiusSquared = params.invRadiusSquared;
356 uniforms.minHorizonAngleSineSquared = params.minHorizonAngleSineSquared;
357 uniforms.bias = params.bias;
358 uniforms.peak2 = params.peak2;
359 uniforms.intensity = params.intensity;
360 uniforms.power = params.power;
361 uniforms.projectionScaleRadius = params.projectionScaleRadius;
362 uniforms.randomize = params.randomize;
363 uniforms.cameraNear = params.cameraNear;
364 uniforms.cameraFar = params.cameraFar;
365 encoder->setFragmentBytes(&uniforms, sizeof(SsaoUniforms), 5);
366
367 encoder->drawPrimitives(MTL::PrimitiveTypeTriangle, static_cast<NS::UInteger>(0),
368 static_cast<NS::UInteger>(3));
369 _device->recordDrawCall();
370 }
371}
std::shared_ptr< VertexFormat > vertexFormat() const
Shared vertex format (full-screen triangle, 14 floats per vertex).
std::shared_ptr< VertexBuffer > vertexBuffer() const
Shared vertex buffer (3-vertex full-screen triangle).
MTL::RenderPipelineState * get(const Primitive &primitive, const std::shared_ptr< VertexFormat > &vertexFormat0, const std::shared_ptr< VertexFormat > &vertexFormat1, int ibFormat, const std::shared_ptr< Shader > &shader, const std::shared_ptr< RenderTarget > &renderTarget, const std::vector< std::shared_ptr< MetalBindGroupFormat > > &bindGroupFormats, const std::shared_ptr< BlendState > &blendState, const std::shared_ptr< DepthState > &depthState, CullMode cullMode, bool stencilEnabled, const std::shared_ptr< StencilParameters > &stencilFront, const std::shared_ptr< StencilParameters > &stencilBack, const std::shared_ptr< VertexFormat > &instancingFormat=nullptr)
void execute(MTL::RenderCommandEncoder *encoder, const SsaoPassParams &params, MetalRenderPipeline *pipeline, const std::shared_ptr< RenderTarget > &renderTarget, const std::vector< std::shared_ptr< MetalBindGroupFormat > > &bindGroupFormats, MTL::SamplerState *defaultSampler, MTL::DepthStencilState *defaultDepthStencilState)
Execute the SSAO pass on the active render command encoder.
MetalSsaoPass(MetalGraphicsDevice *device, MetalComposePass *composePass)
gpu::HardwareTexture * impl() const
Definition texture.h:101
std::shared_ptr< Shader > createShader(GraphicsDevice *graphicsDevice, const ShaderDefinition &definition, const std::string &sourceCode)
Definition shader.cpp:39
@ PRIMITIVE_TRIANGLES
Definition mesh.h:23
Describes how vertex and index data should be interpreted for a draw call.
Definition mesh.h:33
PrimitiveType type
Definition mesh.h:34