Skip to content

Commit 7b18542

Browse files
authored
perf(export): blit GPU composite result directly to canvas instead of readback (#125)
Replace the costly GPU→CPU readback path (mapAsync + putImageData) with a blit render pass that draws the composited texture directly onto a GPUCanvasContext. This eliminates per-frame buffer mapping, row-stride unpacking, and ImageData allocation during blend-mode compositing. Add a dedicated blit shader and pipeline to CompositorPipeline with compositeToCanvas(), and wire the render engine to use a persistent OffscreenCanvas with a configured GPU context. Falls back to the Canvas2D compositor path when GPU presentation isn't available.
1 parent 0efaec2 commit 7b18542

2 files changed

Lines changed: 176 additions & 34 deletions

File tree

src/features/export/utils/client-render-engine.ts

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ export async function createCompositionRenderer(
210210
// Lazily created from the effects pipeline's GPU device
211211
let gpuCompositor: CompositorPipeline | null = null;
212212
let gpuMaskManager: MaskTextureManager | null = null;
213+
let gpuCompositeCanvas: OffscreenCanvas | null = null;
214+
let gpuCompositeCtx: GPUCanvasContext | null = null;
215+
let gpuCompositeW = 0;
216+
let gpuCompositeH = 0;
213217

214218
function ensureGpuCompositor(): boolean {
215219
if (gpuCompositor) return true;
@@ -220,6 +224,35 @@ export async function createCompositionRenderer(
220224
return true;
221225
}
222226

227+
/**
 * Returns the persistent GPU presentation target used for blend-mode
 * compositing, creating or reconfiguring it on demand.
 *
 * The OffscreenCanvas is allocated once and reused. Its context is obtained
 * from `gpuPipeline.configureCanvas()` and is re-requested whenever no context
 * is cached or the requested size differs from the cached size.
 *
 * @param width - Required output width.
 * @param height - Required output height.
 * @returns The canvas and its configured context, or `null` when there is no
 *   `gpuPipeline` or `configureCanvas()` does not yield a context.
 */
function ensureGpuCompositeOutput(
  width: number,
  height: number,
): { canvas: OffscreenCanvas; ctx: GPUCanvasContext } | null {
  if (!gpuPipeline) {
    return null;
  }

  // Reuse the cached canvas when there is one; otherwise allocate and cache it.
  const canvas = gpuCompositeCanvas ?? new OffscreenCanvas(width, height);
  gpuCompositeCanvas = canvas;

  // Fast path: a context exists and was configured for exactly this size.
  const sizeUnchanged = gpuCompositeW === width && gpuCompositeH === height;
  if (gpuCompositeCtx && sizeUnchanged) {
    return { canvas, ctx: gpuCompositeCtx };
  }

  // Only touch the canvas dimensions when they actually differ.
  const needsResize = canvas.width !== width || canvas.height !== height;
  if (needsResize) {
    canvas.width = width;
    canvas.height = height;
  }

  const configured = gpuPipeline.configureCanvas(canvas);
  gpuCompositeCtx = configured;
  if (!configured) {
    // Drop the cached canvas and size so a later call starts from scratch.
    gpuCompositeCanvas = null;
    gpuCompositeW = 0;
    gpuCompositeH = 0;
    return null;
  }

  gpuCompositeW = width;
  gpuCompositeH = height;
  return { canvas, ctx: configured };
}
255+
223256
// Build lookup maps
224257
const keyframesMap = buildKeyframesMap(keyframes);
225258

@@ -1300,6 +1333,7 @@ export async function createCompositionRenderer(
13001333
// Render tracks in order (bottom to top), with transitions at their track position
13011334
// Track order: higher values render first (behind), lower values render last (on top)
13021335
let skippedTracks = 0;
1336+
let finalCompositeSource: OffscreenCanvas = contentCanvas;
13031337

13041338
// Parallelize item rendering (video decode is the bottleneck).
13051339
// Collect all renderable items in z-order, fire all renders concurrently,
@@ -1354,19 +1388,27 @@ export async function createCompositionRenderer(
13541388
(t) => t.type === 'item' && t.item.blendMode && t.item.blendMode !== 'normal',
13551389
);
13561390
const useGpuCompositor = hasNonNormalBlend && gpuPipeline && ensureGpuCompositor();
1391+
const gpuCompositeOutput = useGpuCompositor
1392+
? ensureGpuCompositeOutput(canvasSettings.width, canvasSettings.height)
1393+
: null;
13571394

1358-
if (useGpuCompositor && gpuCompositor && gpuMaskManager) {
1395+
if (useGpuCompositor && gpuCompositor && gpuMaskManager && gpuCompositeOutput) {
13591396
// GPU compositing path — pixel-perfect blend modes via WebGPU
13601397
const device = gpuPipeline!.getDevice();
13611398
const w = canvasSettings.width;
13621399
const h = canvasSettings.height;
13631400
const layers: CompositeLayer[] = [];
13641401
const layerTextures: GPUTexture[] = [];
1402+
const compositedResults: Array<{
1403+
task: typeof renderTasks[number];
1404+
result: { source: OffscreenCanvas; poolCanvases: OffscreenCanvas[] };
1405+
}> = [];
13651406

13661407
for (let i = 0; i < results.length; i++) {
13671408
const task = renderTasks[i]!;
13681409
const result = applyTrackScopedMasks(results[i] ?? null, task.trackOrder);
13691410
if (!result) continue;
1411+
compositedResults.push({ task, result });
13701412

13711413
const blendMode = task.type === 'item' ? (task.item.blendMode ?? 'normal') : 'normal';
13721414

@@ -1388,46 +1430,35 @@ export async function createCompositionRenderer(
13881430
textureView: tex.createView(),
13891431
maskView: gpuMaskManager.getFallbackView(),
13901432
});
1391-
1392-
for (const c of result.poolCanvases) canvasPool.release(c);
13931433
}
13941434

1395-
if (layers.length > 0) {
1396-
const commandEncoder = device.createCommandEncoder();
1397-
const composited = gpuCompositor.compositeToTexture(layers, w, h, commandEncoder);
1435+
const compositedToGpuCanvas = layers.length > 0
1436+
&& gpuCompositor.compositeToCanvas(layers, w, h, gpuCompositeOutput.ctx);
13981437

1399-
if (composited) {
1400-
// Readback composited result to Canvas2D
1401-
const bytesPerRow = Math.ceil(w * 4 / 256) * 256;
1402-
const readBuffer = device.createBuffer({
1403-
size: bytesPerRow * h,
1404-
usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
1405-
});
1406-
commandEncoder.copyTextureToBuffer(
1407-
{ texture: composited.texture },
1408-
{ buffer: readBuffer, bytesPerRow },
1409-
{ width: w, height: h },
1410-
);
1411-
device.queue.submit([commandEncoder.finish()]);
1412-
1413-
await readBuffer.mapAsync(GPUMapMode.READ);
1414-
const mapped = new Uint8Array(readBuffer.getMappedRange());
1415-
const pixels = new Uint8ClampedArray(w * h * 4);
1416-
for (let row = 0; row < h; row++) {
1417-
pixels.set(
1418-
mapped.subarray(row * bytesPerRow, row * bytesPerRow + w * 4),
1419-
row * w * 4,
1420-
);
1438+
if (compositedToGpuCanvas) {
1439+
finalCompositeSource = gpuCompositeOutput.canvas;
1440+
} else {
1441+
// Fall back to the established Canvas2D compositor if the GPU target
1442+
// isn't available for this frame. This preserves feature parity and
1443+
// avoids dropping content when WebGPU canvas presentation fails.
1444+
for (const { task, result } of compositedResults) {
1445+
const blendMode = task.type === 'item' ? task.item.blendMode : undefined;
1446+
if (blendMode && blendMode !== 'normal') {
1447+
contentCtx.globalCompositeOperation = getCompositeOperation(blendMode);
14211448
}
1422-
readBuffer.unmap();
1423-
readBuffer.destroy();
14241449

1425-
contentCtx.putImageData(new ImageData(pixels, w, h), 0, 0);
1426-
} else {
1427-
device.queue.submit([commandEncoder.finish()]);
1450+
contentCtx.drawImage(result.source, 0, 0);
1451+
1452+
if (blendMode && blendMode !== 'normal') {
1453+
contentCtx.globalCompositeOperation = 'source-over';
1454+
}
14281455
}
14291456
}
14301457

1458+
for (const { result } of compositedResults) {
1459+
for (const c of result.poolCanvases) canvasPool.release(c);
1460+
}
1461+
14311462
// Destroy per-frame textures
14321463
for (const tex of layerTextures) tex.destroy();
14331464
} else {
@@ -1458,7 +1489,7 @@ export async function createCompositionRenderer(
14581489
log.debug(`Occlusion culling: skipped ${skippedTracks} tracks at frame ${frame}`);
14591490
}
14601491

1461-
ctx.drawImage(contentCanvas, 0, 0);
1492+
ctx.drawImage(finalCompositeSource, 0, 0);
14621493

14631494
// Release content canvas back to pool
14641495
canvasPool.release(contentCanvas);
@@ -1616,6 +1647,10 @@ export async function createCompositionRenderer(
16161647
gpuCompositor = null;
16171648
gpuMaskManager?.destroy();
16181649
gpuMaskManager = null;
1650+
gpuCompositeCtx = null;
1651+
gpuCompositeCanvas = null;
1652+
gpuCompositeW = 0;
1653+
gpuCompositeH = 0;
16191654
gpuTransitionPipeline?.destroy();
16201655
gpuTransitionPipeline = null;
16211656
gpuPipeline?.destroy();

src/lib/gpu-compositor/compositor-pipeline.ts

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,36 @@ const logger = createLogger('CompositorPipeline');
1919

2020
// ─── Shader ───
2121

22+
// WGSL source for the full-screen blit.
// - vertexMain: picks one of six hard-coded clip-space positions (two
//   triangles spanning -1..1 on both axes) by vertex index, paired with a UV
//   whose v coordinate is flipped relative to clip-space y (clip y = -1 maps
//   to v = 1).
// - blitFragment: returns the texel sampled from `inputTex` through
//   `texSampler` (group 0, bindings 1 and 0) at the interpolated UV, with no
//   further processing.
const BLIT_SHADER = /* wgsl */ `
23+
struct VertexOutput {
24+
@builtin(position) position: vec4f,
25+
@location(0) uv: vec2f,
26+
};
27+
@vertex
28+
fn vertexMain(@builtin(vertex_index) vi: u32) -> VertexOutput {
29+
var pos = array<vec2f, 6>(
30+
vec2f(-1,-1), vec2f(1,-1), vec2f(-1,1),
31+
vec2f(-1,1), vec2f(1,-1), vec2f(1,1)
32+
);
33+
var uv = array<vec2f, 6>(
34+
vec2f(0,1), vec2f(1,1), vec2f(0,0),
35+
vec2f(0,0), vec2f(1,1), vec2f(1,0)
36+
);
37+
var o: VertexOutput;
38+
o.position = vec4f(pos[vi], 0, 1);
39+
o.uv = uv[vi];
40+
return o;
41+
}
42+
43+
@group(0) @binding(0) var texSampler: sampler;
44+
@group(0) @binding(1) var inputTex: texture_2d<f32>;
45+
46+
@fragment
47+
fn blitFragment(input: VertexOutput) -> @location(0) vec4f {
48+
return textureSample(inputTex, texSampler, input.uv);
49+
}
50+
`;
51+
2252
const VERTEX_SHADER = /* wgsl */ `
2353
struct VertexOutput {
2454
@builtin(position) position: vec4f,
@@ -276,6 +306,7 @@ function packUniforms(p: CompositeLayerParams): Float32Array {
276306

277307
export class CompositorPipeline {
278308
private device: GPUDevice;
309+
private canvasFormat: GPUTextureFormat;
279310
private sampler: GPUSampler;
280311
private uniformBuffer: GPUBuffer;
281312

@@ -284,19 +315,24 @@ export class CompositorPipeline {
284315

285316
private externalPipeline: GPURenderPipeline | null = null;
286317
private externalLayout: GPUBindGroupLayout | null = null;
318+
private blitPipeline: GPURenderPipeline | null = null;
319+
private blitLayout: GPUBindGroupLayout | null = null;
287320

288321
private pingTexture: GPUTexture | null = null;
289322
private pongTexture: GPUTexture | null = null;
290323
private pingView: GPUTextureView | null = null;
291324
private pongView: GPUTextureView | null = null;
292325
private texW = 0;
293326
private texH = 0;
327+
private blitBindGroupPing: GPUBindGroup | null = null;
328+
private blitBindGroupPong: GPUBindGroup | null = null;
294329

295330
// Last packed uniforms for change detection
296331
private lastUniforms: Float32Array | null = null;
297332

298333
constructor(device: GPUDevice) {
299334
this.device = device;
335+
this.canvasFormat = navigator.gpu.getPreferredCanvasFormat();
300336
this.sampler = device.createSampler({ magFilter: 'linear', minFilter: 'linear' });
301337
this.uniformBuffer = device.createBuffer({
302338
size: UNIFORM_SIZE,
@@ -361,6 +397,29 @@ export class CompositorPipeline {
361397
this.externalPipeline = null;
362398
this.externalLayout = null;
363399
}
400+
401+
try {
402+
const module = this.device.createShaderModule({
403+
label: 'compositor-blit',
404+
code: BLIT_SHADER,
405+
});
406+
this.blitLayout = this.device.createBindGroupLayout({
407+
label: 'compositor-blit-layout',
408+
entries: [
409+
{ binding: 0, visibility: GPUShaderStage.FRAGMENT, sampler: {} },
410+
{ binding: 1, visibility: GPUShaderStage.FRAGMENT, texture: {} },
411+
],
412+
});
413+
this.blitPipeline = this.device.createRenderPipeline({
414+
label: 'compositor-blit-pipeline',
415+
layout: this.device.createPipelineLayout({ bindGroupLayouts: [this.blitLayout] }),
416+
vertex: { module, entryPoint: 'vertexMain' },
417+
fragment: { module, entryPoint: 'blitFragment', targets: [{ format: this.canvasFormat }] },
418+
primitive: { topology: 'triangle-list' },
419+
});
420+
} catch (e) {
421+
logger.warn('Failed to create compositor blit pipeline', e);
422+
}
364423
}
365424

366425
private ensurePingPong(w: number, h: number): void {
@@ -379,6 +438,8 @@ export class CompositorPipeline {
379438
this.pongView = this.pongTexture.createView();
380439
this.texW = w;
381440
this.texH = h;
441+
this.blitBindGroupPing = null;
442+
this.blitBindGroupPong = null;
382443
}
383444

384445
private writeUniforms(params: CompositeLayerParams): void {
@@ -486,6 +547,52 @@ export class CompositorPipeline {
486547
return { texture: inputTex, view: inputView };
487548
}
488549

550+
/**
 * Composites `layers` via `compositeToTexture()` and blits the result onto
 * the current texture of `outputCtx` in the same command submission.
 *
 * @param layers - Layers handed unchanged to `compositeToTexture()`.
 * @param width - Composite width passed to `compositeToTexture()`.
 * @param height - Composite height passed to `compositeToTexture()`.
 * @param outputCtx - Canvas context whose current texture receives the blit.
 * @returns `true` once the composite + blit commands have been submitted;
 *   `false` when the blit pipeline is unavailable, `compositeToTexture()`
 *   yields nothing, or the canvas target cannot be acquired. Nothing is
 *   submitted in the `false` cases, so callers can fall back to another path.
 */
compositeToCanvas(
  layers: CompositeLayer[],
  width: number,
  height: number,
  outputCtx: GPUCanvasContext,
): boolean {
  const { blitPipeline, blitLayout } = this;
  if (!blitPipeline || !blitLayout) return false;

  const commandEncoder = this.device.createCommandEncoder();
  const composited = this.compositeToTexture(layers, width, height, commandEncoder);
  if (!composited) {
    return false;
  }

  // getCurrentTexture() throws synchronously when the context is not
  // configured. Report that as a soft failure so the caller's fallback and
  // cleanup still run instead of an exception unwinding through the frame.
  let targetView: GPUTextureView;
  try {
    targetView = outputCtx.getCurrentTexture().createView();
  } catch (e) {
    logger.warn('Failed to acquire compositor blit target', e);
    return false;
  }

  const createBlitBindGroup = (view: GPUTextureView): GPUBindGroup =>
    this.device.createBindGroup({
      layout: blitLayout,
      entries: [
        { binding: 0, resource: this.sampler },
        { binding: 1, resource: view },
      ],
    });

  // Bind groups for the ping/pong targets are cached (the fields are reset to
  // null when those textures are recreated). Any other result texture is bound
  // through its own view, uncached, rather than being assumed to be pong.
  let blitBindGroup: GPUBindGroup;
  if (composited.texture === this.pingTexture && this.pingView) {
    blitBindGroup = (this.blitBindGroupPing ??= createBlitBindGroup(this.pingView));
  } else if (composited.texture === this.pongTexture && this.pongView) {
    blitBindGroup = (this.blitBindGroupPong ??= createBlitBindGroup(this.pongView));
  } else {
    blitBindGroup = createBlitBindGroup(composited.view);
  }

  const outputPass = commandEncoder.beginRenderPass({
    colorAttachments: [{
      view: targetView,
      // Explicit transparent black; identical to the WebGPU default.
      clearValue: { r: 0, g: 0, b: 0, a: 0 },
      loadOp: 'clear',
      storeOp: 'store',
    }],
  });
  outputPass.setPipeline(blitPipeline);
  outputPass.setBindGroup(0, blitBindGroup);
  outputPass.draw(6);
  outputPass.end();

  this.device.queue.submit([commandEncoder.finish()]);
  return true;
}
595+
489596
getDevice(): GPUDevice {
490597
return this.device;
491598
}

0 commit comments

Comments
 (0)