@@ -455,18 +455,26 @@ def AIEInsertTraceFlows : Pass<"aie-insert-trace-flows", "DeviceOp"> {
455455 let summary = "Insert packet flows and runtime sequence trace setup";
456456 let description = [{
457457 For each aie.trace operation, this pass:
458- - Creates ONE packet flow from trace port to shim DMA
458+ - Creates packet flows from trace ports to shim DMA
459459 - Groups traces by target shim (minimizes shim usage, ideally 1)
460- - Inserts ONE shim buffer descriptor per shim tile for all traces
460+ - Inserts shim buffer descriptors per shim tile (one per S2MM channel used)
461461 - Inserts per-tile timer control register writes
462462 - Inserts per-shim broadcast and DMA control setup
463-
463+
464464 All trace configuration is injected at the beginning of the runtime
465465 sequence, before user data transfer operations.
466-
467- Multiple trace streams (from different tiles or different trace units
468- on the same tile) are routed to the same shim DMA channel and drained
469- by a single buffer descriptor.
466+
467+ By default, multiple trace streams are routed to the same shim DMA
468+ channel and drained by a single buffer descriptor. With
469+ `distribute-channels`, traces are round-robin distributed across two
470+ S2MM DMA channels per shim tile, each with its own BD. Both channels
471+ share the same host buffer argument (arg_idx) and are split by offset:
472+ channel 0 starts at the base offset, channel 1 at base + buffer_size.
473+ The host must allocate 2x buffer_size for the trace buffer.
474+
475+ With `lateral-routing`, trace destinations are redirected from columns
476+ with active cores to spare shim NOC columns, reducing data path
477+ perturbation. Use `lateral-target-col` to force a specific target column.
470478 }];
471479
472480 let constructor = "xilinx::AIE::createAIEInsertTraceFlowsPass()";
@@ -484,7 +492,13 @@ def AIEInsertTraceFlows : Pass<"aie-insert-trace-flows", "DeviceOp"> {
484492 Option<"clPacketIdStart", "packet-id-start", "int", "1",
485493 "Starting packet ID for trace flows (default: 1)">,
486494 Option<"clTraceBurstLength", "burst-length", "int", "64",
487- "DMA burst length for trace transfers (default: 64 bytes)">
495+ "DMA burst length for trace transfers (default: 64 bytes)">,
496+ Option<"clDistributeChannels", "distribute-channels", "bool", "false",
497+ "Distribute traces across multiple S2MM channels per shim tile">,
498+ Option<"clLateralRouting", "lateral-routing", "bool", "false",
499+ "Route traces to spare columns to minimize data path perturbation">,
500+ Option<"clLateralTargetCol", "lateral-target-col", "int", "-1",
501+ "Force lateral routing target column (-1 = auto-detect nearest spare)">
488502 ];
489503}
490504
0 commit comments