ocen/compiler/formatter.oc at master · ocen-lang/ocen · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//* The ocen code formatter (v2)
//*
//* Architecture:
//*   - Doc IR layer: Wadler-Lindig style intermediate representation
//*     (Text, Line, SoftLine, Indent, Group, IfBreak, Concat).
//*     Width-aware renderer (doc_render) decides group breaking.
//*     build_collection_doc() demonstrates end-to-end Doc IR pipeline.
//*
//*   - Plan/Emit separation: formatting decisions are computed in a
//*     planning phase (plan_collection, plan_params) producing typed
//*     plan structs, then emitted separately (emit_collection, emit_params).
//*     No decisions are made during the emission phase.
//*
//*   - Pure decision functions: should_break_collection(), etc.
//*     decouple policy from traversal. Source-shape-driven currently,
//*     but designed to accept width budgets for future line-length rules.
//*
//*   - FormatOptions: centralized config (indent_size, line_width, etc.)
//*
//*   - CommentIndex: O(1) comment lookups by line number.
//*
//*   - Range reconstruction: helper methods (emit_source_lines,
//*     emit_formatted_lines, emit_mapping_substitution, filter_leaf_mappings,
//*     reconstruct_range) with validate_mappings() debug assertions.
//*
//*   - Consolidated patterns: emit_item_preamble() unifies the
//*     blank-line/comment preamble logic across format_block/format_struct.

import std::buffer::{ Buffer }
import std::vector::{ Vector }
import std::span::{ Span, Location }
import std::sv::{ SV }
import std::mem
import std::fs
import std::{ shift_args }
import std::sort::{ sort_by }
import std::libc::{ memset }
import @ast::nodes::{ * }
import @ast::program::{ Namespace, Program, CompilerOption }
import @ast::operators::{ Operator }
import @ast::scopes::{ Symbol, SymbolType }
import @tokens::{ Token, TokenType, Comment }
import @types::{ Type, BaseType, FunctionType, ArrayType }
import @parser::{ Parser }
import @lexer::{ Lexer }
import @errors::{ Error }
import @attributes::{ AttributeType }

//! Doc IR: Intermediate representation for formatting decisions.
//!
//! Instead of emitting text directly while walking the AST, we can
//! build a tree of Doc nodes that describe the *intent* of the
//! formatting. A renderer then decides how to lay out the output,
//! choosing whether to break groups based on line width constraints.
//!
//! This is the same fundamental approach used by Prettier, the
//! Wadler-Lindig pretty printers, and other modern formatters.
//!
//! DocKind is the tag of a Doc node; it selects which member of the
//! node's payload union is meaningful.
enum DocKind {
    Text      //! Literal text (no newlines)
    Line      //! Hard line break — always newline + indent
    SoftLine  //! Newline + indent if the enclosing group breaks, else a single space
    SoftEmpty //! Newline + indent if the enclosing group breaks, else nothing
    Concat    //! Sequence of child docs, rendered in order
    DocIndent //! Render the child with the indent level increased by one
    Group     //! Try to fit on one line; break if too wide
    IfBreak   //! Choose between break/flat alternatives based on the current mode
}

// Payload of a Doc node. Which member is valid depends on Doc.kind;
// the Doc::new_* constructors set exactly one of them.
union DocUnion {
    text: str // Set by Doc::new_text (kind Text)
    children: &Vector<&Doc> // Set by Doc::new_concat (kind Concat)
    child: &Doc // Set by Doc::new_indent / Doc::new_group (kinds DocIndent, Group)
    if_break: DocIfBreak // Set by Doc::new_if_break (kind IfBreak)
}

// The two alternatives carried by an IfBreak node. doc_render picks
// break_doc when the current render mode is Break, and flat_doc otherwise.
struct DocIfBreak {
    break_doc: &Doc // Rendered in Break mode
    flat_doc: &Doc // Rendered in Flat mode; also the one measured by doc_flat_width
}

// A node of the Doc IR tree: a kind tag plus the payload for that kind.
struct Doc {
    kind: DocKind // Tag selecting the valid member of `u`
    u: DocUnion // Kind-specific payload
}

// Heap-allocate a Doc node and tag it with `kind`.
// The payload union is not touched here; the Doc::new_* constructors
// fill in the member that matches the kind.
def Doc::make(kind: DocKind): &Doc {
    let node = mem::alloc<Doc>()
    node.kind = kind
    return node
}

// Create a Text node holding the literal string `s`.
// The string pointer is stored as-is (no copy is made here).
def Doc::new_text(s: str): &Doc {
    let node = Doc::make(DocKind::Text)
    node.u.text = s
    return node
}

// Create a hard line break node (kind Line).
def Doc::new_line(): &Doc {
    return Doc::make(DocKind::Line)
}

// Create a soft line break node (kind SoftLine): a newline when the
// enclosing group breaks, a single space otherwise.
def Doc::new_softline(): &Doc {
    return Doc::make(DocKind::SoftLine)
}

// Create an empty soft line break node (kind SoftEmpty): a newline when
// the enclosing group breaks, nothing at all otherwise.
def Doc::new_softline_empty(): &Doc {
    return Doc::make(DocKind::SoftEmpty)
}

// Create a Concat node over `parts`. The vector is referenced, not
// copied, so later changes to it are visible through the node.
def Doc::new_concat(parts: &Vector<&Doc>): &Doc {
    let node = Doc::make(DocKind::Concat)
    node.u.children = parts
    return node
}

// Wrap `inner` in a DocIndent node, so it is rendered one indent
// level deeper than its surroundings.
def Doc::new_indent(inner: &Doc): &Doc {
    let node = Doc::make(DocKind::DocIndent)
    node.u.child = inner
    return node
}

// Wrap `inner` in a Group node. The renderer lays a group out flat
// when it fits the line width, and in break mode otherwise.
def Doc::new_group(inner: &Doc): &Doc {
    let node = Doc::make(DocKind::Group)
    node.u.child = inner
    return node
}

// Create an IfBreak node: `break_doc` is rendered when the enclosing
// group is broken, `flat_doc` when it is laid out flat.
def Doc::new_if_break(break_doc: &Doc, flat_doc: &Doc): &Doc {
    let node = Doc::make(DocKind::IfBreak)
    node.u.if_break = DocIfBreak(break_doc, flat_doc)
    return node
}

// Interleave `items` with `sep` and wrap the result in a Concat node.
// The same `sep` node is shared between every pair of neighbours; an
// empty `items` yields an empty Concat.
def doc_join(items: &Vector<&Doc>, sep: &Doc): &Doc {
    let joined = Vector<&Doc>::new()
    let idx = 0u32
    while idx < items.size {
        if idx != 0 {
            joined.push(sep)
        }
        joined.push(items.at(idx))
        idx += 1
    }
    return Doc::new_concat(joined)
}

// Create a new, empty vector of Doc nodes. Callers push parts into it
// and typically hand it to Doc::new_concat or doc_join.
def doc_list(): &Vector<&Doc> {
    return Vector<&Doc>::new()
}

// Render mode for the doc printer. doc_render chooses one mode per
// Group and passes it down to everything inside that group.
enum RenderMode {
    Flat  // Soft breaks render as a space (SoftLine) or nothing (SoftEmpty)
    Break // Soft breaks render as a newline followed by indentation
}

// A command on the render stack: one Doc node still to be printed,
// together with the context it must be printed in.
struct RenderCommand {
    indent: u32 // Indent level (multiplied by indent_size when a newline is emitted)
    mode: RenderMode // Flat or Break, inherited from the nearest enclosing Group
    doc: &Doc // The node to render
}

// Compute how many characters `doc` occupies when rendered entirely in
// flat mode. Returns -1 when the tree contains a hard Line, because
// such a tree can never be laid out on a single line.
def doc_flat_width(doc: &Doc): i32 {
    match doc.kind {
        Line => return -1
        SoftEmpty => return 0 // renders as nothing when flat
        SoftLine => return 1  // renders as one space when flat
        Text => return doc.u.text.len() as i32
        DocIndent | Group => return doc_flat_width(doc.u.child)
        IfBreak => return doc_flat_width(doc.u.if_break.flat_doc)
        Concat => {
            let sum = 0i32
            let parts = doc.u.children
            for let idx = 0u32; idx < parts.size; idx++ {
                let part_width = doc_flat_width(parts.at(idx))
                // A hard line anywhere poisons the whole sequence.
                if part_width < 0 then return -1
                sum += part_width
            }
            return sum
        }
    }
    return 0
}

// Render a Doc tree to a string.
//
// Parameters:
//   doc:         root of the Doc tree to print
//   indent_size: number of spaces per indent level
//   line_width:  target line width used to decide whether a Group fits
//                on the current line. 0 means "no width limit", matching
//                FormatOptions.line_width (0 = disabled): groups are then
//                laid out flat unless they contain a hard Line.
//
// Returns the rendered text (the contents of the internal Buffer).
//
// The algorithm uses a stack-based approach (Wadler-Lindig):
// - Push commands onto the stack in reverse order
// - For each Group, check if the flat version fits in remaining width
// - If it fits, render in Flat mode; otherwise, in Break mode
//
// The fit check only measures the group itself; text that follows the
// group on the same line is not taken into account.
def doc_render(doc: &Doc, indent_size: u32, line_width: u32): str {
    let out = Buffer::make()
    let stack = Vector<RenderCommand>::new()
    // The root is rendered in Break mode; Groups switch to Flat when they fit.
    stack.push(RenderCommand(indent: 0, mode: RenderMode::Break, doc))

    // Current output column (0-based), used for the Group fit check.
    let col = 0u32
    // A width of 0 disables width-aware breaking instead of forcing
    // every non-empty group to break.
    let unlimited_width = line_width == 0

    while stack.size > 0 {
        let cmd = stack.pop()
        match cmd.doc.kind {
            Text => {
                out += cmd.doc.u.text
                col += cmd.doc.u.text.len()
            }
            Line => {
                out += '\n'
                for let i = 0u32; i < cmd.indent * indent_size; i++ {
                    out += ' '
                }
                col = cmd.indent * indent_size
            }
            SoftLine => {
                if cmd.mode == RenderMode::Break {
                    out += '\n'
                    for let i = 0u32; i < cmd.indent * indent_size; i++ {
                        out += ' '
                    }
                    col = cmd.indent * indent_size
                } else {
                    out += ' '
                    col += 1
                }
            }
            SoftEmpty => {
                if cmd.mode == RenderMode::Break {
                    out += '\n'
                    for let i = 0u32; i < cmd.indent * indent_size; i++ {
                        out += ' '
                    }
                    col = cmd.indent * indent_size
                }
                // flat mode: emit nothing
            }
            Concat => {
                let parts = cmd.doc.u.children
                // Push in reverse order so first part is processed first
                for let i = parts.size; i > 0; i-- {
                    stack.push(RenderCommand(indent: cmd.indent, mode: cmd.mode, doc: parts.at(i - 1)))
                }
            }
            DocIndent => {
                stack.push(RenderCommand(indent: cmd.indent + 1, mode: cmd.mode, doc: cmd.doc.u.child))
            }
            Group => {
                // A group containing a hard Line (flat_w < 0) can never be flat.
                // Otherwise it is flat when there is no width limit, or when
                // its flat width fits in what is left of the current line.
                let flat_w = doc_flat_width(cmd.doc.u.child)
                let fits = flat_w >= 0 and (unlimited_width or (col + flat_w as u32) <= line_width)
                let group_mode = if fits then RenderMode::Flat else RenderMode::Break
                stack.push(RenderCommand(indent: cmd.indent, mode: group_mode, doc: cmd.doc.u.child))
            }
            IfBreak => {
                let chosen = if cmd.mode == RenderMode::Break {
                    yield cmd.doc.u.if_break.break_doc
                } else {
                    yield cmd.doc.u.if_break.flat_doc
                }

                stack.push(RenderCommand(indent: cmd.indent, mode: cmd.mode, doc: chosen))
            }
        }
    }

    return out.str()
}

//! Centralized formatting options.
//! All formatting decisions should be routed through this struct
//! to make it easy to add new options in the future.
struct FormatOptions {
    indent_size: u32 // Number of spaces written per indent level
    line_width: u32 // Target line width for group breaking (0 = disabled)
}

//* Default options: 4-space indentation and line_width 0, i.e. only the
//* source-shape heuristics are used (no width-aware breaking).
def FormatOptions::default(): FormatOptions {
    let defaults = FormatOptions(indent_size: 4, line_width: 0)
    return defaults
}

//* True when a non-zero line_width is configured, i.e. line-width-aware
//* formatting is switched on.
def FormatOptions::width_enabled(&this): bool {
    return .line_width != 0
}

//! Decision functions: pure policy for formatting choices.
//!
//! These decouple "should we break?" from the AST traversal logic.
//! Currently source-shape-driven (matching existing behavior), but
//! the interface is designed to accept width budgets for future
//! line-length-aware formatting.
// Decide whether a collection (call args, array literal, etc.)
// should be formatted in multiline mode.
//
// Parameters:
//   start_line/end_line: source lines on which the collection starts/ends
//   has_comments: whether there are inline comments in the range
//   options: formatting options (currently unused; reserved for
//            width-aware decisions)
//
// Current policy: break only when the collection spans several source
// lines AND contains inline comments.
def should_break_collection(start_line: u32, end_line: u32, has_comments: bool, options: &FormatOptions): bool {
    // Single-line in the source: never break.
    if start_line == end_line then return false
    return has_comments
}

// Decide whether a vector/map literal should break.
// Current policy: break whenever the literal spans more than one source
// line, regardless of comments. `options` is currently unused.
def should_break_list_literal(start_line: u32, end_line: u32, options: &FormatOptions): bool {
    let single_line = start_line == end_line
    return not single_line
}

// Decide whether function parameters should be multiline.
// Current policy: break only when the last parameter is on a later
// source line than the first AND there are inline comments.
// `options` is currently unused.
def should_break_params(first_param_line: u32, last_param_line: u32, has_comments: bool, options: &FormatOptions): bool {
    if last_param_line <= first_param_line then return false
    return has_comments
}

// Decide whether a binary op RHS should be on the next line.
// Current policy: break when the RHS starts on a later source line than
// the operator, but never inside a format string.
// `options` is currently unused.
def should_break_binary_rhs(op_line: u32, rhs_start_line: u32, in_format_str: bool, options: &FormatOptions): bool {
    if in_format_str then return false
    return rhs_start_line > op_line
}

//! Formatting plans: separate decision-making from emission.
//!
//! A plan captures all formatting choices for a construct (multiline,
//! trailing commas, etc.) as a pure data structure. The emission
//! phase then reads the plan without making any further decisions.
//!
//! This separation allows:
//! - Testing decisions independently from text generation
//! - Future Doc IR generation from the same plan
//! - Width-aware re-planning without changing emission logic
//!
//! CollectionPlan is the plan for a delimited collection of items.
struct CollectionPlan {
    multiline: bool // Whether to break items across multiple lines
    size: u32       // Number of items in the collection
    open: str       // Opening delimiter
    close: str      // Closing delimiter
}

// Plan for formatting function parameters: the decisions are recorded
// here so that emission can read them without re-deciding.
struct ParamPlan {
    multiline: bool   // Whether to break params across multiple lines
    param_count: u32  // Number of parameters
    is_variadic: bool // Whether function has variadic "..."
}

//! Tag of the tagged union (Decl / DeclUnion) that represents a
//! top-level declaration in source order. Each variant selects which
//! DeclUnion member is valid.
enum DeclType {
    Function
    Structure
    Enum
    Constant
    Variable
    Import
    Namespace
    TypeDef
    CompilerOpt
}

// Payload for a DeclType::TypeDef declaration.
struct TypeDefInfo {
    name: str // presumably the name introduced by the typedef — confirm against where Decls are built
    type: &Type // presumably the type the name refers to — confirm against where Decls are built
    span: Span // Source span; decl_end_line reads span.end.line from it
}

// One top-level declaration: a DeclType tag, a source position, and the
// payload for that tag.
struct Decl {
    type: DeclType // Selects the valid member of `u`
    line: u32 // presumably the source line where the declaration starts — confirm where Decls are built
    col: u32 // presumably the source column where the declaration starts — confirm where Decls are built
    u: DeclUnion // Tag-specific payload
}

// Payload of a Decl. The member that is valid is selected by Decl.type;
// the pairing below is the one decl_end_line relies on.
union DeclUnion {
    func: &Function // DeclType::Function
    struc: &Structure // DeclType::Structure
    enom: &Enum // DeclType::Enum
    var_node: &AST // DeclType::Constant and DeclType::Variable
    import_node: &AST // DeclType::Import
    ns: &Namespace // DeclType::Namespace
    type_def: TypeDefInfo // DeclType::TypeDef
    compiler_opt: CompilerOption // DeclType::CompilerOpt
}

// Associates a range in the source with a range in the formatted output.
// NOTE(review): the values look like line numbers (the Formatter tracks
// output_line and calls these "source↔output mappings"); whether the end
// bounds are inclusive is not visible here — confirm at the use sites.
struct DeclMapping {
    source_start: u32 // Start of the range in the original source
    source_end: u32 // End of the range in the original source
    output_start: u32 // Start of the corresponding range in the output
    output_end: u32 // End of the corresponding range in the output
}

//* Return the source line on which a declaration ends, read from the
//* span of whichever payload the declaration's type selects.
def decl_end_line(decl: &Decl): u32 {
    let decl_span = match decl.type {
        Function => decl.u.func.span
        Structure => decl.u.struc.span
        Enum => decl.u.enom.span
        Constant | Variable => decl.u.var_node.span
        Import => decl.u.import_node.span
        Namespace => decl.u.ns.span
        TypeDef => decl.u.type_def.span
        CompilerOpt => decl.u.compiler_opt.span
    }
    return decl_span.end.line
}

//! Per-line comment index for O(1) lookups.
//! Maps line numbers to indices in the comment array,
//! avoiding expensive linear scans for comment queries.
struct CommentIndex {
    // For each line, the indices of comments on that line.
    // line_to_comments[line] gives a vector of comment indices.
    // CommentIndex::build allocates num_lines + 1 buckets so that the
    // line number can be used directly as the index, and files each
    // comment under the line on which its span starts.
    line_to_comments: &Vector<&Vector<u32>>
    // Total number of lines; the largest line number that has a bucket.
    num_lines: u32
}

//* Build the per-line index for `all_comments`.
//*
//* One bucket is created for every line number in 0..=num_lines, so a
//* line number can be used directly as an index. Each comment is filed
//* under the line where its span starts; comments starting beyond
//* num_lines are left out of the index.
def CommentIndex::build(all_comments: &Vector<Comment>, num_lines: u32): CommentIndex {
    let buckets = Vector<&Vector<u32>>::new()
    let slot = 0u32
    while slot <= num_lines {
        buckets.push(Vector<u32>::new())
        slot += 1
    }

    for let ci = 0u32; ci < all_comments.size; ci++ {
        let start_line = all_comments.at(ci).span.start.line
        // No bucket exists for lines past num_lines.
        if start_line > num_lines then continue
        buckets.at(start_line).push(ci)
    }
    return CommentIndex(line_to_comments: buckets, num_lines)
}

//* Return the indices of all comments that start on `line`.
//* For a line beyond num_lines a freshly allocated empty vector is
//* returned instead of a bucket from the index.
def CommentIndex::comments_on_line(&this, line: u32): &Vector<u32> {
    if line <= .num_lines {
        return .line_to_comments.at(line)
    }
    return Vector<u32>::new()
}

// State of one formatting run over a single source file.
struct Formatter {
    output: Buffer // Formatted text produced so far (write/newline/write_indent append here)
    indent: u32 // Current indent level; write_indent emits indent * options.indent_size spaces
    options: FormatOptions
    source: str // Source text of the file being formatted
    line_offsets: &Vector<u32> // Offset in `source` at which each line starts (see build_line_offsets)
    filename: str
    comments: &Vector<Comment> // Only the comments whose span starts in `filename`
    comment_index: u32 // Comment cursor; saved/restored around measurement passes
    program: &Program
    ns: &Namespace

    // Range formatting fields
    range_start: u32 // Start line of range (0 = no range, format all)
    range_end: u32   // End line of range (0 = no range)
    output_line: u32 // Tracks current output line (1-based)
    decl_mappings: &Vector<DeclMapping>
    stmt_mappings: &Vector<DeclMapping> // Statement-level source↔output mappings
    track_stmts: bool                   // Whether to record stmt_mappings in format_block

    in_format_str: bool = false // Suppress multi-line formatting inside format strings
    in_binary_continuation: bool = false // Track if we're in a binary op continuation indent
    ic_lines: &Vector<u32>      // Output line numbers of emitted inline comments
    last_comment_request_line: u32
    debug_cursor: bool // Log cursor regressions to stderr (set from OCEN_FORMAT_DEBUG_CURSOR)
    cursor_regressions: u32 // Number of regressions counted by note_cursor_regression
    comment_emitted: &Vector<bool>   // Track which comments have been emitted
    comment_line_index: CommentIndex // Per-line comment index for O(1) lookups
    output_col: u32 // Current column in the output (0-based)
}

//* Snapshot of comment-related state, used by measurement functions.
//* Measurement must not permanently advance the comment cursor or mark
//* comments as emitted, since the measured text is discarded.
//* Produced by Formatter::save_comment_state and consumed by
//* Formatter::restore_comment_state.
struct CommentState {
    comment_index: u32 // Saved Formatter.comment_index
    last_comment_request_line: u32 // Saved Formatter.last_comment_request_line
    cursor_regressions: u32 // Saved Formatter.cursor_regressions
    emitted_snapshot: &Vector<bool> // Copy of Formatter.comment_emitted, or null when no copy was taken
}

//* Capture the comment-tracking state so that it can be put back with
//* restore_comment_state after a dry-run measurement pass.
//*
//* The three scalar fields are always captured. The comment_emitted
//* flags are deep-copied only when in_format_str is false: in
//* measurement mode (in_format_str=true) emit_inline_comment and
//* emit_comments_before are no-ops, so the flags cannot change and the
//* copy is skipped. In that case emitted_snapshot is null, which
//* restore_comment_state treats as "nothing to restore".
def Formatter::save_comment_state(&this): CommentState {
    let snapshot: &Vector<bool> = null
    if not .in_format_str {
        // Slow path for non-measurement callers (e.g. capture_collection_item).
        snapshot = Vector<bool>::new()
        let idx = 0u32
        while idx < .comment_emitted.size {
            snapshot.push(.comment_emitted.at(idx))
            idx += 1
        }
    }
    return CommentState(
        comment_index: .comment_index,
        last_comment_request_line: .last_comment_request_line,
        cursor_regressions: .cursor_regressions,
        emitted_snapshot: snapshot,
    )
}

//* Put back the comment-tracking state captured by save_comment_state.
//* The scalar fields are always restored; the comment_emitted flags are
//* overwritten only when the snapshot carries a copy of them (non-null).
def Formatter::restore_comment_state(&this, state: &CommentState) {
    .cursor_regressions = state.cursor_regressions
    .last_comment_request_line = state.last_comment_request_line
    .comment_index = state.comment_index

    let flags = state.emitted_snapshot
    if not flags? then return
    let idx = 0u32
    while idx < flags.size {
        .comment_emitted.data[idx] = flags.at(idx)
        idx += 1
    }
}

//* Build the table of line start offsets for `source`.
//*
//* The result always begins with 0 (start of the first line) and then
//* holds, for every '\n' in the text, the offset of the character right
//* after it. A text with N newlines therefore yields N + 1 entries,
//* including an entry for the empty line after a trailing newline.
//*
//* A null `source` is treated like an empty string and yields just [0],
//* instead of dereferencing the null pointer.
def build_line_offsets(source: str): &Vector<u32> {
    let offsets = Vector<u32>::new()
    offsets.push(0)
    // No text to scan: only the first (empty) line exists.
    if not source? then return offsets
    for let i = 0u32; source[i] != '\0'; i++ {
        if source[i] == '\n' {
            // The next line starts right after the newline character.
            offsets.push(i + 1)
        }
    }
    return offsets
}

//* Create a Formatter for one source file.
//*
//* - Keeps only the comments whose span starts in `filename`.
//* - Starts with every comment marked as not yet emitted.
//* - Builds the line offset table and the per-line comment index.
//* - Enables cursor debug logging when the OCEN_FORMAT_DEBUG_CURSOR
//*   environment variable is set to a non-empty value.
//*
//* The formatter starts at indent 0, output line 1, column 0, with no
//* range restriction and default FormatOptions.
def Formatter::make(program: &Program, ns: &Namespace, source: str, filename: str): Formatter {
    let env_value = std::libc::getenv("OCEN_FORMAT_DEBUG_CURSOR")
    let debug_cursor = env_value? and env_value.len() > 0

    // Comments from other files must not be considered by this formatter.
    let own_comments = Vector<Comment>::new()
    for comment in program.comments.iter() {
        if not comment.span.start.filename.eq(filename) then continue
        own_comments.push(comment)
    }

    // One "emitted" flag per retained comment, all initially false.
    let emitted_flags = Vector<bool>::new()
    let flag_idx = 0u32
    while flag_idx < own_comments.size {
        emitted_flags.push(false)
        flag_idx += 1
    }

    let line_offsets = build_line_offsets(source)
    let line_count = line_offsets.size as u32
    let per_line_index = CommentIndex::build(own_comments, line_count)

    return Formatter(
        output: Buffer::make(),
        indent: 0,
        options: FormatOptions::default(),
        source,
        line_offsets,
        filename,
        comments: own_comments,
        comment_index: 0,
        program,
        ns,
        range_start: 0,
        range_end: 0,
        output_line: 1,
        decl_mappings: Vector<DeclMapping>::new(),
        stmt_mappings: Vector<DeclMapping>::new(),
        track_stmts: false,
        ic_lines: Vector<u32>::new(),
        last_comment_request_line: 0,
        debug_cursor: debug_cursor,
        cursor_regressions: 0,
        comment_emitted: emitted_flags,
        comment_line_index: per_line_index,
        output_col: 0
    )
}

//* Record one comment-cursor regression. The counter is always bumped;
//* a diagnostic line is printed to stderr only when debug_cursor is on.
def Formatter::note_cursor_regression(&this, kind: str, requested: u32) {
    .cursor_regressions += 1
    if not .debug_cursor then return
    eprintln("[formatter] comment cursor regression (%s): requested=%u, last=%u, file=%s", kind, requested, .last_comment_request_line, .filename)
}

//* Append `s` verbatim to the output and keep output_line/output_col in
//* sync with it.
//*
//* Every '\n' in `s` advances output_line. If `s` contains no newline
//* the column simply grows by the length of `s`; otherwise the column
//* becomes the number of characters that follow the last newline.
def Formatter::write(&this, s: str) {
    .output += s

    let total = s.len() as u32
    let seen_newline = false
    // Characters after the most recent newline; only meaningful once
    // seen_newline is true.
    let tail = 0u32
    for let pos = 0u32; pos < total; pos++ {
        if s[pos] != '\n' then continue
        .output_line += 1
        seen_newline = true
        tail = total - pos - 1
    }

    if seen_newline {
        .output_col = tail
    } else {
        .output_col += total
    }
}

//* Emit a single newline: the output moves to the next line and the
//* column goes back to 0.
def Formatter::newline(&this) {
    .output_col = 0
    .output_line += 1
    .output += '\n'
}

//* Increase the indent level by one. Pair with pop_indent().
def Formatter::push_indent(&this) {
    .indent += 1
}

//* Decrease the indent level by one. Asserts that the level is above
//* zero, so an unbalanced pop is caught instead of wrapping around.
def Formatter::pop_indent(&this) {
    assert .indent > 0, "Formatter indent underflow"
    .indent -= 1
}

//* Emit the indentation for the current level (indent * indent_size
//* spaces) and set output_col to that width. The column is set even
//* when the width is zero and nothing is written.
def Formatter::write_indent(&this) {
    let width = .options.indent_size * .indent
    .output_col = width
    if width == 0 then return

    // Grow the buffer once and blank-fill the new tail with memset
    // rather than appending one space at a time.
    .output.resize_if_necessary(.output.size + width)
    memset(.output.data + .output.size, ' ' as u8, width)
    .output.size += width
}

//* Start a continuation line: end the current line, go one indent level
//* deeper, and write the indentation for the new line. Use this when a
//* statement or expression does not fit on the current line.
//* Every call must be matched by end_continuation().
def Formatter::begin_continuation(&this) {
    this.newline()
    this.push_indent()
    this.write_indent()
}

//* Close a continuation opened by begin_continuation() by dropping the
//* extra indent level it added.
def Formatter::end_continuation(&this) {
    this.pop_indent()
}

//* Start a continuation line for a binary operator.
//*
//* Works like begin_continuation(), except that the extra indent level
//* is added only when we are not already inside a binary continuation.
//* This keeps chained operators (e.g. a + b + c) at a single extra
//* level instead of one level per operator.
//*
//* Returns the value in_binary_continuation had on entry; hand it to
//* end_binary_continuation() to undo exactly what was done here.
def Formatter::begin_binary_continuation(&this): bool {
    let was_continuing = .in_binary_continuation
    .newline()
    if not was_continuing {
        .in_binary_continuation = true
        .push_indent()
    }
    .write_indent()
    return was_continuing
}

//* Close a binary continuation. `saved` is the value returned by the
//* matching begin_binary_continuation(): the flag is reset to it, and
//* the indent level is popped only if that call was the one that
//* pushed it (i.e. `saved` is false).
def Formatter::end_binary_continuation(&this, saved: bool) {
    .in_binary_continuation = saved
    if saved then return
    .pop_indent()
}

//* Measure how wide an expression would be if formatted on a single line.
//* Returns the width in characters (not including any leading indentation).
//* This uses buffer capture - it temporarily swaps the output buffer.
//*
//* The expression is formatted into a scratch buffer with
//* in_format_str=true; afterwards the output buffer, line/column
//* counters, in_format_str, comment state and ic_lines size are all put
//* back to the values they had on entry, so the pass leaves no trace.
//* If the scratch output spans several lines, the width of its first
//* line is returned.
def Formatter::measure_expr(&this, node: &AST): u32 {
    // Remember everything the dry run is going to overwrite.
    let saved_output = .output
    let saved_line = .output_line
    let saved_col = .output_col
    let saved_in_fmt = .in_format_str
    // Set in_format_str=true BEFORE save_comment_state so it can skip the
    // expensive deep-copy (emit_* functions are no-ops when in_format_str).
    .in_format_str = true
    let saved_comments = .save_comment_state()
    let saved_ic_size = .ic_lines.size
    // Format into a fresh scratch buffer starting at line 1, column 0, so
    // that output_col afterwards is the width of the (last) emitted line.
    // NOTE(review): the scratch buffer is not released before the original
    // buffer is swapped back in — confirm whether it should be freed.
    .output = Buffer::make()
    .output_line = 1
    .output_col = 0
    .format_expr(node)
    let width = .output_col
    // For multi-line expressions (e.g. if-then-else with block body),
    // output_col reflects only the last line (e.g. "}").  Use the first
    // line's width instead, since that's the line that determines whether
    // the expression fits inline.
    if .output_line > 1 {
        let buf = .output.str()
        // The index of the first '\n' equals the length of the first line.
        for let c = 0u32; buf[c] != '\0'; c++ {
            if buf[c] == '\n' {
                width = c
                break
            }
        }
    }
    // Undo every side effect of the dry run.
    .output = saved_output
    .output_line = saved_line
    .output_col = saved_col
    .in_format_str = saved_in_fmt
    .restore_comment_state(&saved_comments)
    // Discard any ic_lines entries added during measurement
    .ic_lines.size = saved_ic_size
    return width
}

//* Measure how wide a statement would be if formatted on a single line.
//*
//* Same dry-run technique as measure_expr: the statement is formatted
//* into a scratch buffer with in_format_str=true, then the output
//* buffer, line/column counters, in_format_str, comment state and
//* ic_lines size are restored to their values on entry. For output that
//* spans several lines, the width of the first line is returned.
def Formatter::measure_statement(&this, node: &AST): u32 {
    // Remember everything the dry run is going to overwrite.
    let saved_output = .output
    let saved_line = .output_line
    let saved_col = .output_col
    let saved_in_fmt = .in_format_str
    // Set in_format_str=true BEFORE save so the save skips the expensive deep-copy.
    .in_format_str = true
    let saved_comments = .save_comment_state()
    let saved_ic_size = .ic_lines.size
    // Format into a fresh scratch buffer starting at line 1, column 0.
    // NOTE(review): the scratch buffer is not released before the original
    // buffer is swapped back in — confirm whether it should be freed.
    .output = Buffer::make()
    .output_line = 1
    .output_col = 0
    .format_statement(node)
    let width = .output_col
    // For multi-line statements, use the first line's width.
    if .output_line > 1 {
        let buf = .output.str()
        // The index of the first '\n' equals the length of the first line.
        for let c = 0u32; buf[c] != '\0'; c++ {
            if buf[c] == '\n' {
                width = c
                break
            }
        }
    }
    // Undo every side effect of the dry run.
    .output = saved_output
    .output_line = saved_line
    .output_col = saved_col
    .in_format_str = saved_in_fmt
    .restore_comment_state(&saved_comments)
    // Discard any ic_lines entries added during measurement
    .ic_lines.size = saved_ic_size
    return width
}

//* Measure the width of import parts as if formatted on a single line.
//*
//* Consecutive parts are joined by "::" (2 columns). Per part:
//*   - Single:   the name, plus " as " and the alias when one is present
//*   - Multiple: "{ " and " }" around the nested paths, which are
//*               separated by ", " and measured recursively
//*   - Wildcard: 1 column
def Formatter::measure_import_parts(&this, parts: &Vector<&ImportPart>): u32 {
    let total = 0u32
    for let idx = 0u32; idx < parts.size; idx++ {
        if idx != 0 {
            total += 2 // "::"
        }
        let part = parts.at(idx)
        match part.type {
            Wildcard => total += 1
            Single => {
                total += part.u.single.name.len() as u32
                if part.u.single.alias? {
                    total += 4 // " as "
                    total += part.u.single.alias.len() as u32
                }
            }
            Multiple => {
                total += 4 // "{ " + " }"
                let paths = part.u.multiple.paths
                for let p = 0u32; p < paths.size; p++ {
                    if p != 0 {
                        total += 2 // ", "
                    }
                    total += .measure_import_parts(paths.at(p))
                }
            }
        }
    }
    return total
}

//* Measure the width of a match condition as if formatted on a single line.
//* This is the width of the condition expression, plus - when the
//* condition binds arguments - the parentheses and the ", "-separated
//* argument names.
def Formatter::measure_match_cond(&this, cond: &MatchCond): u32 {
    let total = .measure_expr(cond.expr)
    // No argument list: the expression width is the whole answer.
    if not cond.args? or cond.args.size == 0 then return total

    total += 2 // "()"
    let arg_idx = 0u32
    while arg_idx < cond.args.size {
        if arg_idx != 0 {
            total += 2 // ", "
        }
        total += cond.args.at(arg_idx).var.sym.name.len() as u32
        arg_idx += 1
    }
    return total
}

//* Tell whether appending `additional` columns at the current output
//* column would go past the configured line width.
//* Always false when width-aware formatting is disabled (line_width 0).
def Formatter::would_exceed_width(&this, additional: u32): bool {
    if .options.width_enabled() {
        let projected = .output_col + additional
        return projected > .options.line_width
    }
    return false
}

//* Number of columns left on the current output line before the
//* configured line width is reached (0 once the column is at or past it).
//* When width-aware formatting is disabled (line_width 0) a large
//* placeholder value, 10000, is returned.
def Formatter::remaining_width(&this): u32 {
    if .options.width_enabled() {
        let limit = .options.line_width
        if .output_col < limit then return limit - .output_col
        return 0u32
    }
    return 10000u32
}

//* Measure the full width of a collection as if formatted on one line.
//* For Calls: callee(arg1, arg2, arg3)
//* For Arrays: [elem1, elem2, elem3]
//* etc.
//*
//* The result is len(open) + len(close) + 2 for every ", " separator +
//* the measured width of each item. Items are formatted one at a time
//* into scratch buffers; the output buffer, line/column counters,
//* in_format_str, comment state and ic_lines size are restored before
//* returning.
def Formatter::measure_collection(&this, node: &AST, open: str, close: str): u32 {
    let size = .collection_size(node)
    let width = open.len() + close.len()
    // Set in_format_str=true once before the loop so that:
    //  (a) save_comment_state uses the fast shallow path (no deep-copy), and
    //  (b) emit_* are no-ops, meaning comment state never changes between items.
    // We therefore need only ONE save/restore wrapping the whole loop.
    let saved_in_fmt = .in_format_str
    .in_format_str = true
    let saved_comments = .save_comment_state()
    let saved_ic_size = .ic_lines.size
    for let i = 0u32; i < size; i++ {
        if i > 0 then width += 2 // ", "
        // Swap in a scratch buffer for this item only.
        let saved_output = .output
        let saved_line = .output_line
        let saved_col = .output_col
        .output = Buffer::make()
        .output_line = 1
        .output_col = 0
        .format_collection_item(node, i)
        // NOTE(review): output_col is the width of the LAST line written.
        // If an item renders across several lines, only that last line is
        // counted here, whereas measure_expr uses the first line in that
        // situation — confirm whether the difference is intended.
        width += .output_col
        .output = saved_output
        .output_line = saved_line
        .output_col = saved_col
    }
    // Undo the measurement-mode setup done before the loop.
    .in_format_str = saved_in_fmt
    .restore_comment_state(&saved_comments)
    .ic_lines.size = saved_ic_size
    return width
}

//* Compute how many columns a function's parameter list would take on one line,
//* parentheses included, e.g.: (a: u32, b: str, c: bool)
//* A variadic function additionally counts "..." (and a ", " separator when
//* there are other parameters).
//* Formatter output and comment state are restored before returning.
def Formatter::measure_params(&this, func: &Function): u32 {
    let total = 2u32 // "(" + ")"
    // Dry run: raise in_format_str once before the loop so the emit_* helpers
    // are no-ops and one comment-state save/restore covers every parameter.
    let prev_in_fmt = .in_format_str
    .in_format_str = true
    let comment_snapshot = .save_comment_state()
    let prev_ic_size = .ic_lines.size
    let idx = 0u32
    while idx < func.params.size {
        if idx > 0 then total += 2 // ", "
        // Stash the real output state and render the parameter into a scratch
        // buffer starting at column 0.
        let real_output = .output
        let real_line = .output_line
        let real_col = .output_col
        .output = Buffer::make()
        .output_line = 1
        .output_col = 0
        .format_param(func.params.at(idx), is_first: idx == 0)
        // The column reached after formatting is taken as the parameter's width.
        total += .output_col
        // Put the real output state back before moving on.
        .output = real_output
        .output_line = real_line
        .output_col = real_col
        idx++
    }
    if func.is_variadic {
        if func.params.size > 0 then total += 2 // ", "
        total += 3 // "..."
    }
    .in_format_str = prev_in_fmt
    .restore_comment_state(&comment_snapshot)
    .ic_lines.size = prev_ic_size
    return total
}

//* Fetch the source text covered by `span` from the program.
//* Falls back to an empty string when the lookup yields no text.
def Formatter::source_text(&this, span: Span): str {
    let found = .program.get_source_text(span)
    if found? then return found
    return ""
}

//* Decide whether `func` was written with an explicit return type.
//* A missing or Void parsed return type counts as not explicit. Otherwise the
//* parser is expected to synthesize a return type for functions that omit one,
//* reusing the function name's span; such a synthesized type is recognized by
//* its span starting at the same line and column as the function name.
def Formatter::has_explicit_return_type(&this, func: &Function): bool {
    let ret = func.parsed_return_type
    if not ret? then return false
    if ret.base == BaseType::Void then return false
    // A return type that starts exactly where the function name starts is
    // treated as synthesized rather than written by the user.
    let ret_pos = ret.span.start
    let name_pos = func.sym.span.start
    let same_position = ret_pos.line == name_pos.line and ret_pos.col == name_pos.col
    return not same_position
}

//* Report whether a function type's return type looks like an implicit `void`:
//* an unresolved identifier named "void" whose span starts at the same
//* line and column as the function type's own span.
//* NOTE(review): presumably this identifies a return type synthesized by the
//* parser rather than written in the source - confirm against the parser.
def Formatter::is_implicit_void_function_type(&this, type: &Type): bool {
    if not type? then return false
    let ret = type.u.func.return_type
    if not ret? then return false
    if ret.base != BaseType::Unresolved then return false
    let unresolved = ret.u.unresolved
    if not unresolved? then return false
    if unresolved.type != Identifier then return false
    if not unresolved.u.ident.name.eq("void") then return false
    // Same start position as the enclosing function type => implicit.
    let ret_pos = ret.span.start
    let type_pos = type.span.start
    return ret_pos.line == type_pos.line and ret_pos.col == type_pos.col
}

//* Write out every pending non-inline comment that starts before `line`, each
//* on its own indented line. Pending inline comments before `line` are skipped
//* over without being written.
//* With preserve_blanks set, a blank line is inserted between two comments
//* emitted by this call when the source had a gap between them.
//* Returns the end line of the last comment written here, or prev_end_line if
//* nothing was written.
def Formatter::emit_comments_before(&this, line: u32, preserve_blanks: bool = false, prev_end_line: u32 = 0): u32 {
    // Dry-run measurement (in_format_str) must leave comment state untouched.
    if .in_format_str then return prev_end_line
    // Report requests that move backwards; otherwise remember the furthest
    // line requested so far.
    if line < .last_comment_request_line {
        .note_cursor_regression("before", line)
    }
    if line > .last_comment_request_line {
        .last_comment_request_line = line
    }
    let last_end = prev_end_line
    while .comment_index < .comments.size {
        let cur = .comments.at(.comment_index)
        let cur_line = cur.span.start.line
        if cur_line >= line then break
        if not cur.is_inline {
            // Only add a blank line after at least one comment has been
            // written by this call and the source left a gap before this one.
            let gap_after_previous = last_end > prev_end_line and cur_line > last_end + 1
            if preserve_blanks and gap_after_previous {
                .newline()
            }
            .write_indent()
            .write(cur.text)
            .newline()
            .comment_emitted.data[.comment_index] = true
            last_end = cur.span.end.line
        }
        .comment_index++
    }
    return last_end
}

//* Append the inline comment(s) found on source line `line` to the current
//* output line, each preceded by a single space.
//* A request for a line earlier than the furthest one seen so far is reported
//* and delegated to emit_inline_comment_backward.
def Formatter::emit_inline_comment(&this, line: u32) {
    // Dry-run measurement (in_format_str) must leave comment state untouched.
    if .in_format_str then return
    if line < .last_comment_request_line {
        .note_cursor_regression("inline", line)
        // The cursor has already moved past this line, so look for
        // not-yet-emitted inline comments on it by scanning backward.
        .emit_inline_comment_backward(line)
        return
    }
    if line > .last_comment_request_line {
        .last_comment_request_line = line
    }
    let line_recorded = false
    while .comment_index < .comments.size {
        let cur = .comments.at(.comment_index)
        let cur_line = cur.span.start.line

        if cur_line < line {
            // Stale inline comments from earlier lines are stepped over so they
            // cannot block this line's comments; a non-inline one stops the scan.
            if not cur.is_inline then break
            .comment_index++
            continue
        }

        // Stop at anything past this line, or at a non-inline comment on it.
        if cur_line > line or not cur.is_inline then break

        // Record the output line once, on the first comment written.
        if not line_recorded {
            .ic_lines.push(.output_line)
            line_recorded = true
        }
        .write(" ")
        .write(cur.text)
        .comment_emitted.data[.comment_index] = true
        .comment_index++
    }
}

//* Emit inline comments on `line` that have not been emitted yet; used when
//* the requested line lies behind the comment cursor.
//* Only the comments that comment_line_index reports for this line are
//* visited, rather than walking the whole comment list. The comment cursor
//* itself is not moved.
def Formatter::emit_inline_comment_backward(&this, line: u32) {
    let on_line = .comment_line_index.comments_on_line(line)
    let line_recorded = false
    for let k = 0u32; k < on_line.size; k++ {
        let idx = on_line.at(k)
        let cur = .comments.at(idx)
        if not cur.is_inline then continue
        if .comment_emitted.at(idx) then continue
        // Record the output line once, on the first comment written.
        if not line_recorded {
            .ic_lines.push(.output_line)
            line_recorded = true
        }
        .write(" ")
        .write(cur.text)
        .comment_emitted.data[idx] = true
    }
}

//* Emit any remaining non-inline comments, optionally limited to those that
//* start before `max_line`. Pending inline comments in that range are skipped
//* over without being written.
//* When max_line is 0, emit all remaining comments (for top-level namespace).
//* `prev_end_line` is the end line of whatever was emitted just before; when it
//* is non-zero, a blank line is inserted before a comment that the source
//* separated from the previous item by at least one empty line.
//* Does nothing during dry-run measurement (in_format_str), like the other
//* comment emitters.
def Formatter::emit_remaining_comments(&this, max_line: u32 = 0, prev_end_line: u32 = 0) {
    // During dry-run measurement (in_format_str), don't modify comment state:
    // writing comments or advancing the cursor here would leak into the
    // measurement buffer and the emitted-comment flags.
    if .in_format_str then return
    let prev_end = prev_end_line
    while .comment_index < .comments.size {
        let comment = .comments.at(.comment_index)
        // Use >= so inline comments on the closing brace line itself remain
        // available for the caller's emit_inline_comment call.
        if max_line > 0 and comment.span.start.line >= max_line then break
        if comment.is_inline {
            .comment_index++
            continue
        }
        // Preserve a source gap between the previous item and this comment.
        if prev_end > 0 and comment.span.start.line > prev_end + 1 {
            .newline()
        }
        .write_indent()
        .write(comment.text)
        .newline()
        .comment_emitted.data[.comment_index] = true
        prev_end = comment.span.end.line
        .comment_index++
    }
}

//* Check if any inline comments exist in the given line range
def Formatter::has_inline_comments_in_range(&this, start_line: u32, end_line: u32): bool {
    if .in_format_str then return false
    for let ci = .comment_index; ci < .comments.size; ci++ {
        let comment = .comments.at(ci)
        if comment.span.start.line > end_line then break
        if comment.span.start.line >= start_line and comment.is_inline then return true
    }