Skip to main content

shape_lsp/
formatting.rs

1//! Document formatting provider for Shape
2//!
3//! Provides code formatting with configurable options.
4//! Uses a token-level approach that ONLY adjusts whitespace/indentation.
5//! Non-whitespace tokens pass through unchanged — code can never be destroyed.
6
7use crate::util::{offset_to_line_col, parser_source, position_to_offset};
8use shape_ast::ast::Item;
9use tower_lsp_server::ls_types::{FormattingOptions, Position, Range, TextEdit};
10
11/// Formatting configuration
12#[derive(Debug, Clone)]
13pub struct FormatConfig {
14    /// Number of spaces for indentation (or 0 for tabs)
15    pub indent_size: u32,
16    /// Use tabs instead of spaces
17    pub use_tabs: bool,
18    /// Maximum line length before wrapping
19    pub max_line_length: u32,
20    /// Add trailing commas in arrays/objects
21    pub trailing_commas: bool,
22    /// Space after colons in objects
23    pub space_after_colon: bool,
24    /// Spaces around binary operators
25    pub spaces_around_operators: bool,
26    /// Blank lines between top-level items
27    pub blank_lines_between_items: u32,
28}
29
30impl Default for FormatConfig {
31    fn default() -> Self {
32        Self {
33            indent_size: 4,
34            use_tabs: false,
35            max_line_length: 100,
36            trailing_commas: true,
37            space_after_colon: true,
38            spaces_around_operators: true,
39            blank_lines_between_items: 1,
40        }
41    }
42}
43
44impl From<&FormattingOptions> for FormatConfig {
45    fn from(opts: &FormattingOptions) -> Self {
46        Self {
47            indent_size: opts.tab_size,
48            use_tabs: !opts.insert_spaces,
49            ..Default::default()
50        }
51    }
52}
53
54// ─── Token-level formatter ──────────────────────────────────────────────────
55
56#[derive(Debug, Clone, Copy, PartialEq, Eq)]
57enum TokenKind {
58    Whitespace,    // spaces/tabs (NOT newlines)
59    Newline,       // \n or \r\n
60    LineComment,   // // ... (not including trailing newline)
61    BlockComment,  // /* ... */ (with nesting)
62    OpenBrace,     // {
63    CloseBrace,    // }
64    StringLiteral, // all string varieties, opaque
65    Other,         // identifiers, keywords, operators, numbers, parens, brackets
66}
67
68struct Token {
69    kind: TokenKind,
70    start: usize,
71    end: usize,
72}
73
74#[derive(Debug, Clone, Copy, PartialEq, Eq)]
75struct ByteRange {
76    start: usize,
77    end: usize,
78}
79
80/// Tokenize source into a flat token stream.
81/// Invariant: concatenating all token texts reproduces the original source exactly.
82fn tokenize(source: &str) -> Vec<Token> {
83    let bytes = source.as_bytes();
84    let mut tokens = Vec::new();
85    let mut i = 0;
86
87    while i < bytes.len() {
88        let start = i;
89        let b = bytes[i];
90
91        // Newline: \r\n or \n
92        if b == b'\r' && i + 1 < bytes.len() && bytes[i + 1] == b'\n' {
93            i += 2;
94            tokens.push(Token {
95                kind: TokenKind::Newline,
96                start,
97                end: i,
98            });
99            continue;
100        }
101        if b == b'\n' {
102            i += 1;
103            tokens.push(Token {
104                kind: TokenKind::Newline,
105                start,
106                end: i,
107            });
108            continue;
109        }
110
111        // Whitespace (spaces/tabs only)
112        if b == b' ' || b == b'\t' {
113            while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
114                i += 1;
115            }
116            tokens.push(Token {
117                kind: TokenKind::Whitespace,
118                start,
119                end: i,
120            });
121            continue;
122        }
123
124        // Line comment
125        if b == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'/' {
126            i += 2;
127            while i < bytes.len() && bytes[i] != b'\n' && bytes[i] != b'\r' {
128                i += 1;
129            }
130            tokens.push(Token {
131                kind: TokenKind::LineComment,
132                start,
133                end: i,
134            });
135            continue;
136        }
137
138        // Block comment (with nesting)
139        if b == b'/' && i + 1 < bytes.len() && bytes[i + 1] == b'*' {
140            let mut depth = 1usize;
141            i += 2;
142            while i < bytes.len() && depth > 0 {
143                if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
144                    depth += 1;
145                    i += 2;
146                } else if i + 1 < bytes.len() && bytes[i] == b'*' && bytes[i + 1] == b'/' {
147                    depth -= 1;
148                    i += 2;
149                } else {
150                    i += 1;
151                }
152            }
153            tokens.push(Token {
154                kind: TokenKind::BlockComment,
155                start,
156                end: i,
157            });
158            continue;
159        }
160
161        // F-string triple: f"""...""", f$"""...""", f#"""..."""
162        if b == b'f' {
163            let prefix_len = if i + 4 < bytes.len()
164                && (bytes[i + 1] == b'$' || bytes[i + 1] == b'#')
165                && bytes[i + 2] == b'"'
166                && bytes[i + 3] == b'"'
167                && bytes[i + 4] == b'"'
168            {
169                Some(5usize)
170            } else if i + 3 < bytes.len()
171                && bytes[i + 1] == b'"'
172                && bytes[i + 2] == b'"'
173                && bytes[i + 3] == b'"'
174            {
175                Some(4usize)
176            } else {
177                None
178            };
179            if let Some(prefix_len) = prefix_len {
180                i += prefix_len;
181                loop {
182                    if i + 2 >= bytes.len() {
183                        i = bytes.len();
184                        break;
185                    }
186                    if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
187                        i += 3;
188                        break;
189                    }
190                    i += 1;
191                }
192                tokens.push(Token {
193                    kind: TokenKind::StringLiteral,
194                    start,
195                    end: i,
196                });
197                continue;
198            }
199        }
200
201        // Triple string: """..."""
202        if b == b'"' && i + 2 < bytes.len() && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
203            i += 3;
204            loop {
205                if i + 2 >= bytes.len() {
206                    i = bytes.len();
207                    break;
208                }
209                if bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
210                    i += 3;
211                    break;
212                }
213                i += 1;
214            }
215            tokens.push(Token {
216                kind: TokenKind::StringLiteral,
217                start,
218                end: i,
219            });
220            continue;
221        }
222
223        // F-string simple: f"...", f$"...", f#"..."
224        if b == b'f' {
225            let prefix_len = if i + 2 < bytes.len()
226                && (bytes[i + 1] == b'$' || bytes[i + 1] == b'#')
227                && bytes[i + 2] == b'"'
228            {
229                Some(3usize)
230            } else if i + 1 < bytes.len() && bytes[i + 1] == b'"' {
231                Some(2usize)
232            } else {
233                None
234            };
235            if let Some(prefix_len) = prefix_len {
236                i += prefix_len;
237                let mut escaped = false;
238                while i < bytes.len() {
239                    if escaped {
240                        escaped = false;
241                    } else if bytes[i] == b'\\' {
242                        escaped = true;
243                    } else if bytes[i] == b'"' {
244                        i += 1;
245                        break;
246                    }
247                    i += 1;
248                }
249                tokens.push(Token {
250                    kind: TokenKind::StringLiteral,
251                    start,
252                    end: i,
253                });
254                continue;
255            }
256        }
257
258        // Simple string: "..."
259        if b == b'"' {
260            i += 1;
261            let mut escaped = false;
262            while i < bytes.len() {
263                if escaped {
264                    escaped = false;
265                } else if bytes[i] == b'\\' {
266                    escaped = true;
267                } else if bytes[i] == b'"' {
268                    i += 1;
269                    break;
270                }
271                i += 1;
272            }
273            tokens.push(Token {
274                kind: TokenKind::StringLiteral,
275                start,
276                end: i,
277            });
278            continue;
279        }
280
281        // Delimiters
282        if b == b'{' {
283            i += 1;
284            tokens.push(Token {
285                kind: TokenKind::OpenBrace,
286                start,
287                end: i,
288            });
289            continue;
290        }
291        if b == b'}' {
292            i += 1;
293            tokens.push(Token {
294                kind: TokenKind::CloseBrace,
295                start,
296                end: i,
297            });
298            continue;
299        }
300
301        // Other: everything else (identifiers, keywords, operators, parens, brackets, etc.)
302        i += 1;
303        while i < bytes.len() {
304            let c = bytes[i];
305            if c == b' '
306                || c == b'\t'
307                || c == b'\n'
308                || c == b'\r'
309                || c == b'"'
310                || c == b'{'
311                || c == b'}'
312                || (c == b'/'
313                    && i + 1 < bytes.len()
314                    && (bytes[i + 1] == b'/' || bytes[i + 1] == b'*'))
315            {
316                break;
317            }
318            i += 1;
319        }
320        tokens.push(Token {
321            kind: TokenKind::Other,
322            start,
323            end: i,
324        });
325    }
326
327    tokens
328}
329
330/// Walk the token stream and emit TextEdits that fix indentation.
331/// Only leading whitespace on each line is modified; all other content is untouched.
332fn reindent(
333    tokens: &[Token],
334    source: &str,
335    config: &FormatConfig,
336    protected_lines: &std::collections::HashSet<u32>,
337    protected_ranges: &[ByteRange],
338) -> Vec<TextEdit> {
339    let mut edits = Vec::new();
340    let mut brace_depth: usize = 0;
341    let mut line: u32 = 0;
342    let mut i = 0;
343    let mut after_newline = true; // start of file counts as start of line
344
345    while i < tokens.len() {
346        let tok = &tokens[i];
347
348        if after_newline {
349            let ws_token = if tok.kind == TokenKind::Whitespace {
350                Some(tok)
351            } else {
352                None
353            };
354
355            // Preserve foreign-language body lines exactly as authored.
356            if protected_lines.contains(&line) {
357                after_newline = false;
358                if ws_token.is_some() {
359                    i += 1;
360                }
361                continue;
362            }
363
364            let first_non_ws_idx = if ws_token.is_some() { i + 1 } else { i };
365
366            // Peek ahead to find first substantive token on this line
367            let mut line_is_blank = true;
368            let mut first_code_kind = None;
369            if first_non_ws_idx < tokens.len() {
370                let pk = &tokens[first_non_ws_idx];
371                if pk.kind != TokenKind::Newline {
372                    line_is_blank = false;
373                    first_code_kind = Some(pk.kind);
374                }
375            }
376
377            if line_is_blank {
378                // Blank line — strip any trailing whitespace
379                if let Some(ws) = ws_token {
380                    let ws_len = (ws.end - ws.start) as u32;
381                    if ws_len > 0 {
382                        edits.push(TextEdit {
383                            range: Range {
384                                start: Position { line, character: 0 },
385                                end: Position {
386                                    line,
387                                    character: ws_len,
388                                },
389                            },
390                            new_text: String::new(),
391                        });
392                    }
393                }
394            } else {
395                // Compute target indentation
396                let effective_depth = if first_code_kind == Some(TokenKind::CloseBrace) {
397                    brace_depth.saturating_sub(1)
398                } else {
399                    brace_depth
400                };
401                let target_indent = make_indent(effective_depth, config);
402                let current_indent = if let Some(ws) = ws_token {
403                    &source[ws.start..ws.end]
404                } else {
405                    ""
406                };
407
408                if current_indent != target_indent {
409                    let current_len = current_indent.len() as u32;
410                    edits.push(TextEdit {
411                        range: Range {
412                            start: Position { line, character: 0 },
413                            end: Position {
414                                line,
415                                character: current_len,
416                            },
417                        },
418                        new_text: target_indent,
419                    });
420                }
421            }
422
423            after_newline = false;
424            if ws_token.is_some() {
425                i += 1;
426                continue;
427            }
428        }
429
430        // Track brace depth and line numbers
431        match tok.kind {
432            TokenKind::OpenBrace => {
433                if !is_offset_in_ranges(tok.start, protected_ranges) {
434                    brace_depth += 1;
435                }
436            }
437            TokenKind::CloseBrace => {
438                if !is_offset_in_ranges(tok.start, protected_ranges) {
439                    brace_depth = brace_depth.saturating_sub(1);
440                }
441            }
442            TokenKind::Newline => {
443                after_newline = true;
444                line += 1;
445            }
446            TokenKind::StringLiteral | TokenKind::BlockComment => {
447                // Multi-line tokens: count internal newlines for line tracking
448                let text = &source[tok.start..tok.end];
449                line += text.matches('\n').count() as u32;
450            }
451            _ => {}
452        }
453
454        i += 1;
455    }
456
457    edits
458}
459
460// ─── Public API ─────────────────────────────────────────────────────────────
461
462/// Format an entire document
463pub fn format_document(text: &str, options: &FormattingOptions) -> Vec<TextEdit> {
464    let config = FormatConfig::from(options);
465    let tokens = tokenize(text);
466    let protected_ranges = collect_foreign_body_ranges(text);
467    let protected_lines = collect_protected_lines(text, &protected_ranges);
468    let mut edits = reindent(&tokens, text, &config, &protected_lines, &protected_ranges);
469    edits.extend(align_table_row_columns(text));
470    edits
471}
472
473/// Format a range within a document
474pub fn format_range(text: &str, range: Range, options: &FormattingOptions) -> Vec<TextEdit> {
475    let config = FormatConfig::from(options);
476    let tokens = tokenize(text);
477    let protected_ranges = collect_foreign_body_ranges(text);
478    let protected_lines = collect_protected_lines(text, &protected_ranges);
479    reindent(&tokens, text, &config, &protected_lines, &protected_ranges)
480        .into_iter()
481        .filter(|edit| {
482            edit.range.start.line >= range.start.line && edit.range.end.line <= range.end.line
483        })
484        .collect()
485}
486
487/// Format a document while typing (lightweight indentation-only path).
488///
489/// This avoids full AST formatting on each keystroke, which keeps on-type
490/// formatting responsive for larger files and partially-typed code.
491pub fn format_on_type(
492    text: &str,
493    position: Position,
494    ch: &str,
495    options: &FormattingOptions,
496) -> Vec<TextEdit> {
497    if ch != "\n" && ch != "}" {
498        return vec![];
499    }
500
501    let protected_ranges = collect_foreign_body_ranges(text);
502    if let Some(offset) = position_to_offset(text, position)
503        && is_offset_in_ranges(offset, &protected_ranges)
504    {
505        return vec![];
506    }
507
508    let line_index = position.line as usize;
509    let lines: Vec<&str> = text.split('\n').collect();
510    if line_index >= lines.len() {
511        return vec![];
512    }
513
514    let line = lines[line_index];
515    let config = FormatConfig::from(options);
516
517    if let Some(quote_column) = triple_string_quote_column_before_line(text, line_index) {
518        if ch == "}" {
519            return vec![];
520        }
521
522        let target_indent = make_indent_to_column(quote_column, &config);
523        return line_indent_edit(line_index, line, target_indent);
524    }
525
526    if ch == "}" && !line.trim_start().starts_with('}') {
527        return vec![];
528    }
529
530    let depth_before_line = brace_depth_before_line(text, line_index, &protected_ranges);
531    let leading_closers = line.trim_start().chars().take_while(|c| *c == '}').count();
532    let target_depth = depth_before_line.saturating_sub(leading_closers);
533    let target_indent = make_indent(target_depth, &config);
534
535    line_indent_edit(line_index, line, target_indent)
536}
537
538// ─── Helpers ────────────────────────────────────────────────────────────────
539
540fn line_indent_edit(line_index: usize, line: &str, target_indent: String) -> Vec<TextEdit> {
541    let current_indent_len = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
542    let current_indent: String = line.chars().take(current_indent_len).collect();
543
544    if current_indent == target_indent {
545        return vec![];
546    }
547
548    vec![TextEdit {
549        range: Range {
550            start: Position {
551                line: line_index as u32,
552                character: 0,
553            },
554            end: Position {
555                line: line_index as u32,
556                character: current_indent_len as u32,
557            },
558        },
559        new_text: target_indent,
560    }]
561}
562
563#[derive(Clone, Copy, Debug, PartialEq, Eq)]
564enum ScannerStringMode {
565    Simple,
566    Triple { quote_column: usize },
567}
568
569/// If the given line is inside a triple-quoted string, return the column of
570/// the opening quote (`"`). Used by on-type formatting to align body/closing
571/// lines with the delimiter column.
572fn triple_string_quote_column_before_line(text: &str, target_line: usize) -> Option<usize> {
573    let bytes = text.as_bytes();
574    let mut i = 0usize;
575    let mut line = 0usize;
576    let mut col = 0usize;
577    let mut in_line_comment = false;
578    let mut block_comment_depth = 0usize;
579    let mut string_mode: Option<ScannerStringMode> = None;
580    let mut escaped = false;
581
582    while i < bytes.len() {
583        if line >= target_line {
584            break;
585        }
586
587        let b = bytes[i];
588
589        if in_line_comment {
590            if b == b'\n' {
591                in_line_comment = false;
592                line += 1;
593                col = 0;
594            } else {
595                col += 1;
596            }
597            i += 1;
598            continue;
599        }
600
601        if block_comment_depth > 0 {
602            if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
603                block_comment_depth += 1;
604                i += 2;
605                col += 2;
606                continue;
607            }
608            if i + 1 < bytes.len() && bytes[i] == b'*' && bytes[i + 1] == b'/' {
609                block_comment_depth -= 1;
610                i += 2;
611                col += 2;
612                continue;
613            }
614            if b == b'\n' {
615                line += 1;
616                col = 0;
617            } else {
618                col += 1;
619            }
620            i += 1;
621            continue;
622        }
623
624        if let Some(mode) = string_mode {
625            match mode {
626                ScannerStringMode::Simple => {
627                    if escaped {
628                        escaped = false;
629                    } else if b == b'\\' {
630                        escaped = true;
631                    } else if b == b'"' {
632                        string_mode = None;
633                    }
634
635                    if b == b'\n' {
636                        line += 1;
637                        col = 0;
638                    } else {
639                        col += 1;
640                    }
641                    i += 1;
642                }
643                ScannerStringMode::Triple { .. } => {
644                    if i + 2 < bytes.len()
645                        && bytes[i] == b'"'
646                        && bytes[i + 1] == b'"'
647                        && bytes[i + 2] == b'"'
648                    {
649                        string_mode = None;
650                        i += 3;
651                        col += 3;
652                    } else if b == b'\n' {
653                        line += 1;
654                        col = 0;
655                        i += 1;
656                    } else {
657                        col += 1;
658                        i += 1;
659                    }
660                }
661            }
662            continue;
663        }
664
665        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'/' {
666            in_line_comment = true;
667            i += 2;
668            col += 2;
669            continue;
670        }
671
672        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
673            block_comment_depth = 1;
674            i += 2;
675            col += 2;
676            continue;
677        }
678
679        if i + 3 < bytes.len()
680            && bytes[i] == b'f'
681            && bytes[i + 1] == b'"'
682            && bytes[i + 2] == b'"'
683            && bytes[i + 3] == b'"'
684        {
685            string_mode = Some(ScannerStringMode::Triple {
686                quote_column: col + 1,
687            });
688            i += 4;
689            col += 4;
690            continue;
691        }
692
693        if i + 2 < bytes.len() && bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
694            string_mode = Some(ScannerStringMode::Triple { quote_column: col });
695            i += 3;
696            col += 3;
697            continue;
698        }
699
700        if i + 1 < bytes.len() && bytes[i] == b'f' && bytes[i + 1] == b'"' {
701            string_mode = Some(ScannerStringMode::Simple);
702            escaped = false;
703            i += 2;
704            col += 2;
705            continue;
706        }
707
708        if b == b'"' {
709            string_mode = Some(ScannerStringMode::Simple);
710            escaped = false;
711            i += 1;
712            col += 1;
713            continue;
714        }
715
716        if b == b'\n' {
717            line += 1;
718            col = 0;
719        } else {
720            col += 1;
721        }
722
723        i += 1;
724    }
725
726    if line == target_line {
727        if let Some(ScannerStringMode::Triple { quote_column }) = string_mode {
728            return Some(quote_column);
729        }
730    }
731
732    None
733}
734
735#[derive(Clone, Copy, Debug, PartialEq, Eq)]
736enum StringMode {
737    Simple,
738    Triple,
739}
740
741/// Compute brace nesting depth before a given 0-based line.
742/// Braces inside comments and strings are ignored.
743fn brace_depth_before_line(
744    text: &str,
745    target_line: usize,
746    protected_ranges: &[ByteRange],
747) -> usize {
748    if target_line == 0 {
749        return 0;
750    }
751
752    let bytes = text.as_bytes();
753    let mut i = 0usize;
754    let mut line = 0usize;
755    let mut depth = 0usize;
756    let mut in_line_comment = false;
757    let mut block_comment_depth = 0usize;
758    let mut string_mode: Option<StringMode> = None;
759    let mut escaped = false;
760
761    while i < bytes.len() {
762        if line >= target_line {
763            break;
764        }
765
766        if is_offset_in_ranges(i, protected_ranges) {
767            if bytes[i] == b'\n' {
768                line += 1;
769            }
770            i += 1;
771            continue;
772        }
773
774        let b = bytes[i];
775
776        if in_line_comment {
777            if b == b'\n' {
778                in_line_comment = false;
779                line += 1;
780            }
781            i += 1;
782            continue;
783        }
784
785        if block_comment_depth > 0 {
786            if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
787                block_comment_depth += 1;
788                i += 2;
789                continue;
790            }
791            if i + 1 < bytes.len() && bytes[i] == b'*' && bytes[i + 1] == b'/' {
792                block_comment_depth -= 1;
793                i += 2;
794                continue;
795            }
796            if b == b'\n' {
797                line += 1;
798            }
799            i += 1;
800            continue;
801        }
802
803        if let Some(mode) = string_mode {
804            match mode {
805                StringMode::Simple => {
806                    if escaped {
807                        escaped = false;
808                    } else if b == b'\\' {
809                        escaped = true;
810                    } else if b == b'"' {
811                        string_mode = None;
812                    } else if b == b'\n' {
813                        line += 1;
814                    }
815                    i += 1;
816                }
817                StringMode::Triple => {
818                    if i + 2 < bytes.len()
819                        && bytes[i] == b'"'
820                        && bytes[i + 1] == b'"'
821                        && bytes[i + 2] == b'"'
822                    {
823                        string_mode = None;
824                        i += 3;
825                    } else {
826                        if b == b'\n' {
827                            line += 1;
828                        }
829                        i += 1;
830                    }
831                }
832            }
833            continue;
834        }
835
836        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'/' {
837            in_line_comment = true;
838            i += 2;
839            continue;
840        }
841
842        if i + 1 < bytes.len() && bytes[i] == b'/' && bytes[i + 1] == b'*' {
843            block_comment_depth = 1;
844            i += 2;
845            continue;
846        }
847
848        if i + 3 < bytes.len()
849            && bytes[i] == b'f'
850            && bytes[i + 1] == b'"'
851            && bytes[i + 2] == b'"'
852            && bytes[i + 3] == b'"'
853        {
854            string_mode = Some(StringMode::Triple);
855            i += 4;
856            continue;
857        }
858
859        if i + 1 < bytes.len() && bytes[i] == b'f' && bytes[i + 1] == b'"' {
860            string_mode = Some(StringMode::Simple);
861            escaped = false;
862            i += 2;
863            continue;
864        }
865
866        if i + 2 < bytes.len() && bytes[i] == b'"' && bytes[i + 1] == b'"' && bytes[i + 2] == b'"' {
867            string_mode = Some(StringMode::Triple);
868            i += 3;
869            continue;
870        }
871
872        if b == b'"' {
873            string_mode = Some(StringMode::Simple);
874            escaped = false;
875            i += 1;
876            continue;
877        }
878
879        if b == b'{' {
880            depth += 1;
881        } else if b == b'}' {
882            depth = depth.saturating_sub(1);
883        } else if b == b'\n' {
884            line += 1;
885        }
886
887        i += 1;
888    }
889
890    depth
891}
892
893fn collect_foreign_body_ranges(source: &str) -> Vec<ByteRange> {
894    let mut ranges = Vec::new();
895    // Frontmatter (`--- ... ---`) is not Shape syntax; mask it so spans stay
896    // byte-aligned while resilient parsing can still discover foreign blocks.
897    let parse_source = parser_source(source);
898    let partial = shape_ast::parse_program_resilient(parse_source.as_ref());
899    for item in partial.items {
900        if let Item::ForeignFunction(def, _) = item {
901            let start = def.body_span.start.min(source.len());
902            let end = def.body_span.end.min(source.len());
903            if end > start {
904                ranges.push(ByteRange { start, end });
905            }
906        }
907    }
908    ranges.sort_by_key(|range| range.start);
909    ranges
910}
911
912fn collect_protected_lines(source: &str, ranges: &[ByteRange]) -> std::collections::HashSet<u32> {
913    let mut lines = std::collections::HashSet::new();
914    for range in ranges {
915        if range.end <= range.start {
916            continue;
917        }
918        let start_line = offset_to_line_col(source, range.start).0;
919        let end_line = offset_to_line_col(source, range.end.saturating_sub(1)).0;
920        for line in start_line..=end_line {
921            lines.insert(line);
922        }
923    }
924    lines
925}
926
927fn is_offset_in_ranges(offset: usize, ranges: &[ByteRange]) -> bool {
928    ranges
929        .iter()
930        .any(|range| offset >= range.start && offset < range.end)
931}
932
933/// Create indentation string
934fn make_indent(level: usize, config: &FormatConfig) -> String {
935    if config.use_tabs {
936        "\t".repeat(level)
937    } else {
938        " ".repeat(level * config.indent_size as usize)
939    }
940}
941
942fn make_indent_to_column(column: usize, config: &FormatConfig) -> String {
943    if config.use_tabs {
944        let tab_width = config.indent_size.max(1) as usize;
945        let tabs = column / tab_width;
946        let spaces = column % tab_width;
947        format!("{}{}", "\t".repeat(tabs), " ".repeat(spaces))
948    } else {
949        " ".repeat(column)
950    }
951}
952
953// ─── Table row column alignment ─────────────────────────────────────────────
954
955/// Detect consecutive lines that are table row brackets (`[a, b, c],`)
956/// and emit TextEdits to align each column to the same width across all rows.
957fn align_table_row_columns(text: &str) -> Vec<TextEdit> {
958    let lines: Vec<&str> = text.split('\n').collect();
959    let mut edits = Vec::new();
960    let mut i = 0;
961
962    while i < lines.len() {
963        // Find runs of consecutive table-row lines
964        if let Some(row) = parse_table_row_line(lines[i]) {
965            let group_start = i;
966            let mut group: Vec<(usize, Vec<TableCell>)> = vec![(i, row)];
967            i += 1;
968            while i < lines.len() {
969                if let Some(row) = parse_table_row_line(lines[i]) {
970                    group.push((i, row));
971                    i += 1;
972                } else {
973                    break;
974                }
975            }
976
977            // Only align if there are 2+ rows
978            if group.len() < 2 {
979                continue;
980            }
981
982            // Compute max column count and max width per column
983            let max_cols = group
984                .iter()
985                .map(|(_, cells)| cells.len())
986                .max()
987                .unwrap_or(0);
988            let mut col_widths = vec![0usize; max_cols];
989            for (_, cells) in &group {
990                for (j, cell) in cells.iter().enumerate() {
991                    col_widths[j] = col_widths[j].max(cell.trimmed_len);
992                }
993            }
994
995            // Emit edits: for each cell, replace the whitespace region after the value
996            // with the correct amount of padding
997            for (line_idx, cells) in &group {
998                for (j, cell) in cells.iter().enumerate() {
999                    if j >= max_cols {
1000                        break;
1001                    }
1002                    let target_width = col_widths[j];
1003                    let padding_needed = target_width - cell.trimmed_len;
1004
1005                    // Current whitespace after the trimmed value
1006                    let current_trailing = cell.content_end_col - cell.trimmed_end_col;
1007
1008                    if padding_needed != current_trailing {
1009                        edits.push(TextEdit {
1010                            range: Range {
1011                                start: Position {
1012                                    line: *line_idx as u32,
1013                                    character: cell.trimmed_end_col as u32,
1014                                },
1015                                end: Position {
1016                                    line: *line_idx as u32,
1017                                    character: cell.content_end_col as u32,
1018                                },
1019                            },
1020                            new_text: " ".repeat(padding_needed),
1021                        });
1022                    }
1023                }
1024            }
1025
1026            let _ = group_start; // suppress unused warning
1027        } else {
1028            i += 1;
1029        }
1030    }
1031
1032    edits
1033}
1034
1035struct TableCell {
1036    /// Length of the trimmed cell content (no leading/trailing whitespace)
1037    trimmed_len: usize,
1038    /// Column where trimmed content ends
1039    trimmed_end_col: usize,
1040    /// Column where the cell region ends (before comma or `]`)
1041    content_end_col: usize,
1042}
1043
1044/// Try to parse a line as a table row: `[a, b, c],` or `[a, b, c]`
1045/// (possibly with leading whitespace and `= ` prefix on first row).
1046/// Returns the cells if the line matches.
1047fn parse_table_row_line(line: &str) -> Option<Vec<TableCell>> {
1048    let trimmed = line.trim();
1049
1050    // Find the bracket content: must start with `[` (possibly after `= `)
1051    let bracket_content = if let Some(rest) = trimmed.strip_prefix('[') {
1052        rest
1053    } else if let Some(idx) = trimmed.find("= [") {
1054        &trimmed[idx + 3..]
1055    } else {
1056        return None;
1057    };
1058
1059    // Must end with `]` or `],`
1060    let inner = if let Some(rest) = bracket_content.strip_suffix("],") {
1061        rest
1062    } else if let Some(rest) = bracket_content.strip_suffix(']') {
1063        rest
1064    } else {
1065        return None;
1066    };
1067
1068    // Find the start column of `[` in the original line
1069    let bracket_pos = line.find('[')? + 1; // column after `[`
1070
1071    // Split by commas, respecting string literals
1072    let elements = split_respecting_strings(inner);
1073    if elements.is_empty() {
1074        return None;
1075    }
1076
1077    let mut cells = Vec::new();
1078    let mut col = bracket_pos;
1079
1080    for (i, elem) in elements.iter().enumerate() {
1081        let trimmed_elem = elem.trim();
1082        let leading_ws = elem.len() - elem.trim_start().len();
1083        let trimmed_start_col = col + leading_ws;
1084        let trimmed_end_col = trimmed_start_col + trimmed_elem.len();
1085        let content_end_col = col + elem.len();
1086
1087        cells.push(TableCell {
1088            trimmed_len: trimmed_elem.len(),
1089            trimmed_end_col,
1090            content_end_col,
1091        });
1092
1093        // Advance past this element + the comma separator
1094        col = content_end_col;
1095        if i < elements.len() - 1 {
1096            col += 1; // for the `,`
1097        }
1098    }
1099
1100    Some(cells)
1101}
1102
1103/// Split a string by top-level commas, respecting string literals.
1104fn split_respecting_strings(s: &str) -> Vec<&str> {
1105    let mut parts = Vec::new();
1106    let mut start = 0;
1107    let mut in_string = false;
1108    let mut escaped = false;
1109    let bytes = s.as_bytes();
1110
1111    for i in 0..bytes.len() {
1112        if escaped {
1113            escaped = false;
1114            continue;
1115        }
1116        match bytes[i] {
1117            b'\\' if in_string => escaped = true,
1118            b'"' => in_string = !in_string,
1119            b',' if !in_string => {
1120                parts.push(&s[start..i]);
1121                start = i + 1;
1122            }
1123            _ => {}
1124        }
1125    }
1126    parts.push(&s[start..]);
1127    parts
1128}
1129
1130#[cfg(test)]
1131mod tests {
1132    use super::*;
1133
1134    /// Apply indentation edits to source text (for test assertions).
1135    fn apply_edits(source: &str, edits: &[TextEdit]) -> String {
1136        let mut lines: Vec<String> = source.split('\n').map(String::from).collect();
1137        let mut sorted: Vec<&TextEdit> = edits.iter().collect();
1138        sorted.sort_by(|a, b| b.range.start.line.cmp(&a.range.start.line));
1139        for edit in sorted {
1140            let idx = edit.range.start.line as usize;
1141            if idx < lines.len() {
1142                let start = edit.range.start.character as usize;
1143                let end = edit.range.end.character as usize;
1144                let line = &lines[idx];
1145                lines[idx] = format!("{}{}{}", &line[..start], edit.new_text, &line[end..]);
1146            }
1147        }
1148        lines.join("\n")
1149    }
1150
1151    #[test]
1152    fn test_tokenize_roundtrip() {
1153        let sources = [
1154            "fn test() {\n    let x = 1\n}\n",
1155            "let s = \"hello world\"\n",
1156            "let s = f\"value: {x}\"\n",
1157            "let s = \"\"\"\ntriple\n\"\"\"\n",
1158            "let s = f\"\"\"\ntriple {x}\n\"\"\"\n",
1159            "// line comment\nlet x = 1\n",
1160            "/* block */ let x = 1\n",
1161            "/* nested /* inner */ outer */ let x = 1\n",
1162            "let (d: {x: int}, e: {y: int}) = c\n",
1163            "",
1164            "a",
1165        ];
1166        for source in &sources {
1167            let tokens = tokenize(source);
1168            let reconstructed: String = tokens.iter().map(|t| &source[t.start..t.end]).collect();
1169            assert_eq!(
1170                &reconstructed, source,
1171                "tokenizer roundtrip failed for: {:?}",
1172                source
1173            );
1174        }
1175    }
1176
1177    #[test]
1178    fn test_format_document_preserves_function_keyword() {
1179        let source = "function add(a, b) { return a + b; }\n";
1180        let options = FormattingOptions {
1181            tab_size: 4,
1182            insert_spaces: true,
1183            ..Default::default()
1184        };
1185
1186        let edits = format_document(source, &options);
1187        // Token-level formatter preserves `function` keyword as-is (no rename to `fn`)
1188        // Single-line function — all braces on same line — no indentation changes
1189        assert_eq!(edits.len(), 0);
1190    }
1191
1192    #[test]
1193    fn test_format_document_preserves_enum_declaration() {
1194        let source = "enum Signal {\nBuy,\nSell = \"sell\",\nLimit { price: number, size: number },\nMarket(number, number)\n}\n";
1195        let options = FormattingOptions {
1196            tab_size: 4,
1197            insert_spaces: true,
1198            ..Default::default()
1199        };
1200
1201        let edits = format_document(source, &options);
1202        let result = apply_edits(source, &edits);
1203        // Content preserved exactly — only indentation adjusted, no trailing comma added
1204        assert_eq!(
1205            result,
1206            "enum Signal {\n    Buy,\n    Sell = \"sell\",\n    Limit { price: number, size: number },\n    Market(number, number)\n}\n"
1207        );
1208    }
1209
1210    #[test]
1211    fn test_format_document_preserves_triple_style_for_single_line_formatted_string() {
1212        let source = "function test() {\n  let test3 = f\"\"\"\nbla {33+1}\n\"\"\"\n}\n";
1213        let options = FormattingOptions {
1214            tab_size: 4,
1215            insert_spaces: true,
1216            ..Default::default()
1217        };
1218
1219        let edits = format_document(source, &options);
1220        // Only the indentation on line 1 changes (2 spaces → 4 spaces).
1221        // String content on lines 2-3 is inside the StringLiteral token and untouched.
1222        assert_eq!(edits.len(), 1);
1223        assert_eq!(edits[0].range.start.line, 1);
1224        assert_eq!(
1225            edits[0].range.end,
1226            Position {
1227                line: 1,
1228                character: 2
1229            }
1230        );
1231        assert_eq!(edits[0].new_text, "    ");
1232    }
1233
1234    #[test]
1235    fn test_format_document_preserves_sigil_formatted_strings() {
1236        let source = "function test() {\n  let a = f$\"{\\\"name\\\": ${user.name}}\";\n  let b = f#\"run #{cmd}\";\n}\n";
1237        let options = FormattingOptions {
1238            tab_size: 4,
1239            insert_spaces: true,
1240            ..Default::default()
1241        };
1242
1243        let edits = format_document(source, &options);
1244        let result = apply_edits(source, &edits);
1245        assert!(result.contains("let a = f$\"{\\\"name\\\": ${user.name}}\";"));
1246        assert!(result.contains("let b = f#\"run #{cmd}\";"));
1247    }
1248
1249    #[test]
1250    fn test_format_document_preserves_blank_lines_in_function_body() {
1251        let source = "function test() {\nlet a = 1;\n\n\nlet b = 2;\n}\n";
1252        let options = FormattingOptions {
1253            tab_size: 4,
1254            insert_spaces: true,
1255            ..Default::default()
1256        };
1257
1258        let edits = format_document(source, &options);
1259        let result = apply_edits(source, &edits);
1260        assert!(
1261            result.contains("let a = 1;\n\n\n    let b = 2;"),
1262            "formatter should preserve the original number of blank lines, got:\n{}",
1263            result
1264        );
1265    }
1266
1267    #[test]
1268    fn test_format_document_preserves_blank_lines_between_top_level_items() {
1269        let options = FormattingOptions {
1270            tab_size: 4,
1271            insert_spaces: true,
1272            ..Default::default()
1273        };
1274
1275        // Single blank line between items
1276        let source1 = "let a = 1\n\nlet b = 2\n";
1277        let edits1 = format_document(source1, &options);
1278        let result1 = apply_edits(source1, &edits1);
1279        assert!(
1280            result1.contains("let a = 1\n\nlet b = 2"),
1281            "single blank line should be preserved, got:\n{:?}",
1282            result1
1283        );
1284
1285        // Two blank lines between items
1286        let source2 = "let a = 1\n\n\nlet b = 2\n";
1287        let edits2 = format_document(source2, &options);
1288        let result2 = apply_edits(source2, &edits2);
1289        assert!(
1290            result2.contains("let a = 1\n\n\nlet b = 2"),
1291            "two blank lines should be preserved, got:\n{:?}",
1292            result2
1293        );
1294
1295        // Blank lines between functions
1296        let source3 = "fn foo() {\n    42\n}\n\nfn bar() {\n    43\n}\n";
1297        let edits3 = format_document(source3, &options);
1298        let result3 = apply_edits(source3, &edits3);
1299        assert!(
1300            result3.contains("}\n\nfn bar"),
1301            "blank line between functions should be preserved, got:\n{:?}",
1302            result3
1303        );
1304    }
1305
1306    #[test]
1307    fn test_format_document_preserves_destructuring() {
1308        let source = "fn test() {\n    let (d: {x: int}, e: {y: int, z: int}) = c\n}\n";
1309        let options = FormattingOptions {
1310            tab_size: 4,
1311            insert_spaces: true,
1312            ..Default::default()
1313        };
1314        let edits = format_document(source, &options);
1315        assert_eq!(
1316            edits.len(),
1317            0,
1318            "properly indented destructuring should produce no edits"
1319        );
1320    }
1321
1322    #[test]
1323    fn test_format_document_preserves_as_expression() {
1324        let source = "fn test() {\n    let (f: TypeA, g: TypeB) = c as (TypeA + TypeB)\n}\n";
1325        let options = FormattingOptions {
1326            tab_size: 4,
1327            insert_spaces: true,
1328            ..Default::default()
1329        };
1330        let edits = format_document(source, &options);
1331        assert_eq!(edits.len(), 0);
1332    }
1333
1334    #[test]
1335    fn test_format_document_preserves_inline_comments() {
1336        let source = "fn test() {\n    let x = 1 // important\n}\n";
1337        let options = FormattingOptions {
1338            tab_size: 4,
1339            insert_spaces: true,
1340            ..Default::default()
1341        };
1342        let edits = format_document(source, &options);
1343        assert_eq!(
1344            edits.len(),
1345            0,
1346            "inline comments should stay on the same line"
1347        );
1348    }
1349
1350    #[test]
1351    fn test_format_document_formats_invalid_syntax() {
1352        let source = "fn test() {\nlet x = @#$\n}\n";
1353        let options = FormattingOptions {
1354            tab_size: 4,
1355            insert_spaces: true,
1356            ..Default::default()
1357        };
1358        let edits = format_document(source, &options);
1359        // Even invalid syntax gets indented (no parse required)
1360        assert_eq!(edits.len(), 1);
1361        assert_eq!(edits[0].range.start.line, 1);
1362        assert_eq!(edits[0].new_text, "    ");
1363    }
1364
1365    #[test]
1366    fn test_format_document_fstring_nested_braces() {
1367        let source = "fn test() {\nlet x = f\"{obj.method({x: 1})}\"\n}\n";
1368        let options = FormattingOptions {
1369            tab_size: 4,
1370            insert_spaces: true,
1371            ..Default::default()
1372        };
1373        let edits = format_document(source, &options);
1374        // F-string braces are inside the string token and don't affect depth
1375        assert_eq!(edits.len(), 1);
1376        assert_eq!(edits[0].range.start.line, 1);
1377        assert_eq!(edits[0].new_text, "    ");
1378    }
1379
1380    #[test]
1381    fn test_format_document_multiple_closing_braces() {
1382        let source = "fn outer() {\nfn inner() {\nlet x = 1\n}\n}\n";
1383        let options = FormattingOptions {
1384            tab_size: 4,
1385            insert_spaces: true,
1386            ..Default::default()
1387        };
1388        let edits = format_document(source, &options);
1389        let result = apply_edits(source, &edits);
1390        assert_eq!(
1391            result,
1392            "fn outer() {\n    fn inner() {\n        let x = 1\n    }\n}\n"
1393        );
1394    }
1395
1396    #[test]
1397    fn test_format_document_preserves_foreign_block_indentation() {
1398        let source = "fn python percentile(values: Array<number>, pct: number) -> number {\nsorted_v = sorted(values)\nif len(sorted_v) > 0:\n  return sorted_v[0]\nreturn 0.0\n}\nfn shape_fn() {\nlet x = 1\n}\n";
1399        let options = FormattingOptions {
1400            tab_size: 4,
1401            insert_spaces: true,
1402            ..Default::default()
1403        };
1404
1405        let edits = format_document(source, &options);
1406        let result = apply_edits(source, &edits);
1407        assert_eq!(
1408            result,
1409            "fn python percentile(values: Array<number>, pct: number) -> number {\nsorted_v = sorted(values)\nif len(sorted_v) > 0:\n  return sorted_v[0]\nreturn 0.0\n}\nfn shape_fn() {\n    let x = 1\n}\n"
1410        );
1411    }
1412
1413    #[test]
1414    fn test_format_document_preserves_foreign_block_indentation_with_frontmatter() {
1415        let source = r#"---
1416[[extensions]]
1417name = "python"
1418path = "/tmp/libshape_ext_python.so"
1419---
1420fn python percentile(values: Array<number>, pct: number) -> number {
1421  sorted_v = sorted(values)
1422  if len(sorted_v) > 0:
1423    return sorted_v[-1]
1424  return 0.0
1425}
1426fn shape_fn() {
1427let x = 1
1428}
1429"#;
1430        let options = FormattingOptions {
1431            tab_size: 2,
1432            insert_spaces: true,
1433            ..Default::default()
1434        };
1435
1436        let edits = format_document(source, &options);
1437        let result = apply_edits(source, &edits);
1438        assert_eq!(
1439            result,
1440            r#"---
1441[[extensions]]
1442name = "python"
1443path = "/tmp/libshape_ext_python.so"
1444---
1445fn python percentile(values: Array<number>, pct: number) -> number {
1446  sorted_v = sorted(values)
1447  if len(sorted_v) > 0:
1448    return sorted_v[-1]
1449  return 0.0
1450}
1451fn shape_fn() {
1452  let x = 1
1453}
1454"#
1455        );
1456    }
1457
1458    #[test]
1459    fn test_format_on_type_newline_indents_inside_block() {
1460        let text = "fn test() {\n\n}";
1461        let options = FormattingOptions {
1462            tab_size: 4,
1463            insert_spaces: true,
1464            ..Default::default()
1465        };
1466
1467        let edits = format_on_type(
1468            text,
1469            Position {
1470                line: 1,
1471                character: 0,
1472            },
1473            "\n",
1474            &options,
1475        );
1476
1477        assert_eq!(edits.len(), 1);
1478        assert_eq!(
1479            edits[0].range,
1480            Range {
1481                start: Position {
1482                    line: 1,
1483                    character: 0
1484                },
1485                end: Position {
1486                    line: 1,
1487                    character: 0
1488                }
1489            }
1490        );
1491        assert_eq!(edits[0].new_text, "    ");
1492    }
1493
1494    #[test]
1495    fn test_format_on_type_skips_foreign_block() {
1496        let text = "fn python p() -> number {\nif True:\n\n    return 1\n}\n";
1497        let options = FormattingOptions {
1498            tab_size: 4,
1499            insert_spaces: true,
1500            ..Default::default()
1501        };
1502
1503        let edits = format_on_type(
1504            text,
1505            Position {
1506                line: 2,
1507                character: 0,
1508            },
1509            "\n",
1510            &options,
1511        );
1512
1513        assert!(edits.is_empty());
1514    }
1515
1516    #[test]
1517    fn test_format_on_type_closing_brace_dedents() {
1518        let text = "fn test() {\n    let x = 1;\n    }\n";
1519        let options = FormattingOptions {
1520            tab_size: 4,
1521            insert_spaces: true,
1522            ..Default::default()
1523        };
1524
1525        let edits = format_on_type(
1526            text,
1527            Position {
1528                line: 2,
1529                character: 5,
1530            },
1531            "}",
1532            &options,
1533        );
1534
1535        assert_eq!(edits.len(), 1);
1536        assert_eq!(
1537            edits[0].range,
1538            Range {
1539                start: Position {
1540                    line: 2,
1541                    character: 0
1542                },
1543                end: Position {
1544                    line: 2,
1545                    character: 4
1546                }
1547            }
1548        );
1549        assert_eq!(edits[0].new_text, "");
1550    }
1551
1552    #[test]
1553    fn test_format_on_type_ignores_braces_inside_strings() {
1554        let text = "fn test() {\n    let s = f\"{x}\";\n\n}";
1555        let options = FormattingOptions {
1556            tab_size: 4,
1557            insert_spaces: true,
1558            ..Default::default()
1559        };
1560
1561        let edits = format_on_type(
1562            text,
1563            Position {
1564                line: 2,
1565                character: 0,
1566            },
1567            "\n",
1568            &options,
1569        );
1570
1571        assert_eq!(edits.len(), 1);
1572        assert_eq!(edits[0].new_text, "    ");
1573    }
1574
1575    #[test]
1576    fn test_format_on_type_newline_aligns_inside_formatted_triple_string() {
1577        let text = "fn test() {\n    let test = f\"\"\"\n\n                \"\"\"\n}\n";
1578        let options = FormattingOptions {
1579            tab_size: 4,
1580            insert_spaces: true,
1581            ..Default::default()
1582        };
1583
1584        let edits = format_on_type(
1585            text,
1586            Position {
1587                line: 2,
1588                character: 0,
1589            },
1590            "\n",
1591            &options,
1592        );
1593
1594        assert_eq!(edits.len(), 1);
1595        assert_eq!(edits[0].new_text, "                ");
1596    }
1597
1598    #[test]
1599    fn test_align_table_row_columns() {
1600        let source = "    [1, 100, 60, \"jan\"],\n    [2, 1200, 70, \"feb\"],\n";
1601        let edits = align_table_row_columns(source);
1602        let result = apply_edits(source, &edits);
1603        // "100" is 3 chars, "1200" is 4 chars → first row's second col gets padded
1604        assert_eq!(
1605            result,
1606            "    [1, 100 , 60, \"jan\"],\n    [2, 1200, 70, \"feb\"],\n"
1607        );
1608    }
1609
1610    #[test]
1611    fn test_align_table_row_columns_strings() {
1612        let source = "    [1, \"short\"],\n    [2, \"much longer\"],\n";
1613        let edits = align_table_row_columns(source);
1614        let result = apply_edits(source, &edits);
1615        assert_eq!(
1616            result,
1617            "    [1, \"short\"      ],\n    [2, \"much longer\"],\n"
1618        );
1619    }
1620
1621    #[test]
1622    fn test_align_table_row_columns_no_change_when_single_row() {
1623        let source = "    [1, 100, 60],\n";
1624        let edits = align_table_row_columns(source);
1625        assert!(
1626            edits.is_empty(),
1627            "Single row should not produce alignment edits"
1628        );
1629    }
1630}