styx_format/
cst_format.rs

1//! CST-based formatter for Styx documents.
2//!
3//! This formatter works directly with the lossless CST (Concrete Syntax Tree),
4//! preserving all comments and producing properly indented output.
5
6use styx_cst::ast::{AstNode, Document, Entry, Object, Separator, Sequence};
7use styx_cst::{SyntaxKind, SyntaxNode};
8
9use crate::FormatOptions;
10
11/// Format a Styx document from its CST.
12///
13/// This preserves all comments and produces properly indented output.
14pub fn format_cst(node: &SyntaxNode, options: FormatOptions) -> String {
15    let mut formatter = CstFormatter::new(options);
16    formatter.format_node(node);
17    formatter.finish()
18}
19
20/// Format a Styx document from source text.
21///
22/// Parses the source, formats the CST, and returns the formatted output.
23/// Returns the original source if parsing fails.
24pub fn format_source(source: &str, options: FormatOptions) -> String {
25    let parsed = styx_cst::parse(source);
26    if !parsed.is_ok() {
27        // Don't format documents with parse errors
28        return source.to_string();
29    }
30    format_cst(&parsed.syntax(), options)
31}
32
/// Mutable state used while rendering a CST back to text.
struct CstFormatter {
    /// Output accumulated so far.
    out: String,
    /// Formatting options; `options.indent` is the text emitted once per indent level.
    options: FormatOptions,
    /// Current nesting depth, i.e. how many indent units start each new line.
    indent_level: usize,
    /// Track if we're at the start of a line (for indentation)
    at_line_start: bool,
    /// Track if we just wrote a newline
    ///
    /// NOTE(review): this flag is set by `write` and `write_newline` but is not
    /// read anywhere in the visible `impl` — confirm whether it is still needed.
    after_newline: bool,
}
42
43impl CstFormatter {
44    fn new(options: FormatOptions) -> Self {
45        Self {
46            out: String::new(),
47            options,
48            indent_level: 0,
49            at_line_start: true,
50            after_newline: false,
51        }
52    }
53
54    fn finish(mut self) -> String {
55        // Ensure trailing newline
56        if !self.out.ends_with('\n') && !self.out.is_empty() {
57            self.out.push('\n');
58        }
59        self.out
60    }
61
62    fn write_indent(&mut self) {
63        if self.at_line_start && self.indent_level > 0 {
64            for _ in 0..self.indent_level {
65                self.out.push_str(self.options.indent);
66            }
67        }
68        self.at_line_start = false;
69    }
70
71    fn write(&mut self, s: &str) {
72        if s.is_empty() {
73            return;
74        }
75        self.write_indent();
76        self.out.push_str(s);
77        self.after_newline = false;
78    }
79
80    fn write_newline(&mut self) {
81        self.out.push('\n');
82        self.at_line_start = true;
83        self.after_newline = true;
84    }
85
86    fn format_node(&mut self, node: &SyntaxNode) {
87        match node.kind() {
88            // Nodes
89            SyntaxKind::DOCUMENT => self.format_document(node),
90            SyntaxKind::ENTRY => self.format_entry(node),
91            SyntaxKind::OBJECT => self.format_object(node),
92            SyntaxKind::SEQUENCE => self.format_sequence(node),
93            SyntaxKind::KEY => self.format_key(node),
94            SyntaxKind::VALUE => self.format_value(node),
95            SyntaxKind::SCALAR => self.format_scalar(node),
96            SyntaxKind::TAG => self.format_tag(node),
97            SyntaxKind::TAG_NAME => self.format_tag_name(node),
98            SyntaxKind::TAG_PAYLOAD => self.format_tag_payload(node),
99            SyntaxKind::UNIT => self.write("@"),
100            SyntaxKind::HEREDOC => self.format_heredoc(node),
101            SyntaxKind::ATTRIBUTES => self.format_attributes(node),
102            SyntaxKind::ATTRIBUTE => self.format_attribute(node),
103
104            // Tokens - should not appear as nodes, but handle gracefully
105            SyntaxKind::L_BRACE
106            | SyntaxKind::R_BRACE
107            | SyntaxKind::L_PAREN
108            | SyntaxKind::R_PAREN
109            | SyntaxKind::COMMA
110            | SyntaxKind::GT
111            | SyntaxKind::AT
112            | SyntaxKind::BARE_SCALAR
113            | SyntaxKind::QUOTED_SCALAR
114            | SyntaxKind::RAW_SCALAR
115            | SyntaxKind::HEREDOC_START
116            | SyntaxKind::HEREDOC_CONTENT
117            | SyntaxKind::HEREDOC_END
118            | SyntaxKind::LINE_COMMENT
119            | SyntaxKind::DOC_COMMENT
120            | SyntaxKind::WHITESPACE
121            | SyntaxKind::NEWLINE
122            | SyntaxKind::EOF
123            | SyntaxKind::ERROR
124            | SyntaxKind::__LAST_TOKEN => {
125                // Tokens shouldn't be passed to format_node (they're not nodes)
126                // but if they are, ignore them - they're handled by their parent
127            }
128        }
129    }
130
131    fn format_document(&mut self, node: &SyntaxNode) {
132        let doc = Document::cast(node.clone()).unwrap();
133        let entries: Vec<_> = doc.entries().collect();
134
135        // Track consecutive newlines to preserve blank lines from input
136        let mut consecutive_newlines = 0;
137        let mut entry_index = 0;
138        let mut wrote_content = false;
139        // Track if we just wrote a doc comment (entry should follow without blank line)
140        let mut just_wrote_doc_comment = false;
141
142        for el in node.children_with_tokens() {
143            match el.kind() {
144                SyntaxKind::NEWLINE => {
145                    consecutive_newlines += 1;
146                }
147                SyntaxKind::WHITESPACE => {
148                    // Ignore whitespace
149                }
150                SyntaxKind::LINE_COMMENT => {
151                    if let Some(token) = el.into_token() {
152                        if wrote_content {
153                            self.write_newline();
154                            // 2+ consecutive newlines means there was a blank line
155                            if consecutive_newlines >= 2 {
156                                self.write_newline();
157                            }
158                        }
159                        self.write(token.text());
160                        wrote_content = true;
161                        consecutive_newlines = 0;
162                        just_wrote_doc_comment = false;
163                    }
164                }
165                SyntaxKind::DOC_COMMENT => {
166                    if let Some(token) = el.into_token() {
167                        if wrote_content {
168                            self.write_newline();
169
170                            // Add extra blank line before doc comment:
171                            // - if source had 2+ consecutive newlines (preserve existing)
172                            // - if previous entry was schema declaration
173                            // - if previous entry had doc comments
174                            // - if previous entry is a block (issue #28)
175                            let had_blank_line = consecutive_newlines >= 2;
176                            let prev_was_schema =
177                                entry_index == 1 && is_schema_declaration(&entries[0]);
178                            let prev_had_doc = entry_index > 0
179                                && entries[entry_index - 1].doc_comments().next().is_some();
180                            let prev_is_block =
181                                entry_index > 0 && is_block_entry(&entries[entry_index - 1]);
182
183                            if had_blank_line || prev_was_schema || prev_had_doc || prev_is_block {
184                                self.write_newline();
185                            }
186                        }
187                        self.write(token.text());
188                        wrote_content = true;
189                        consecutive_newlines = 0;
190                        just_wrote_doc_comment = true;
191                    }
192                }
193                SyntaxKind::ENTRY => {
194                    if let Some(entry_node) = el.into_node() {
195                        let entry = &entries[entry_index];
196
197                        if wrote_content {
198                            self.write_newline();
199
200                            // Add extra blank line before entry (only if not preceded by doc comment):
201                            // - if source had 2+ consecutive newlines (preserve existing blank lines)
202                            // - if previous entry was schema declaration (@ entry at root)
203                            // - if previous entry had doc comments (and this entry has none)
204                            // - if previous or current entry is a block (issue #28)
205                            if !just_wrote_doc_comment {
206                                let had_blank_line = consecutive_newlines >= 2;
207                                let prev_was_schema =
208                                    entry_index == 1 && is_schema_declaration(&entries[0]);
209                                let prev_had_doc = entry_index > 0
210                                    && entries[entry_index - 1].doc_comments().next().is_some();
211                                let prev_is_block =
212                                    entry_index > 0 && is_block_entry(&entries[entry_index - 1]);
213                                let current_is_block = is_block_entry(entry);
214
215                                if had_blank_line
216                                    || prev_was_schema
217                                    || prev_had_doc
218                                    || prev_is_block
219                                    || current_is_block
220                                {
221                                    self.write_newline();
222                                }
223                            }
224                        }
225
226                        self.format_node(&entry_node);
227                        wrote_content = true;
228                        consecutive_newlines = 0;
229                        entry_index += 1;
230                        just_wrote_doc_comment = false;
231                    }
232                }
233                _ => {
234                    // Skip other tokens
235                }
236            }
237        }
238    }
239
240    fn format_entry(&mut self, node: &SyntaxNode) {
241        let entry = Entry::cast(node.clone()).unwrap();
242
243        if let Some(key) = entry.key() {
244            self.format_node(key.syntax());
245        }
246
247        // Space between key and value
248        if entry.value().is_some() {
249            self.write(" ");
250        }
251
252        if let Some(value) = entry.value() {
253            self.format_node(value.syntax());
254        }
255    }
256
257    fn format_object(&mut self, node: &SyntaxNode) {
258        let obj = Object::cast(node.clone()).unwrap();
259        let entries: Vec<_> = obj.entries().collect();
260        let separator = obj.separator();
261
262        self.write("{");
263
264        // Check if the object contains any comments (even if no entries)
265        let has_comments = node.children_with_tokens().any(|el| {
266            matches!(
267                el.kind(),
268                SyntaxKind::LINE_COMMENT | SyntaxKind::DOC_COMMENT
269            )
270        });
271
272        // Empty object with no comments
273        if entries.is_empty() && !has_comments {
274            self.write("}");
275            return;
276        }
277
278        // Check if any entry contains a block object - if so, parent should expand too
279        let has_block_child = entries.iter().any(|e| contains_block_object(e.syntax()));
280
281        // Determine if we need multiline format
282        let is_multiline = matches!(separator, Separator::Newline | Separator::Mixed)
283            || has_comments
284            || has_block_child
285            || entries.is_empty(); // Empty with comments needs multiline
286
287        if is_multiline {
288            // Multiline format - preserve comments as children of the object
289            self.write_newline();
290            self.indent_level += 1;
291
292            // Iterate through all children to preserve comments in order
293            // Track consecutive newlines to preserve blank lines
294            let mut wrote_content = false;
295            let mut consecutive_newlines = 0;
296            for el in node.children_with_tokens() {
297                match el.kind() {
298                    SyntaxKind::NEWLINE => {
299                        consecutive_newlines += 1;
300                    }
301                    SyntaxKind::LINE_COMMENT | SyntaxKind::DOC_COMMENT => {
302                        if let Some(token) = el.into_token() {
303                            if wrote_content {
304                                self.write_newline();
305                                // 2+ consecutive newlines means there was a blank line
306                                if consecutive_newlines >= 2 {
307                                    self.write_newline();
308                                }
309                            }
310                            self.write(token.text());
311                            wrote_content = true;
312                            consecutive_newlines = 0;
313                        }
314                    }
315                    SyntaxKind::ENTRY => {
316                        if let Some(entry_node) = el.into_node() {
317                            if wrote_content {
318                                self.write_newline();
319                                // 2+ consecutive newlines means there was a blank line
320                                if consecutive_newlines >= 2 {
321                                    self.write_newline();
322                                }
323                            }
324                            self.format_node(&entry_node);
325                            wrote_content = true;
326                            consecutive_newlines = 0;
327                        }
328                    }
329                    // Skip whitespace, braces - we handle formatting ourselves
330                    // Whitespace doesn't reset newline count (it comes between newlines)
331                    SyntaxKind::WHITESPACE | SyntaxKind::L_BRACE | SyntaxKind::R_BRACE => {}
332                    _ => {
333                        consecutive_newlines = 0;
334                    }
335                }
336            }
337
338            self.write_newline();
339            self.indent_level -= 1;
340            self.write("}");
341        } else {
342            // Inline format (comma-separated, no comments)
343            for (i, entry) in entries.iter().enumerate() {
344                self.format_node(entry.syntax());
345
346                if i < entries.len() - 1 {
347                    self.write(", ");
348                }
349            }
350            self.write("}");
351        }
352    }
353
354    fn format_sequence(&mut self, node: &SyntaxNode) {
355        let seq = Sequence::cast(node.clone()).unwrap();
356        let entries: Vec<_> = seq.entries().collect();
357
358        self.write("(");
359
360        // Check if the sequence contains any comments (even if no entries)
361        let has_comments = node.children_with_tokens().any(|el| {
362            matches!(
363                el.kind(),
364                SyntaxKind::LINE_COMMENT | SyntaxKind::DOC_COMMENT
365            )
366        });
367
368        // Empty sequence with no comments
369        if entries.is_empty() && !has_comments {
370            self.write(")");
371            return;
372        }
373
374        // Determine if we need multiline format
375        // But collapse trivial sequences: single simple element should be inline
376        let should_collapse =
377            !has_comments && entries.len() == 1 && !contains_block_object(entries[0].syntax());
378
379        // Special case: single entry that is a tag with block payload - format inline with paren
380        // e.g., @optional(@object{...}) should format as (@object{\n...\n}) not (\n@object{...}\n)
381        let single_tag_with_block =
382            !has_comments && entries.len() == 1 && is_tag_with_block_payload(entries[0].syntax());
383
384        let is_multiline = !should_collapse
385            && !single_tag_with_block
386            && (seq.is_multiline() || has_comments || entries.is_empty());
387
388        if single_tag_with_block {
389            // Format the single entry inline with the paren - no newline after (
390            if let Some(key) = entries[0]
391                .syntax()
392                .children()
393                .find(|n| n.kind() == SyntaxKind::KEY)
394            {
395                for child in key.children() {
396                    self.format_node(&child);
397                }
398            }
399            self.write(")");
400        } else if is_multiline {
401            // Multiline format - preserve comments as children of the sequence
402            self.write_newline();
403            self.indent_level += 1;
404
405            // Iterate through all children to preserve comments in order
406            let mut wrote_content = false;
407            let mut consecutive_newlines = 0;
408            for el in node.children_with_tokens() {
409                match el.kind() {
410                    SyntaxKind::NEWLINE => {
411                        consecutive_newlines += 1;
412                    }
413                    SyntaxKind::LINE_COMMENT | SyntaxKind::DOC_COMMENT => {
414                        if let Some(token) = el.into_token() {
415                            if wrote_content {
416                                self.write_newline();
417                                // 2+ consecutive newlines means there was a blank line
418                                if consecutive_newlines >= 2 {
419                                    self.write_newline();
420                                }
421                            }
422                            self.write(token.text());
423                            wrote_content = true;
424                            consecutive_newlines = 0;
425                        }
426                    }
427                    SyntaxKind::ENTRY => {
428                        if let Some(entry_node) = el.into_node() {
429                            if wrote_content {
430                                self.write_newline();
431                                // 2+ consecutive newlines means there was a blank line
432                                if consecutive_newlines >= 2 {
433                                    self.write_newline();
434                                }
435                            }
436                            // Format the entry's value (sequence entries have implicit unit keys)
437                            if let Some(key) =
438                                entry_node.children().find(|n| n.kind() == SyntaxKind::KEY)
439                            {
440                                for child in key.children() {
441                                    self.format_node(&child);
442                                }
443                            }
444                            wrote_content = true;
445                            consecutive_newlines = 0;
446                        }
447                    }
448                    // Skip whitespace, parens - we handle formatting ourselves
449                    SyntaxKind::WHITESPACE | SyntaxKind::L_PAREN | SyntaxKind::R_PAREN => {}
450                    _ => {
451                        consecutive_newlines = 0;
452                    }
453                }
454            }
455
456            self.write_newline();
457            self.indent_level -= 1;
458            self.write(")");
459        } else {
460            // Inline format - single line with spaces (no comments possible here)
461            for (i, entry) in entries.iter().enumerate() {
462                // Get the actual value from the entry's key
463                if let Some(key) = entry
464                    .syntax()
465                    .children()
466                    .find(|n| n.kind() == SyntaxKind::KEY)
467                {
468                    for child in key.children() {
469                        self.format_node(&child);
470                    }
471                }
472
473                if i < entries.len() - 1 {
474                    self.write(" ");
475                }
476            }
477            self.write(")");
478        }
479    }
480
481    fn format_key(&mut self, node: &SyntaxNode) {
482        // Format the key content (scalar, tag, unit, etc.)
483        for child in node.children() {
484            self.format_node(&child);
485        }
486
487        // Also check for direct tokens (like BARE_SCALAR in simple keys)
488        for token in node.children_with_tokens().filter_map(|el| el.into_token()) {
489            match token.kind() {
490                SyntaxKind::BARE_SCALAR | SyntaxKind::QUOTED_SCALAR | SyntaxKind::RAW_SCALAR => {
491                    self.write(token.text());
492                }
493                _ => {}
494            }
495        }
496    }
497
498    fn format_value(&mut self, node: &SyntaxNode) {
499        for child in node.children() {
500            self.format_node(&child);
501        }
502    }
503
504    fn format_scalar(&mut self, node: &SyntaxNode) {
505        // Get the scalar token and write it as-is
506        for token in node.children_with_tokens().filter_map(|el| el.into_token()) {
507            match token.kind() {
508                SyntaxKind::BARE_SCALAR | SyntaxKind::QUOTED_SCALAR | SyntaxKind::RAW_SCALAR => {
509                    self.write(token.text());
510                }
511                _ => {}
512            }
513        }
514    }
515
516    fn format_tag(&mut self, node: &SyntaxNode) {
517        self.write("@");
518
519        // Per grammar: Tag ::= '@' TagName TagPayload?
520        // TagPayload must be immediately attached (no whitespace allowed)
521        // TagPayload ::= Object | Sequence | QuotedScalar | RawScalar | HeredocScalar | '@'
522        for el in node.children_with_tokens() {
523            if let rowan::NodeOrToken::Node(child) = el {
524                match child.kind() {
525                    SyntaxKind::TAG_NAME => self.format_tag_name(&child),
526                    SyntaxKind::TAG_PAYLOAD => self.format_tag_payload(&child),
527                    _ => {}
528                }
529            }
530        }
531    }
532
533    fn format_tag_name(&mut self, node: &SyntaxNode) {
534        for token in node.children_with_tokens().filter_map(|el| el.into_token()) {
535            if token.kind() == SyntaxKind::BARE_SCALAR {
536                self.write(token.text());
537            }
538        }
539    }
540
541    fn format_tag_payload(&mut self, node: &SyntaxNode) {
542        for child in node.children() {
543            match child.kind() {
544                SyntaxKind::SEQUENCE => {
545                    // Sequence payload: @tag(...)
546                    self.format_sequence(&child);
547                }
548                SyntaxKind::OBJECT => {
549                    // Object payload: @tag{...}
550                    self.format_object(&child);
551                }
552                _ => self.format_node(&child),
553            }
554        }
555    }
556
557    fn format_heredoc(&mut self, node: &SyntaxNode) {
558        // Heredocs are preserved as-is
559        self.write(&node.to_string());
560    }
561
562    fn format_attributes(&mut self, node: &SyntaxNode) {
563        let attrs: Vec<_> = node
564            .children()
565            .filter(|n| n.kind() == SyntaxKind::ATTRIBUTE)
566            .collect();
567
568        for (i, attr) in attrs.iter().enumerate() {
569            self.format_attribute(attr);
570            if i < attrs.len() - 1 {
571                self.write(" ");
572            }
573        }
574    }
575
576    fn format_attribute(&mut self, node: &SyntaxNode) {
577        // Attribute structure: BARE_SCALAR ">" SCALAR
578        for el in node.children_with_tokens() {
579            match el {
580                rowan::NodeOrToken::Token(token) => match token.kind() {
581                    SyntaxKind::BARE_SCALAR => self.write(token.text()),
582                    SyntaxKind::GT => self.write(">"),
583                    _ => {}
584                },
585                rowan::NodeOrToken::Node(child) => {
586                    self.format_node(&child);
587                }
588            }
589        }
590    }
591}
592
593/// Check if an entry is a "block" entry (contains a multiline object at top level).
594/// Block entries need blank lines around them per issue #28.
595fn is_block_entry(entry: &Entry) -> bool {
596    if let Some(value) = entry.value() {
597        // Check if the value directly contains a block-style object
598        contains_block_object(value.syntax())
599    } else {
600        false
601    }
602}
603
604/// Check if a sequence entry is a tag with a block object payload.
605/// Used to format `(@object{...})` as `(@object{\n...\n})` not `(\n@object{...}\n)`.
606fn is_tag_with_block_payload(entry_node: &SyntaxNode) -> bool {
607    // Find the KEY child of the entry
608    let key = match entry_node.children().find(|n| n.kind() == SyntaxKind::KEY) {
609        Some(k) => k,
610        None => return false,
611    };
612
613    // Look for a TAG child in the key
614    for child in key.children() {
615        if child.kind() == SyntaxKind::TAG {
616            // Check if this tag has a TAG_PAYLOAD with a block object
617            for tag_child in child.children() {
618                if tag_child.kind() == SyntaxKind::TAG_PAYLOAD {
619                    // Check if the payload contains a block object
620                    return contains_block_object(&tag_child);
621                }
622            }
623        }
624    }
625
626    false
627}
628
629/// Recursively check if a node contains a block-style object or doc comments.
630/// Objects with doc comments also need to be block-formatted.
631fn contains_block_object(node: &SyntaxNode) -> bool {
632    // Check this node if it's an object
633    if node.kind() == SyntaxKind::OBJECT
634        && let Some(obj) = Object::cast(node.clone())
635    {
636        let sep = obj.separator();
637        if matches!(sep, Separator::Newline | Separator::Mixed) {
638            return true;
639        }
640        // Also check if the object contains doc comments
641        if node
642            .children_with_tokens()
643            .any(|el| el.kind() == SyntaxKind::DOC_COMMENT)
644        {
645            return true;
646        }
647    }
648
649    // Recursively check all descendants
650    for child in node.children() {
651        if contains_block_object(&child) {
652            return true;
653        }
654    }
655
656    false
657}
658
659/// Check if an entry is a schema declaration (@schema tag as key).
660fn is_schema_declaration(entry: &Entry) -> bool {
661    if let Some(key) = entry.key() {
662        // Check if the key contains a @schema tag
663        key.syntax().children().any(|n| {
664            if n.kind() == SyntaxKind::TAG {
665                // Look for TAG_NAME child with text "schema"
666                n.children().any(|child| {
667                    child.kind() == SyntaxKind::TAG_NAME && child.to_string() == "schema"
668                })
669            } else {
670                false
671            }
672        })
673    } else {
674        false
675    }
676}
677
678#[cfg(test)]
679mod tests {
680    use super::*;
681
682    fn format(source: &str) -> String {
683        format_source(source, FormatOptions::default())
684    }
685
686    #[test]
687    fn test_parse_errors_detected() {
688        // This input has a parse error - space-separated entries in inline object
689        let input = "config {a 1 b 2}";
690        let parsed = styx_cst::parse(input);
691        assert!(
692            !parsed.is_ok(),
693            "Expected parse errors for '{}', but got none. Errors: {:?}",
694            input,
695            parsed.errors()
696        );
697        // Formatter should return original source for documents with errors
698        let output = format(input);
699        assert_eq!(
700            output, input,
701            "Formatter should return original source for documents with parse errors"
702        );
703    }
704
705    #[test]
706    fn test_simple_document() {
707        let input = "name Alice\nage 30";
708        let output = format(input);
709        insta::assert_snapshot!(output);
710    }
711
712    #[test]
713    fn test_preserves_comments() {
714        let input = r#"// This is a comment
715name Alice
716/// Doc comment
717age 30"#;
718        let output = format(input);
719        insta::assert_snapshot!(output);
720    }
721
722    #[test]
723    fn test_inline_object() {
724        let input = "point {x 1, y 2}";
725        let output = format(input);
726        insta::assert_snapshot!(output);
727    }
728
729    #[test]
730    fn test_multiline_object() {
731        let input = "server {\n  host localhost\n  port 8080\n}";
732        let output = format(input);
733        insta::assert_snapshot!(output);
734    }
735
736    #[test]
737    fn test_nested_objects() {
738        let input = "config {\n  server {\n    host localhost\n  }\n}";
739        let output = format(input);
740        insta::assert_snapshot!(output);
741    }
742
743    #[test]
744    fn test_sequence() {
745        let input = "items (a b c)";
746        let output = format(input);
747        insta::assert_snapshot!(output);
748    }
749
750    #[test]
751    fn test_tagged_value() {
752        let input = "type @string";
753        let output = format(input);
754        insta::assert_snapshot!(output);
755    }
756
757    #[test]
758    fn test_schema_declaration() {
759        let input = "@schema schema.styx\n\nname test";
760        let output = format(input);
761        insta::assert_snapshot!(output);
762    }
763
764    #[test]
765    fn test_tag_with_nested_tag_payload() {
766        // Note: `@string @Schema` parses as @string with @Schema as its payload
767        // This is intentional grammar behavior - tags consume the next value as payload
768        let input = "@seq(@string @Schema)";
769        let output = format(input);
770        // The formatter must preserve the space before the nested payload
771        assert_eq!(output.trim(), "@seq(@string @Schema)");
772    }
773
774    #[test]
775    fn test_sequence_with_multiple_scalars() {
776        let input = "(a b c)";
777        let output = format(input);
778        assert_eq!(output.trim(), "(a b c)");
779    }
780
781    #[test]
782    fn test_complex_schema() {
783        let input = r#"meta {
784  id https://example.com/schema
785  version 1.0
786}
787schema {
788  @ @object{
789    name @string
790    port @int
791  }
792}"#;
793        let output = format(input);
794        insta::assert_snapshot!(output);
795    }
796
797    #[test]
798    fn test_path_syntax_in_object() {
799        let input = r#"resources {
800    limits cpu>500m memory>256Mi
801    requests cpu>100m memory>128Mi
802}"#;
803        let output = format(input);
804        insta::assert_snapshot!(output);
805    }
806
807    #[test]
808    fn test_syntax_error_space_after_gt() {
809        // Space after > is a syntax error - should return original
810        let input = "limits cpu> 500m";
811        let parsed = styx_cst::parse(input);
812        assert!(!parsed.is_ok(), "should have parse error");
813        let output = format(input);
814        assert_eq!(output, input);
815    }
816
817    #[test]
818    fn test_syntax_error_space_before_gt() {
819        // Space before > is a syntax error - should return original
820        let input = "limits cpu >500m";
821        let parsed = styx_cst::parse(input);
822        assert!(!parsed.is_ok(), "should have parse error");
823        let output = format(input);
824        assert_eq!(output, input);
825    }
826
827    #[test]
828    fn test_tag_with_separate_sequence() {
829        // @a () has space between tag and sequence - must be preserved
830        // (whitespace affects parsing semantics for tag payloads)
831        let input = "@a ()";
832        let output = format(input);
833        assert_eq!(output.trim(), "@a ()");
834    }
835
836    #[test]
837    fn test_tag_with_attached_sequence() {
838        // @a() has no space - compact form must stay compact
839        let input = "@a()";
840        let output = format(input);
841        assert_eq!(output.trim(), "@a()");
842    }
843
844    // === Sequence comment tests ===
845
846    #[test]
847    fn test_multiline_sequence_preserves_structure() {
848        let input = r#"items (
849  a
850  b
851  c
852)"#;
853        let output = format(input);
854        insta::assert_snapshot!(output);
855    }
856
857    #[test]
858    fn test_sequence_with_trailing_comment() {
859        let input = r#"extends (
860  "@eslint/js:recommended"
861  typescript-eslint:strictTypeChecked
862  // don't fold
863)"#;
864        let output = format(input);
865        insta::assert_snapshot!(output);
866    }
867
868    #[test]
869    fn test_sequence_with_inline_comments() {
870        let input = r#"items (
871  // first item
872  a
873  // second item
874  b
875)"#;
876        let output = format(input);
877        insta::assert_snapshot!(output);
878    }
879
880    #[test]
881    fn test_sequence_comment_idempotent() {
882        let input = r#"extends (
883  "@eslint/js:recommended"
884  typescript-eslint:strictTypeChecked
885  // don't fold
886)"#;
887        let once = format(input);
888        let twice = format(&once);
889        assert_eq!(once, twice, "formatting should be idempotent");
890    }
891
892    #[test]
893    fn test_inline_sequence_stays_inline() {
894        // No newlines or comments = stays inline
895        let input = "items (a b c)";
896        let output = format(input);
897        assert_eq!(output.trim(), "items (a b c)");
898    }
899
900    #[test]
901    fn test_sequence_with_doc_comment() {
902        let input = r#"items (
903  /// Documentation for first
904  a
905  b
906)"#;
907        let output = format(input);
908        insta::assert_snapshot!(output);
909    }
910
911    #[test]
912    fn test_nested_multiline_sequence() {
913        let input = r#"outer (
914  (a b)
915  // between
916  (c d)
917)"#;
918        let output = format(input);
919        insta::assert_snapshot!(output);
920    }
921
922    #[test]
923    fn test_sequence_in_object_with_comment() {
924        let input = r#"config {
925  items (
926    a
927    // comment
928    b
929  )
930}"#;
931        let output = format(input);
932        insta::assert_snapshot!(output);
933    }
934
935    #[test]
936    fn test_object_with_only_comments() {
937        // Regression test: objects containing only comments should preserve them
938        let input = r#"pre-commit {
939    // generate-readmes false
940    // rustfmt false
941    // cargo-lock false
942}"#;
943        let output = format(input);
944        insta::assert_snapshot!(output);
945    }
946
947    #[test]
948    fn test_object_comments_with_blank_line() {
949        // Regression test: blank lines between comment groups should be preserved
950        let input = r#"config {
951    // first group
952    // still first group
953
954    // second group after blank line
955    // still second group
956}"#;
957        let output = format(input);
958        insta::assert_snapshot!(output);
959    }
960
961    #[test]
962    fn test_object_mixed_entries_and_comments() {
963        // Test mixing actual entries with commented-out entries
964        let input = r#"settings {
965    enabled true
966    // disabled-option false
967    name "test"
968    // another-disabled option
969}"#;
970        let output = format(input);
971        insta::assert_snapshot!(output);
972    }
973
974    #[test]
975    fn test_schema_with_doc_comments_in_inline_object() {
976        // Regression test: doc comments inside an inline object must be preserved
977        // and the object must be expanded to multiline format
978        let input = include_str!("fixtures/before-format.styx");
979        let output = format(input);
980
981        // The doc comments must be preserved
982        assert!(
983            output.contains("/// Features to use for clippy"),
984            "Doc comment for clippy-features was lost!\nOutput:\n{}",
985            output
986        );
987        assert!(
988            output.contains("/// Features to use for docs"),
989            "Doc comment for docs-features was lost!\nOutput:\n{}",
990            output
991        );
992        assert!(
993            output.contains("/// Features to use for doc tests"),
994            "Doc comment for doc-test-features was lost!\nOutput:\n{}",
995            output
996        );
997
998        insta::assert_snapshot!(output);
999    }
1000
1001    #[test]
1002    fn test_dibs_extracted_schema() {
1003        // Complex schema extracted from dibs binary - tests deeply nested structures
1004        let input = include_str!("fixtures/dibs-extracted.styx");
1005        let output = format(input);
1006        insta::assert_snapshot!(output);
1007    }
1008
1009    // ============================================================
1010    // SYSTEMATIC FORMATTER TESTS - 100 cases of increasing complexity
1011    // ============================================================
1012
1013    // --- 1-10: Basic scalars and simple entries ---
1014
1015    #[test]
1016    fn fmt_001_bare_scalar() {
1017        insta::assert_snapshot!(format("foo bar"));
1018    }
1019
1020    #[test]
1021    fn fmt_002_quoted_scalar() {
1022        insta::assert_snapshot!(format(r#"foo "hello world""#));
1023    }
1024
1025    #[test]
1026    fn fmt_003_raw_scalar() {
1027        insta::assert_snapshot!(format(r#"path r"/usr/bin""#));
1028    }
1029
1030    #[test]
1031    fn fmt_004_multiple_entries() {
1032        insta::assert_snapshot!(format("foo bar\nbaz qux"));
1033    }
1034
1035    #[test]
1036    fn fmt_005_unit_tag() {
1037        insta::assert_snapshot!(format("empty @"));
1038    }
1039
1040    #[test]
1041    fn fmt_006_simple_tag() {
1042        insta::assert_snapshot!(format("type @string"));
1043    }
1044
1045    #[test]
1046    fn fmt_007_tag_with_scalar_payload() {
1047        insta::assert_snapshot!(format(r#"default @default("hello")"#));
1048    }
1049
1050    #[test]
1051    fn fmt_008_nested_tags() {
1052        insta::assert_snapshot!(format("type @optional(@string)"));
1053    }
1054
1055    #[test]
1056    fn fmt_009_deeply_nested_tags() {
1057        insta::assert_snapshot!(format("type @seq(@optional(@string))"));
1058    }
1059
1060    #[test]
1061    fn fmt_010_path_syntax() {
1062        insta::assert_snapshot!(format("limits cpu>500m memory>256Mi"));
1063    }
1064
1065    // --- 11-20: Inline objects ---
1066
1067    #[test]
1068    fn fmt_011_empty_inline_object() {
1069        insta::assert_snapshot!(format("config {}"));
1070    }
1071
1072    #[test]
1073    fn fmt_012_single_entry_inline_object() {
1074        insta::assert_snapshot!(format("config {name foo}"));
1075    }
1076
1077    #[test]
1078    fn fmt_013_multi_entry_inline_object() {
1079        insta::assert_snapshot!(format("point {x 1, y 2, z 3}"));
1080    }
1081
1082    #[test]
1083    fn fmt_014_nested_inline_objects() {
1084        insta::assert_snapshot!(format("outer {inner {value 42}}"));
1085    }
1086
1087    #[test]
1088    fn fmt_015_inline_object_with_tags() {
1089        insta::assert_snapshot!(format("schema {name @string, age @int}"));
1090    }
1091
1092    #[test]
1093    fn fmt_016_tag_with_inline_object_payload() {
1094        insta::assert_snapshot!(format("type @object{name @string}"));
1095    }
1096
1097    #[test]
1098    fn fmt_017_inline_object_no_commas() {
1099        // Parser might accept this - test what formatter does
1100        insta::assert_snapshot!(format("config {a 1 b 2}"));
1101    }
1102
1103    #[test]
1104    fn fmt_018_inline_object_mixed_separators() {
1105        insta::assert_snapshot!(format("config {a 1, b 2 c 3}"));
1106    }
1107
1108    #[test]
1109    fn fmt_019_deeply_nested_inline() {
1110        insta::assert_snapshot!(format("a {b {c {d {e 1}}}}"));
1111    }
1112
1113    #[test]
1114    fn fmt_020_inline_with_unit_values() {
1115        insta::assert_snapshot!(format("flags {debug @, verbose @}"));
1116    }
1117
1118    // --- 21-30: Block objects ---
1119
1120    #[test]
1121    fn fmt_021_simple_block_object() {
1122        insta::assert_snapshot!(format("config {\n  name foo\n  value bar\n}"));
1123    }
1124
1125    #[test]
1126    fn fmt_022_block_object_irregular_indent() {
1127        insta::assert_snapshot!(format("config {\n    name foo\n  value bar\n}"));
1128    }
1129
1130    #[test]
1131    fn fmt_023_nested_block_objects() {
1132        insta::assert_snapshot!(format("outer {\n  inner {\n    value 42\n  }\n}"));
1133    }
1134
1135    #[test]
1136    fn fmt_024_block_with_inline_child() {
1137        insta::assert_snapshot!(format("config {\n  point {x 1, y 2}\n  name foo\n}"));
1138    }
1139
1140    #[test]
1141    fn fmt_025_inline_with_block_child() {
1142        // Inline object containing a block - should this expand?
1143        insta::assert_snapshot!(format("config {nested {\n  a 1\n}}"));
1144    }
1145
1146    #[test]
1147    fn fmt_026_block_object_blank_lines() {
1148        insta::assert_snapshot!(format("config {\n  a 1\n\n  b 2\n}"));
1149    }
1150
1151    #[test]
1152    fn fmt_027_block_object_multiple_blank_lines() {
1153        insta::assert_snapshot!(format("config {\n  a 1\n\n\n\n  b 2\n}"));
1154    }
1155
1156    #[test]
1157    fn fmt_028_empty_block_object() {
1158        insta::assert_snapshot!(format("config {\n}"));
1159    }
1160
1161    #[test]
1162    fn fmt_029_block_single_entry() {
1163        insta::assert_snapshot!(format("config {\n  only_one value\n}"));
1164    }
1165
1166    #[test]
1167    fn fmt_030_mixed_block_inline_siblings() {
1168        insta::assert_snapshot!(format("a {x 1}\nb {\n  y 2\n}"));
1169    }
1170
1171    // --- 31-40: Sequences ---
1172
1173    #[test]
1174    fn fmt_031_empty_sequence() {
1175        insta::assert_snapshot!(format("items ()"));
1176    }
1177
1178    #[test]
1179    fn fmt_032_single_item_sequence() {
1180        insta::assert_snapshot!(format("items (one)"));
1181    }
1182
1183    #[test]
1184    fn fmt_033_multi_item_sequence() {
1185        insta::assert_snapshot!(format("items (a b c d e)"));
1186    }
1187
1188    #[test]
1189    fn fmt_034_nested_sequences() {
1190        insta::assert_snapshot!(format("matrix ((1 2) (3 4))"));
1191    }
1192
1193    #[test]
1194    fn fmt_035_sequence_of_objects() {
1195        insta::assert_snapshot!(format("points ({x 1} {x 2})"));
1196    }
1197
1198    #[test]
1199    fn fmt_036_block_sequence() {
1200        insta::assert_snapshot!(format("items (\n  a\n  b\n  c\n)"));
1201    }
1202
1203    #[test]
1204    fn fmt_037_sequence_with_trailing_newline() {
1205        insta::assert_snapshot!(format("items (a b c\n)"));
1206    }
1207
1208    #[test]
1209    fn fmt_038_tag_with_sequence_payload() {
1210        insta::assert_snapshot!(format("type @seq(a b c)"));
1211    }
1212
1213    #[test]
1214    fn fmt_039_tag_sequence_attached() {
1215        insta::assert_snapshot!(format("type @seq()"));
1216    }
1217
1218    #[test]
1219    fn fmt_040_tag_sequence_detached() {
1220        insta::assert_snapshot!(format("type @seq ()"));
1221    }
1222
1223    // --- 41-50: Comments ---
1224
1225    #[test]
1226    fn fmt_041_line_comment_before_entry() {
1227        insta::assert_snapshot!(format("// comment\nfoo bar"));
1228    }
1229
1230    #[test]
1231    fn fmt_042_doc_comment_before_entry() {
1232        insta::assert_snapshot!(format("/// doc comment\nfoo bar"));
1233    }
1234
1235    #[test]
1236    fn fmt_043_comment_inside_block_object() {
1237        insta::assert_snapshot!(format("config {\n  // comment\n  foo bar\n}"));
1238    }
1239
1240    #[test]
1241    fn fmt_044_doc_comment_inside_block_object() {
1242        insta::assert_snapshot!(format("config {\n  /// doc\n  foo bar\n}"));
1243    }
1244
1245    #[test]
1246    fn fmt_045_comment_between_entries() {
1247        insta::assert_snapshot!(format("config {\n  a 1\n  // middle\n  b 2\n}"));
1248    }
1249
1250    #[test]
1251    fn fmt_046_comment_at_end_of_object() {
1252        insta::assert_snapshot!(format("config {\n  a 1\n  // trailing\n}"));
1253    }
1254
1255    #[test]
1256    fn fmt_047_inline_object_with_doc_comment() {
1257        // Doc comment forces expansion
1258        insta::assert_snapshot!(format("config {/// doc\na 1, b 2}"));
1259    }
1260
1261    #[test]
1262    fn fmt_048_comment_in_sequence() {
1263        insta::assert_snapshot!(format("items (\n  // comment\n  a\n  b\n)"));
1264    }
1265
1266    #[test]
1267    fn fmt_049_multiple_comments_grouped() {
1268        insta::assert_snapshot!(format("config {\n  // first\n  // second\n  a 1\n}"));
1269    }
1270
1271    #[test]
1272    fn fmt_050_comments_with_blank_line_between() {
1273        insta::assert_snapshot!(format("config {\n  // group 1\n\n  // group 2\n  a 1\n}"));
1274    }
1275
1276    // --- 51-60: The problematic cases from styx extract ---
1277
1278    #[test]
1279    fn fmt_051_optional_with_newline_before_close() {
1280        // This is the minimal repro of the dibs issue
1281        insta::assert_snapshot!(format("foo @optional(@string\n)"));
1282    }
1283
1284    #[test]
1285    fn fmt_052_seq_with_newline_before_close() {
1286        insta::assert_snapshot!(format("foo @seq(@string\n)"));
1287    }
1288
1289    #[test]
1290    fn fmt_053_object_with_newline_before_close() {
1291        insta::assert_snapshot!(format("foo @object{a @string\n}"));
1292    }
1293
1294    #[test]
1295    fn fmt_054_deeply_nested_with_weird_breaks() {
1296        insta::assert_snapshot!(format("foo @optional(@object{a @seq(@string\n)\n})"));
1297    }
1298
1299    #[test]
1300    fn fmt_055_closing_delimiters_on_own_lines() {
1301        insta::assert_snapshot!(format("foo @a(@b{x 1\n}\n)"));
1302    }
1303
1304    #[test]
1305    fn fmt_056_inline_entries_one_has_doc_comment() {
1306        // If ANY entry has doc comment, whole object should be block
1307        insta::assert_snapshot!(format("config {a @unit, /// doc\nb @unit, c @unit}"));
1308    }
1309
1310    #[test]
1311    fn fmt_057_mixed_inline_block_with_doc() {
1312        insta::assert_snapshot!(format("schema {@ @object{a @unit, /// doc\nb @string}}"));
1313    }
1314
1315    #[test]
1316    fn fmt_058_tag_map_with_doc_comments() {
1317        insta::assert_snapshot!(format(
1318            "fields @map(@string@enum{/// variant a\na @unit, /// variant b\nb @unit})"
1319        ));
1320    }
1321
1322    #[test]
1323    fn fmt_059_nested_enums_with_docs() {
1324        insta::assert_snapshot!(format(
1325            "type @enum{/// first\na @object{/// inner\nx @int}, b @unit}"
1326        ));
1327    }
1328
1329    #[test]
1330    fn fmt_060_the_dibs_pattern() {
1331        // Simplified version of dibs schema structure
1332        insta::assert_snapshot!(format(
1333            r#"schema {@ @object{decls @map(@string@enum{
1334    /// A query
1335    query @object{
1336        params @optional(@object{params @map(@string@enum{uuid @unit, /// doc
1337            optional @seq(@type{name T})
1338        })})
1339    }
1340})}}"#
1341        ));
1342    }
1343
1344    // --- 61-70: Top-level spacing (issue #28) ---
1345
1346    #[test]
1347    fn fmt_061_two_inline_entries() {
1348        insta::assert_snapshot!(format("a 1\nb 2"));
1349    }
1350
1351    #[test]
1352    fn fmt_062_two_block_entries() {
1353        insta::assert_snapshot!(format("a {\n  x 1\n}\nb {\n  y 2\n}"));
1354    }
1355
1356    #[test]
1357    fn fmt_063_inline_then_block() {
1358        insta::assert_snapshot!(format("a 1\nb {\n  y 2\n}"));
1359    }
1360
1361    #[test]
1362    fn fmt_064_block_then_inline() {
1363        insta::assert_snapshot!(format("a {\n  x 1\n}\nb 2"));
1364    }
1365
1366    #[test]
1367    fn fmt_065_inline_inline_with_existing_blank() {
1368        insta::assert_snapshot!(format("a 1\n\nb 2"));
1369    }
1370
1371    #[test]
1372    fn fmt_066_three_entries_mixed() {
1373        insta::assert_snapshot!(format("a 1\nb {\n  x 1\n}\nc 3"));
1374    }
1375
1376    #[test]
1377    fn fmt_067_meta_then_schema_blocks() {
1378        insta::assert_snapshot!(format("meta {\n  id test\n}\nschema {\n  @ @string\n}"));
1379    }
1380
1381    #[test]
1382    fn fmt_068_doc_comment_entry_spacing() {
1383        insta::assert_snapshot!(format("/// doc for a\na 1\n/// doc for b\nb 2"));
1384    }
1385
1386    #[test]
1387    fn fmt_069_multiple_blocks_no_blanks() {
1388        insta::assert_snapshot!(format("a {\nx 1\n}\nb {\ny 2\n}\nc {\nz 3\n}"));
1389    }
1390
1391    #[test]
1392    fn fmt_070_schema_declaration_spacing() {
1393        insta::assert_snapshot!(format("@schema foo.styx\nname test"));
1394    }
1395
1396    // --- 71-80: Edge cases with tags ---
1397
1398    #[test]
1399    fn fmt_071_tag_chain() {
1400        insta::assert_snapshot!(format("type @optional @string"));
1401    }
1402
1403    #[test]
1404    fn fmt_072_tag_with_object_then_scalar() {
1405        insta::assert_snapshot!(format("type @default({x 1} @object{x @int})"));
1406    }
1407
1408    #[test]
1409    fn fmt_073_multiple_tags_same_entry() {
1410        insta::assert_snapshot!(format("field @deprecated @optional(@string)"));
1411    }
1412
1413    #[test]
1414    fn fmt_074_tag_payload_is_unit() {
1415        insta::assert_snapshot!(format("empty @some(@)"));
1416    }
1417
1418    #[test]
1419    fn fmt_075_tag_with_heredoc() {
1420        insta::assert_snapshot!(format("sql @raw(<<EOF\nSELECT *\nEOF)"));
1421    }
1422
1423    #[test]
1424    fn fmt_076_tag_payload_sequence_of_tags() {
1425        insta::assert_snapshot!(format("types @union(@string @int @bool)"));
1426    }
1427
1428    #[test]
1429    fn fmt_077_tag_map_compact() {
1430        insta::assert_snapshot!(format("fields @map(@string@int)"));
1431    }
1432
1433    #[test]
1434    fn fmt_078_tag_map_with_complex_value() {
1435        insta::assert_snapshot!(format("fields @map(@string@object{x @int, y @int})"));
1436    }
1437
1438    #[test]
1439    fn fmt_079_tag_type_reference() {
1440        insta::assert_snapshot!(format("field @type{name MyType}"));
1441    }
1442
1443    #[test]
1444    fn fmt_080_tag_default_with_at() {
1445        insta::assert_snapshot!(format("opt @default(@ @optional(@string))"));
1446    }
1447
1448    // --- 81-90: Heredocs ---
1449
1450    #[test]
1451    fn fmt_081_simple_heredoc() {
1452        insta::assert_snapshot!(format("text <<EOF\nhello\nworld\nEOF"));
1453    }
1454
1455    #[test]
1456    fn fmt_082_heredoc_in_object() {
1457        insta::assert_snapshot!(format("config {\n  sql <<SQL\nSELECT *\nSQL\n}"));
1458    }
1459
1460    #[test]
1461    fn fmt_083_heredoc_indented_content() {
1462        insta::assert_snapshot!(format("code <<END\n  indented\n    more\nEND"));
1463    }
1464
1465    #[test]
1466    fn fmt_084_multiple_heredocs() {
1467        insta::assert_snapshot!(format("a <<A\nfirst\nA\nb <<B\nsecond\nB"));
1468    }
1469
1470    #[test]
1471    fn fmt_085_heredoc_empty() {
1472        insta::assert_snapshot!(format("empty <<EOF\nEOF"));
1473    }
1474
1475    // --- 86-90: Quoted strings edge cases ---
1476
1477    #[test]
1478    fn fmt_086_quoted_with_escapes() {
1479        insta::assert_snapshot!(format(r#"msg "hello\nworld\ttab""#));
1480    }
1481
1482    #[test]
1483    fn fmt_087_quoted_with_quotes() {
1484        insta::assert_snapshot!(format(r#"msg "say \"hello\"""#));
1485    }
1486
1487    #[test]
1488    fn fmt_088_raw_string_with_hashes() {
1489        insta::assert_snapshot!(format(r##"pattern r#"foo"bar"#"##));
1490    }
1491
1492    #[test]
1493    fn fmt_089_quoted_empty() {
1494        insta::assert_snapshot!(format(r#"empty """#));
1495    }
1496
1497    #[test]
1498    fn fmt_090_mixed_scalar_types() {
1499        insta::assert_snapshot!(format(r#"config {bare word, quoted "str", raw r"path"}"#));
1500    }
1501
1502    // --- 91-100: Complex real-world-like structures ---
1503
1504    #[test]
1505    fn fmt_091_schema_with_meta() {
1506        insta::assert_snapshot!(format(
1507            r#"meta {id "app:config@1", cli myapp}
1508schema {@ @object{
1509    name @string
1510    port @default(8080 @int)
1511}}"#
1512        ));
1513    }
1514
1515    #[test]
1516    fn fmt_092_enum_with_object_variants() {
1517        insta::assert_snapshot!(format(
1518            r#"type @enum{
1519    /// A simple variant
1520    simple @unit
1521    /// Complex variant
1522    complex @object{x @int, y @int}
1523}"#
1524        ));
1525    }
1526
1527    #[test]
1528    fn fmt_093_nested_optionals() {
1529        insta::assert_snapshot!(format("type @optional(@optional(@optional(@string)))"));
1530    }
1531
1532    #[test]
1533    fn fmt_094_map_of_maps() {
1534        insta::assert_snapshot!(format("data @map(@string@map(@string@int))"));
1535    }
1536
1537    #[test]
1538    fn fmt_095_sequence_of_enums() {
1539        insta::assert_snapshot!(format("items @seq(@enum{a @unit, b @unit, c @unit})"));
1540    }
1541
1542    #[test]
1543    fn fmt_096_all_builtin_types() {
1544        insta::assert_snapshot!(format(
1545            "types {s @string, i @int, b @bool, f @float, u @unit}"
1546        ));
1547    }
1548
1549    #[test]
1550    fn fmt_097_deep_nesting_mixed() {
1551        insta::assert_snapshot!(format(
1552            "a @object{b @seq(@enum{c @object{d @optional(@map(@string@int))}})}"
1553        ));
1554    }
1555
1556    #[test]
1557    fn fmt_098_realistic_config_schema() {
1558        insta::assert_snapshot!(format(
1559            r#"meta {id "crate:myapp@1", cli myapp, description "My application config"}
1560schema {@ @object{
1561    /// Server configuration
1562    server @object{
1563        /// Hostname to bind
1564        host @default("localhost" @string)
1565        /// Port number
1566        port @default(8080 @int)
1567    }
1568    /// Database settings
1569    database @optional(@object{
1570        url @string
1571        pool_size @default(10 @int)
1572    })
1573}}"#
1574        ));
1575    }
1576
1577    #[test]
1578    fn fmt_099_attributes_syntax() {
1579        insta::assert_snapshot!(format("resource limits>cpu>500m limits>memory>256Mi"));
1580    }
1581
1582    #[test]
1583    fn fmt_100_everything_combined() {
1584        insta::assert_snapshot!(format(
1585            r#"// Top level comment
1586meta {id "test@1"}
1587
1588/// Schema documentation
1589schema {@ @object{
1590    /// A string field
1591    name @string
1592
1593    /// An enum with variants
1594    kind @enum{
1595        /// Simple kind
1596        simple @unit
1597        /// Complex kind
1598        complex @object{
1599            /// Nested value
1600            value @optional(@int)
1601        }
1602    }
1603
1604    /// A sequence
1605    items @seq(@string)
1606
1607    /// A map
1608    data @map(@string@object{x @int, y @int})
1609}}"#
1610        ));
1611    }
1612}
1613
1614#[cfg(test)]
1615mod proptests {
1616    use super::*;
1617    use proptest::prelude::*;
1618
1619    /// Generate a valid bare scalar (no special chars)
1620    fn bare_scalar() -> impl Strategy<Value = String> {
1621        // Start with letter, then alphanumeric + some allowed chars
1622        prop::string::string_regex("[a-zA-Z][a-zA-Z0-9_-]{0,10}")
1623            .unwrap()
1624            .prop_filter("non-empty", |s| !s.is_empty())
1625    }
1626
1627    /// Generate a quoted scalar with potential escape sequences
1628    fn quoted_scalar() -> impl Strategy<Value = String> {
1629        prop_oneof![
1630            // Simple quoted string
1631            prop::string::string_regex(r#"[a-zA-Z0-9 _-]{0,20}"#)
1632                .unwrap()
1633                .prop_map(|s| format!("\"{}\"", s)),
1634            // With common escapes
1635            prop::string::string_regex(r#"[a-zA-Z0-9 ]{0,10}"#)
1636                .unwrap()
1637                .prop_map(|s| format!("\"hello\\n{}\\t\"", s)),
1638        ]
1639    }
1640
1641    /// Generate a raw scalar (r"..." or r#"..."#)
1642    fn raw_scalar() -> impl Strategy<Value = String> {
1643        prop_oneof![
1644            // Simple raw string
1645            prop::string::string_regex(r#"[a-zA-Z0-9/_\\.-]{0,15}"#)
1646                .unwrap()
1647                .prop_map(|s| format!("r\"{}\"", s)),
1648            // Raw string with # delimiters (can contain quotes)
1649            prop::string::string_regex(r#"[a-zA-Z0-9 "/_\\.-]{0,15}"#)
1650                .unwrap()
1651                .prop_map(|s| format!("r#\"{}\"#", s)),
1652        ]
1653    }
1654
1655    /// Generate a scalar (bare, quoted, or raw)
1656    fn scalar() -> impl Strategy<Value = String> {
1657        prop_oneof![
1658            4 => bare_scalar(),
1659            3 => quoted_scalar(),
1660            1 => raw_scalar(),
1661        ]
1662    }
1663
1664    /// Generate a tag name
1665    fn tag_name() -> impl Strategy<Value = String> {
1666        prop::string::string_regex("[a-zA-Z][a-zA-Z0-9_-]{0,8}")
1667            .unwrap()
1668            .prop_filter("non-empty", |s| !s.is_empty())
1669    }
1670
1671    /// Generate a tag (@name or @name with payload)
1672    /// Per grammar: TagPayload must be immediately attached (no whitespace)
1673    /// TagPayload ::= Object | Sequence | QuotedScalar | RawScalar | HeredocScalar | '@'
1674    /// Note: BareScalar is NOT a valid TagPayload
1675    fn tag() -> impl Strategy<Value = String> {
1676        prop_oneof![
1677            // Unit tag (just @)
1678            Just("@".to_string()),
1679            // Simple tag (no payload)
1680            tag_name().prop_map(|n| format!("@{n}")),
1681            // Tag with sequence payload (must be attached, no space)
1682            (tag_name(), flat_sequence()).prop_map(|(n, s)| format!("@{n}{s}")),
1683            // Tag with inline object payload (must be attached, no space)
1684            (tag_name(), inline_object()).prop_map(|(n, o)| format!("@{n}{o}")),
1685            // Tag with quoted scalar payload (must be attached, no space)
1686            (tag_name(), quoted_scalar()).prop_map(|(n, q)| format!("@{n}{q}")),
1687            // Tag with unit payload
1688            (tag_name()).prop_map(|n| format!("@{n} @")),
1689        ]
1690    }
1691
1692    /// Generate an attribute (key>value)
1693    fn attribute() -> impl Strategy<Value = String> {
1694        (bare_scalar(), scalar()).prop_map(|(k, v)| format!("{k}>{v}"))
1695    }
1696
1697    /// Generate a flat sequence of scalars (no nesting)
1698    fn flat_sequence() -> impl Strategy<Value = String> {
1699        prop::collection::vec(scalar(), 0..5).prop_map(|items| {
1700            if items.is_empty() {
1701                "()".to_string()
1702            } else {
1703                format!("({})", items.join(" "))
1704            }
1705        })
1706    }
1707
1708    /// Generate a nested sequence like ((a b) (c d))
1709    fn nested_sequence() -> impl Strategy<Value = String> {
1710        prop::collection::vec(flat_sequence(), 1..4)
1711            .prop_map(|seqs| format!("({})", seqs.join(" ")))
1712    }
1713
1714    /// Generate a sequence (flat or nested)
1715    fn sequence() -> impl Strategy<Value = String> {
1716        prop_oneof![
1717            3 => flat_sequence(),
1718            1 => nested_sequence(),
1719        ]
1720    }
1721
1722    /// Generate an inline object {key value, ...}
1723    fn inline_object() -> impl Strategy<Value = String> {
1724        prop::collection::vec((bare_scalar(), scalar()), 0..4).prop_map(|entries| {
1725            if entries.is_empty() {
1726                "{}".to_string()
1727            } else {
1728                let inner: Vec<String> = entries
1729                    .into_iter()
1730                    .map(|(k, v)| format!("{k} {v}"))
1731                    .collect();
1732                format!("{{{}}}", inner.join(", "))
1733            }
1734        })
1735    }
1736
1737    /// Generate a multiline object
1738    fn multiline_object() -> impl Strategy<Value = String> {
1739        prop::collection::vec((bare_scalar(), scalar()), 1..4).prop_map(|entries| {
1740            let inner: Vec<String> = entries
1741                .into_iter()
1742                .map(|(k, v)| format!("  {k} {v}"))
1743                .collect();
1744            format!("{{\n{}\n}}", inner.join("\n"))
1745        })
1746    }
1747
1748    /// Generate a line comment
1749    fn line_comment() -> impl Strategy<Value = String> {
1750        prop::string::string_regex("[a-zA-Z0-9 _-]{0,30}")
1751            .unwrap()
1752            .prop_map(|s| format!("// {}", s.trim()))
1753    }
1754
1755    /// Generate a doc comment
1756    fn doc_comment() -> impl Strategy<Value = String> {
1757        prop::string::string_regex("[a-zA-Z0-9 _-]{0,30}")
1758            .unwrap()
1759            .prop_map(|s| format!("/// {}", s.trim()))
1760    }
1761
1762    /// Generate a heredoc
1763    fn heredoc() -> impl Strategy<Value = String> {
1764        let delimiters = prop_oneof![
1765            Just("EOF".to_string()),
1766            Just("END".to_string()),
1767            Just("TEXT".to_string()),
1768            Just("CODE".to_string()),
1769        ];
1770        let content = prop::string::string_regex("[a-zA-Z0-9 \n_.-]{0,50}").unwrap();
1771        let lang_hint = prop_oneof![
1772            Just("".to_string()),
1773            Just(",txt".to_string()),
1774            Just(",rust".to_string()),
1775        ];
1776        (delimiters, content, lang_hint)
1777            .prop_map(|(delim, content, hint)| format!("<<{delim}{hint}\n{content}\n{delim}"))
1778    }
1779
1780    /// Generate a simple value (scalar, sequence, or attributes)
1781    fn simple_value() -> impl Strategy<Value = String> {
1782        prop_oneof![
1783            3 => scalar(),
1784            2 => sequence(),
1785            2 => tag(),
1786            1 => inline_object(),
1787            1 => multiline_object(),
1788            1 => heredoc(),
1789            // Multiple attributes (path syntax)
1790            1 => prop::collection::vec(attribute(), 1..4).prop_map(|attrs| attrs.join(" ")),
1791        ]
1792    }
1793
1794    /// Generate a simple entry (key value)
1795    fn entry() -> impl Strategy<Value = String> {
1796        prop_oneof![
1797            // Regular entry
1798            (bare_scalar(), simple_value()).prop_map(|(k, v)| format!("{k} {v}")),
1799            // Tag as key
1800            (tag(), simple_value()).prop_map(|(t, v)| format!("{t} {v}")),
1801        ]
1802    }
1803
1804    /// Generate an entry optionally preceded by a comment
1805    fn commented_entry() -> impl Strategy<Value = String> {
1806        prop_oneof![
1807            3 => entry(),
1808            1 => (doc_comment(), entry()).prop_map(|(c, e)| format!("{c}\n{e}")),
1809            1 => (line_comment(), entry()).prop_map(|(c, e)| format!("{c}\n{e}")),
1810        ]
1811    }
1812
1813    /// Generate a simple document (multiple entries)
1814    fn document() -> impl Strategy<Value = String> {
1815        prop::collection::vec(commented_entry(), 1..5).prop_map(|entries| entries.join("\n"))
1816    }
1817
1818    /// Generate a deeply nested object (recursive)
1819    fn deep_object(depth: usize) -> BoxedStrategy<String> {
1820        if depth == 0 {
1821            scalar().boxed()
1822        } else {
1823            prop_oneof![
1824                // Scalar leaf
1825                2 => scalar(),
1826                // Nested object
1827                1 => prop::collection::vec(
1828                    (bare_scalar(), deep_object(depth - 1)),
1829                    1..3
1830                ).prop_map(|entries| {
1831                    let inner: Vec<String> = entries.into_iter()
1832                        .map(|(k, v)| format!("  {k} {v}"))
1833                        .collect();
1834                    format!("{{\n{}\n}}", inner.join("\n"))
1835                }),
1836            ]
1837            .boxed()
1838        }
1839    }
1840
1841    /// Generate a sequence containing tags
1842    fn sequence_of_tags() -> impl Strategy<Value = String> {
1843        prop::collection::vec(tag(), 1..5).prop_map(|tags| format!("({})", tags.join(" ")))
1844    }
1845
1846    /// Generate an object with sequence values
1847    fn object_with_sequences() -> impl Strategy<Value = String> {
1848        prop::collection::vec((bare_scalar(), flat_sequence()), 1..4).prop_map(|entries| {
1849            let inner: Vec<String> = entries
1850                .into_iter()
1851                .map(|(k, v)| format!("  {k} {v}"))
1852                .collect();
1853            format!("{{\n{}\n}}", inner.join("\n"))
1854        })
1855    }
1856
1857    /// Strip spans from a value tree for comparison (spans change after formatting)
1858    fn strip_spans(value: &mut styx_tree::Value) {
1859        value.span = None;
1860        if let Some(ref mut tag) = value.tag {
1861            tag.span = None;
1862        }
1863        if let Some(ref mut payload) = value.payload {
1864            match payload {
1865                styx_tree::Payload::Scalar(s) => s.span = None,
1866                styx_tree::Payload::Sequence(seq) => {
1867                    seq.span = None;
1868                    for item in &mut seq.items {
1869                        strip_spans(item);
1870                    }
1871                }
1872                styx_tree::Payload::Object(obj) => {
1873                    obj.span = None;
1874                    for entry in &mut obj.entries {
1875                        strip_spans(&mut entry.key);
1876                        strip_spans(&mut entry.value);
1877                    }
1878                }
1879            }
1880        }
1881    }
1882
1883    /// Parse source into a comparable tree (spans stripped)
1884    fn parse_to_tree(source: &str) -> Option<styx_tree::Value> {
1885        let mut value = styx_tree::parse(source).ok()?;
1886        strip_spans(&mut value);
1887        Some(value)
1888    }
1889
1890    proptest! {
1891        /// Formatting must preserve document semantics
1892        #[test]
1893        fn format_preserves_semantics(input in document()) {
1894            let tree1 = parse_to_tree(&input);
1895
1896            // Skip if original doesn't parse (shouldn't happen with our generator)
1897            if tree1.is_none() {
1898                return Ok(());
1899            }
1900            let tree1 = tree1.unwrap();
1901
1902            let formatted = format_source(&input, FormatOptions::default());
1903            let tree2 = parse_to_tree(&formatted);
1904
1905            prop_assert!(
1906                tree2.is_some(),
1907                "Formatted output should parse. Input:\n{}\nFormatted:\n{}",
1908                input,
1909                formatted
1910            );
1911            let tree2 = tree2.unwrap();
1912
1913            prop_assert_eq!(
1914                tree1,
1915                tree2,
1916                "Formatting changed semantics!\nInput:\n{}\nFormatted:\n{}",
1917                input,
1918                formatted
1919            );
1920        }
1921
1922        /// Formatting should be idempotent
1923        #[test]
1924        fn format_is_idempotent(input in document()) {
1925            let once = format_source(&input, FormatOptions::default());
1926            let twice = format_source(&once, FormatOptions::default());
1927
1928            prop_assert_eq!(
1929                &once,
1930                &twice,
1931                "Formatting is not idempotent!\nInput:\n{}\nOnce:\n{}\nTwice:\n{}",
1932                input,
1933                &once,
1934                &twice
1935            );
1936        }
1937
1938        /// Deeply nested objects should format correctly
1939        #[test]
1940        fn format_deep_objects(key in bare_scalar(), value in deep_object(4)) {
1941            let input = format!("{key} {value}");
1942            let tree1 = parse_to_tree(&input);
1943
1944            if tree1.is_none() {
1945                return Ok(());
1946            }
1947            let tree1 = tree1.unwrap();
1948
1949            let formatted = format_source(&input, FormatOptions::default());
1950            let tree2 = parse_to_tree(&formatted);
1951
1952            prop_assert!(
1953                tree2.is_some(),
1954                "Deep object should parse after formatting. Input:\n{}\nFormatted:\n{}",
1955                input,
1956                formatted
1957            );
1958
1959            prop_assert_eq!(
1960                tree1,
1961                tree2.unwrap(),
1962                "Deep object semantics changed!\nInput:\n{}\nFormatted:\n{}",
1963                input,
1964                formatted
1965            );
1966        }
1967
1968        /// Sequences of tags should format correctly
1969        #[test]
1970        fn format_sequence_of_tags(key in bare_scalar(), seq in sequence_of_tags()) {
1971            let input = format!("{key} {seq}");
1972            let tree1 = parse_to_tree(&input);
1973
1974            if tree1.is_none() {
1975                return Ok(());
1976            }
1977            let tree1 = tree1.unwrap();
1978
1979            let formatted = format_source(&input, FormatOptions::default());
1980            let tree2 = parse_to_tree(&formatted);
1981
1982            prop_assert!(
1983                tree2.is_some(),
1984                "Tag sequence should parse. Input:\n{}\nFormatted:\n{}",
1985                input,
1986                formatted
1987            );
1988
1989            prop_assert_eq!(
1990                tree1,
1991                tree2.unwrap(),
1992                "Tag sequence semantics changed!\nInput:\n{}\nFormatted:\n{}",
1993                input,
1994                formatted
1995            );
1996        }
1997
1998        /// Objects containing sequences should format correctly
1999        #[test]
2000        fn format_objects_with_sequences(key in bare_scalar(), obj in object_with_sequences()) {
2001            let input = format!("{key} {obj}");
2002            let tree1 = parse_to_tree(&input);
2003
2004            if tree1.is_none() {
2005                return Ok(());
2006            }
2007            let tree1 = tree1.unwrap();
2008
2009            let formatted = format_source(&input, FormatOptions::default());
2010            let tree2 = parse_to_tree(&formatted);
2011
2012            prop_assert!(
2013                tree2.is_some(),
2014                "Object with sequences should parse. Input:\n{}\nFormatted:\n{}",
2015                input,
2016                formatted
2017            );
2018
2019            prop_assert_eq!(
2020                tree1,
2021                tree2.unwrap(),
2022                "Object with sequences semantics changed!\nInput:\n{}\nFormatted:\n{}",
2023                input,
2024                formatted
2025            );
2026        }
2027
2028        /// Formatting must preserve all comments (line and doc comments)
2029        #[test]
2030        fn format_preserves_comments(input in document_with_comments()) {
2031            let original_comments = extract_comments(&input);
2032
2033            // Skip if no comments (not interesting for this test)
2034            if original_comments.is_empty() {
2035                return Ok(());
2036            }
2037
2038            let formatted = format_source(&input, FormatOptions::default());
2039            let formatted_comments = extract_comments(&formatted);
2040
2041            prop_assert_eq!(
2042                original_comments.len(),
2043                formatted_comments.len(),
2044                "Comment count changed!\nInput ({} comments):\n{}\nFormatted ({} comments):\n{}\nOriginal comments: {:?}\nFormatted comments: {:?}",
2045                original_comments.len(),
2046                input,
2047                formatted_comments.len(),
2048                formatted,
2049                original_comments,
2050                formatted_comments
2051            );
2052
2053            // Check that each comment text is preserved (order may change slightly due to formatting)
2054            for comment in &original_comments {
2055                prop_assert!(
2056                    formatted_comments.contains(comment),
2057                    "Comment lost during formatting!\nMissing: {:?}\nInput:\n{}\nFormatted:\n{}\nOriginal comments: {:?}\nFormatted comments: {:?}",
2058                    comment,
2059                    input,
2060                    formatted,
2061                    original_comments,
2062                    formatted_comments
2063                );
2064            }
2065        }
2066
2067        /// Objects with only comments should preserve them
2068        #[test]
2069        fn format_preserves_comments_in_empty_objects(
2070            key in bare_scalar(),
2071            comments in prop::collection::vec(line_comment(), 1..5)
2072        ) {
2073            let inner = comments.iter()
2074                .map(|c| format!("    {c}"))
2075                .collect::<Vec<_>>()
2076                .join("\n");
2077            let input = format!("{key} {{\n{inner}\n}}");
2078
2079            let original_comments = extract_comments(&input);
2080            let formatted = format_source(&input, FormatOptions::default());
2081            let formatted_comments = extract_comments(&formatted);
2082
2083            prop_assert_eq!(
2084                original_comments.len(),
2085                formatted_comments.len(),
2086                "Comments in empty object lost!\nInput:\n{}\nFormatted:\n{}",
2087                input,
2088                formatted
2089            );
2090        }
2091
2092        /// Objects with mixed entries and comments should preserve all comments
2093        #[test]
2094        fn format_preserves_comments_mixed_with_entries(
2095            key in bare_scalar(),
2096            items in prop::collection::vec(
2097                prop_oneof![
2098                    // Entry
2099                    (bare_scalar(), scalar()).prop_map(|(k, v)| format!("{k} {v}")),
2100                    // Comment
2101                    line_comment(),
2102                ],
2103                2..6
2104            )
2105        ) {
2106            let inner = items.iter()
2107                .map(|item| format!("    {item}"))
2108                .collect::<Vec<_>>()
2109                .join("\n");
2110            let input = format!("{key} {{\n{inner}\n}}");
2111
2112            let original_comments = extract_comments(&input);
2113            let formatted = format_source(&input, FormatOptions::default());
2114            let formatted_comments = extract_comments(&formatted);
2115
2116            prop_assert_eq!(
2117                original_comments.len(),
2118                formatted_comments.len(),
2119                "Comments mixed with entries lost!\nInput:\n{}\nFormatted:\n{}\nOriginal: {:?}\nFormatted: {:?}",
2120                input,
2121                formatted,
2122                original_comments,
2123                formatted_comments
2124            );
2125        }
2126
2127        /// Sequences with comments should preserve them
2128        #[test]
2129        fn format_preserves_comments_in_sequences(
2130            key in bare_scalar(),
2131            items in prop::collection::vec(
2132                prop_oneof![
2133                    // Scalar item
2134                    2 => scalar(),
2135                    // Comment
2136                    1 => line_comment(),
2137                ],
2138                2..6
2139            )
2140        ) {
2141            // Only create multiline sequence if we have comments
2142            let has_comment = items.iter().any(|i| i.starts_with("//"));
2143            if !has_comment {
2144                return Ok(());
2145            }
2146
2147            let inner = items.iter()
2148                .map(|item| format!("    {item}"))
2149                .collect::<Vec<_>>()
2150                .join("\n");
2151            let input = format!("{key} (\n{inner}\n)");
2152
2153            let original_comments = extract_comments(&input);
2154            let formatted = format_source(&input, FormatOptions::default());
2155            let formatted_comments = extract_comments(&formatted);
2156
2157            prop_assert_eq!(
2158                original_comments.len(),
2159                formatted_comments.len(),
2160                "Comments in sequence lost!\nInput:\n{}\nFormatted:\n{}\nOriginal: {:?}\nFormatted: {:?}",
2161                input,
2162                formatted,
2163                original_comments,
2164                formatted_comments
2165            );
2166        }
2167    }
2168
2169    /// Generate a document that definitely contains comments in various positions
2170    fn document_with_comments() -> impl Strategy<Value = String> {
2171        prop::collection::vec(
2172            prop_oneof![
2173                // Regular entry
2174                2 => entry(),
2175                // Entry preceded by comment
2176                2 => (line_comment(), entry()).prop_map(|(c, e)| format!("{c}\n{e}")),
2177                // Entry preceded by doc comment
2178                1 => (doc_comment(), entry()).prop_map(|(c, e)| format!("{c}\n{e}")),
2179                // Object with comments inside
2180                1 => object_with_internal_comments(),
2181            ],
2182            1..5,
2183        )
2184        .prop_map(|entries| entries.join("\n"))
2185    }
2186
2187    /// Generate an object that has comments inside it
2188    fn object_with_internal_comments() -> impl Strategy<Value = String> {
2189        (
2190            bare_scalar(),
2191            prop::collection::vec(
2192                prop_oneof![
2193                    // Entry
2194                    2 => (bare_scalar(), scalar()).prop_map(|(k, v)| format!("{k} {v}")),
2195                    // Comment
2196                    1 => line_comment(),
2197                ],
2198                1..5,
2199            ),
2200        )
2201            .prop_map(|(key, items)| {
2202                let inner = items
2203                    .iter()
2204                    .map(|item| format!("    {item}"))
2205                    .collect::<Vec<_>>()
2206                    .join("\n");
2207                format!("{key} {{\n{inner}\n}}")
2208            })
2209    }
2210
2211    /// Extract all comments from source text (both line and doc comments)
2212    fn extract_comments(source: &str) -> Vec<String> {
2213        let mut comments = Vec::new();
2214        for line in source.lines() {
2215            let trimmed = line.trim();
2216            if trimmed.starts_with("///") || trimmed.starts_with("//") {
2217                comments.push(trimmed.to_string());
2218            }
2219        }
2220        comments
2221    }
2222}
styx_format/cst_format.rs

styx_format/
cst_format.rs