Skip to main content

aver/main/
format_cmd.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::process;
5
6use aver::ast::TopLevel;
7use aver::diagnostics::model::AnalysisReport;
8use aver::diagnostics::needs_format_diagnostic;
9use aver::lexer::Lexer;
10use aver::parser::Parser;
11use aver::types::{Type, parse_type_str_strict};
12use colored::Colorize;
13
14#[allow(dead_code)]
15pub(super) fn cmd_format(path: &str, check: bool, json: bool) {
16    // JSON mode implies --check: it's a report of diffs, never writes.
17    let check = check || json;
18
19    let root = Path::new(path);
20    let mut files = Vec::new();
21    if let Err(e) = collect_av_files(root, &mut files) {
22        if json {
23            emit_fatal_json("cannot-collect", &e);
24        } else {
25            eprintln!("{}", e.red());
26        }
27        process::exit(1);
28    }
29    files.sort();
30
31    if files.is_empty() {
32        let msg = format!("No .av files found under '{}'", root.display());
33        if json {
34            emit_fatal_json("no-files", &msg);
35        } else {
36            eprintln!("{}", msg.red());
37        }
38        process::exit(1);
39    }
40
41    // Keep original source + formatter violations so JSON/tty modes can
42    // render precise per-rule diagnostics via the canonical factory.
43    struct Changed {
44        path: PathBuf,
45        original: String,
46        violations: Vec<aver::diagnostics::model::FormatViolation>,
47    }
48    let mut changed: Vec<Changed> = Vec::new();
49
50    for file in &files {
51        let src = match fs::read_to_string(file) {
52            Ok(s) => s,
53            Err(e) => {
54                let msg = format!("Cannot read '{}': {}", file.display(), e);
55                if json {
56                    emit_fatal_json("read-failed", &msg);
57                } else {
58                    eprintln!("{}", msg.red());
59                }
60                process::exit(1);
61            }
62        };
63        let (formatted, violations) = match try_format_source(&src) {
64            Ok(pair) => pair,
65            Err(e) => {
66                let msg = format!("Cannot format '{}': {}", file.display(), e);
67                if json {
68                    emit_fatal_json("format-failed", &msg);
69                } else {
70                    eprintln!("{}", msg.red());
71                }
72                process::exit(1);
73            }
74        };
75        if formatted != src {
76            if !check && let Err(e) = fs::write(file, &formatted) {
77                eprintln!(
78                    "{}",
79                    format!("Cannot write '{}': {}", file.display(), e).red()
80                );
81                process::exit(1);
82            }
83            changed.push(Changed {
84                path: file.clone(),
85                original: src,
86                violations,
87            });
88        }
89    }
90
91    if json {
92        for c in &changed {
93            let file_label = c.path.display().to_string();
94            let diag = needs_format_diagnostic(&file_label, &c.violations, &c.original);
95            let report = AnalysisReport::with_diagnostics(file_label, vec![diag]);
96            println!("{}", report.to_json());
97        }
98        println!(
99            "{{\"schema_version\":1,\"kind\":\"summary\",\"files\":{},\"format\":{{\"clean\":{},\"needs_format\":{}}}}}",
100            files.len(),
101            files.len() - changed.len(),
102            changed.len()
103        );
104        if !changed.is_empty() {
105            process::exit(1);
106        }
107        return;
108    }
109
110    if check {
111        if changed.is_empty() {
112            println!("{}", "Format check passed".green());
113            return;
114        }
115        for (i, c) in changed.iter().enumerate() {
116            if i > 0 {
117                println!();
118            }
119            let file_label = c.path.display().to_string();
120            let diag = needs_format_diagnostic(&file_label, &c.violations, &c.original);
121            // verbose=true so violation regions render in tty too —
122            // parity with `--check --json` consumers.
123            print!("{}", aver::tty_render::render_tty(&diag, true));
124        }
125        println!();
126        println!(
127            "{}: {} file(s) need formatting",
128            "Format check failed".red(),
129            changed.len()
130        );
131        process::exit(1);
132    }
133
134    if changed.is_empty() {
135        println!("{}", "Already formatted".green());
136    } else {
137        for c in &changed {
138            println!("{} {}", "formatted".green(), c.path.display());
139        }
140        println!("{}", format!("Formatted {} file(s)", changed.len()).green());
141    }
142}
143
144fn emit_fatal_json(kind: &str, message: &str) {
145    use aver::diagnostics::json_escape;
146    println!(
147        "{{\"schema_version\":1,\"kind\":\"file-error\",\"error_kind\":\"{}\",\"error\":{}}}",
148        kind,
149        json_escape(message)
150    );
151}
152
153#[allow(dead_code)]
154fn collect_av_files(path: &Path, out: &mut Vec<PathBuf>) -> Result<(), String> {
155    if !path.exists() {
156        return Err(format!("Path '{}' does not exist", path.display()));
157    }
158
159    if path.is_file() {
160        if is_av_file(path) {
161            out.push(path.to_path_buf());
162            return Ok(());
163        }
164        return Err(format!("'{}' is not an .av file", path.display()));
165    }
166
167    let entries = fs::read_dir(path)
168        .map_err(|e| format!("Cannot read directory '{}': {}", path.display(), e))?;
169    for entry_res in entries {
170        let entry = entry_res
171            .map_err(|e| format!("Cannot read directory entry in '{}': {}", path.display(), e))?;
172        let p = entry.path();
173        if p.is_dir() {
174            collect_av_files(&p, out)?;
175        } else if is_av_file(&p) {
176            out.push(p);
177        }
178    }
179    Ok(())
180}
181
/// Returns `true` when `path` has the exact (case-sensitive) extension `av`.
#[allow(dead_code)]
fn is_av_file(path: &Path) -> bool {
    matches!(path.extension().and_then(|ext| ext.to_str()), Some("av"))
}
186
187/// Normalize a single line's leading indent: expand tabs to 4 spaces,
188/// strip trailing whitespace implicitly by collapsing empty content.
189///
190/// Returns the rewritten line plus an optional violation if the input
191/// carried mixed / tab-based indent. The caller supplies `source_line`
192/// so the violation can point back to the original source.
193fn normalize_leading_indent_tracked(
194    line: &str,
195    source_line: usize,
196) -> (String, Option<aver::diagnostics::model::FormatViolation>) {
197    let mut end = 0usize;
198    for (idx, ch) in line.char_indices() {
199        if ch == ' ' || ch == '\t' {
200            end = idx + ch.len_utf8();
201        } else {
202            break;
203        }
204    }
205
206    let (indent, rest) = line.split_at(end);
207    if rest.is_empty() {
208        // Line was only whitespace: not a format violation — formatter
209        // collapses to empty regardless of what the user typed.
210        return (String::new(), None);
211    }
212
213    let had_tab = indent.contains('\t');
214    let mut out = String::new();
215    for ch in indent.chars() {
216        if ch == '\t' {
217            out.push_str("    ");
218        } else {
219            out.push(ch);
220        }
221    }
222    out.push_str(rest);
223
224    let violation = if had_tab {
225        Some(aver::diagnostics::model::FormatViolation {
226            line: source_line,
227            col: 1,
228            rule: "tab-indent",
229            message: "tab in leading indent; formatter expands to 4 spaces".to_string(),
230            before: Some(indent.replace('\t', "\\t")),
231            after: Some(indent.replace('\t', "    ")),
232        })
233    } else {
234        None
235    };
236
237    (out, violation)
238}
239
/// Returns the part of `effect` before its first `.`, or the whole string
/// when it contains no `.`.
fn effect_namespace(effect: &str) -> &str {
    effect
        .split_once('.')
        .map_or(effect, |(namespace, _)| namespace)
}
246
/// Returns a lexicographically sorted copy of `effects`; the input slice is
/// left untouched and duplicates are kept.
fn sorted_effects(effects: &[String]) -> Vec<String> {
    let mut ordered = Vec::with_capacity(effects.len());
    ordered.extend_from_slice(effects);
    // Equal strings are indistinguishable, so an unstable sort yields the
    // same result as a stable one here.
    ordered.sort_unstable();
    ordered
}
252
253fn format_block_effect_declaration(indent: &str, effects: &[String]) -> Vec<String> {
254    let effects = sorted_effects(effects);
255    let inline = format!("{}! [{}]", indent, effects.join(", "));
256    if inline.len() <= 100 {
257        return vec![inline];
258    }
259
260    let mut out = vec![format!("{}! [", indent)];
261    let mut start = 0usize;
262    while start < effects.len() {
263        let namespace = effect_namespace(&effects[start]);
264        let mut end = start + 1;
265        while end < effects.len() && effect_namespace(&effects[end]) == namespace {
266            end += 1;
267        }
268        out.push(format!("{}    {},", indent, effects[start..end].join(", ")));
269        start = end;
270    }
271    out.push(format!("{}]", indent));
272    out
273}
274
/// Split `src` on `delimiter`, ignoring delimiters nested inside `()`,
/// `[]` or `<>`.
///
/// A `>` that directly follows `-` (the arrow `->`) or that appears while
/// no `<` is open does not change the angle depth. Returns `None` when a
/// `)` or `]` has no opener, or when any bracket kind is still open at the
/// end of the input. Pieces are returned untrimmed.
fn split_top_level(src: &str, delimiter: char) -> Option<Vec<String>> {
    // Current nesting depth for `()`, `[]` and `<>` respectively.
    let (mut parens, mut brackets, mut angles) = (0usize, 0usize, 0usize);
    let mut pieces = Vec::new();
    let mut piece_start = 0usize;
    let mut last_char: Option<char> = None;

    for (pos, current) in src.char_indices() {
        if current == '(' {
            parens += 1;
        } else if current == ')' {
            parens = parens.checked_sub(1)?;
        } else if current == '[' {
            brackets += 1;
        } else if current == ']' {
            brackets = brackets.checked_sub(1)?;
        } else if current == '<' {
            angles += 1;
        } else if current == '>' && last_char != Some('-') && angles > 0 {
            angles -= 1;
        }

        let at_top_level = (parens, brackets, angles) == (0, 0, 0);
        if at_top_level && current == delimiter {
            pieces.push(src[piece_start..pos].to_string());
            piece_start = pos + current.len_utf8();
        }
        last_char = Some(current);
    }

    if (parens, brackets, angles) != (0, 0, 0) {
        return None;
    }

    pieces.push(src[piece_start..].to_string());
    Some(pieces)
}
308
/// Scan `src` from byte offset `open_idx` and return the byte offset of the
/// `)` that brings the parenthesis depth back to zero.
///
/// Returns `None` if a `)` is seen before any `(`, or if the input ends
/// before the depth returns to zero.
fn find_matching_paren(src: &str, open_idx: usize) -> Option<usize> {
    let mut open_count = 0usize;
    let scan = src.char_indices().filter(|&(pos, _)| pos >= open_idx);
    for (pos, current) in scan {
        if current == '(' {
            open_count += 1;
        } else if current == ')' {
            open_count = open_count.checked_sub(1)?;
            if open_count == 0 {
                return Some(pos);
            }
        }
    }
    None
}
325
326fn format_type_for_source(ty: &Type) -> String {
327    match ty {
328        Type::Int => "Int".to_string(),
329        Type::Float => "Float".to_string(),
330        Type::Str => "String".to_string(),
331        Type::Bool => "Bool".to_string(),
332        Type::Unit => "Unit".to_string(),
333        Type::Result(ok, err) => format!(
334            "Result<{}, {}>",
335            format_type_for_source(ok),
336            format_type_for_source(err)
337        ),
338        Type::Option(inner) => format!("Option<{}>", format_type_for_source(inner)),
339        Type::List(inner) => format!("List<{}>", format_type_for_source(inner)),
340        Type::Vector(inner) => format!("Vector<{}>", format_type_for_source(inner)),
341        Type::Tuple(items) => format!(
342            "({})",
343            items
344                .iter()
345                .map(format_type_for_source)
346                .collect::<Vec<_>>()
347                .join(", ")
348        ),
349        Type::Map(key, value) => format!(
350            "Map<{}, {}>",
351            format_type_for_source(key),
352            format_type_for_source(value)
353        ),
354        Type::Fn(params, ret, effects) => {
355            let params = params
356                .iter()
357                .map(format_type_for_source)
358                .collect::<Vec<_>>()
359                .join(", ");
360            let ret = format_type_for_source(ret);
361            let effects = sorted_effects(effects);
362            if effects.is_empty() {
363                format!("Fn({params}) -> {ret}")
364            } else {
365                format!("Fn({params}) -> {ret} ! [{}]", effects.join(", "))
366            }
367        }
368        Type::Unknown => "Unknown".to_string(),
369        Type::Named(name) => name.clone(),
370    }
371}
372
373fn normalize_type_annotation(type_src: &str) -> String {
374    let trimmed = type_src.trim();
375    match parse_type_str_strict(trimmed) {
376        Ok(ty) => format_type_for_source(&ty),
377        Err(_) => trimmed.to_string(),
378    }
379}
380
381fn normalize_function_header_effects_line(line: &str) -> String {
382    let indent_len = line.chars().take_while(|c| *c == ' ').count();
383    let indent = " ".repeat(indent_len);
384    let trimmed = line.trim();
385    if !trimmed.starts_with("fn ") {
386        return line.to_string();
387    }
388
389    let open_idx = match trimmed.find('(') {
390        Some(idx) => idx,
391        None => return line.to_string(),
392    };
393    let close_idx = match find_matching_paren(trimmed, open_idx) {
394        Some(idx) => idx,
395        None => return line.to_string(),
396    };
397
398    let params_src = &trimmed[open_idx + 1..close_idx];
399    let params = match split_top_level(params_src, ',') {
400        Some(parts) => parts,
401        None => return line.to_string(),
402    };
403    let formatted_params = params
404        .into_iter()
405        .filter(|part| !part.trim().is_empty())
406        .map(|param| {
407            let (name, ty) = match param.split_once(':') {
408                Some(parts) => parts,
409                None => return param.trim().to_string(),
410            };
411            format!("{}: {}", name.trim(), normalize_type_annotation(ty))
412        })
413        .collect::<Vec<_>>()
414        .join(", ");
415
416    let mut formatted = format!(
417        "{}{}{})",
418        indent,
419        &trimmed[..open_idx + 1],
420        formatted_params
421    );
422    let remainder = trimmed[close_idx + 1..].trim();
423    if let Some(return_type) = remainder.strip_prefix("->") {
424        formatted.push_str(" -> ");
425        formatted.push_str(&normalize_type_annotation(return_type));
426    } else if !remainder.is_empty() {
427        formatted.push(' ');
428        formatted.push_str(remainder);
429    }
430
431    formatted
432}
433
434/// Per-line formatter for function headers.
435///
436/// When `line_offset` is provided, each rewritten line pushes a
437/// `bad-function-header` violation keyed on the original source line.
438/// `line_offset` is a `Vec<usize>` mapping input-line-index → source
439/// line number (1-based) so the factory can point back at the user's
440/// source accurately.
441fn normalize_function_header_effects_tracked(
442    lines: Vec<String>,
443    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
444    line_offset: Option<&[usize]>,
445) -> Vec<String> {
446    lines
447        .into_iter()
448        .enumerate()
449        .map(|(idx, line)| {
450            let rewritten = normalize_function_header_effects_line(&line);
451            if rewritten != line {
452                let source_line = line_offset.and_then(|off| off.get(idx)).copied().unwrap_or(idx + 1);
453                violations.push(aver::diagnostics::model::FormatViolation {
454                    line: source_line,
455                    col: 1,
456                    rule: "bad-function-header",
457                    message:
458                        "function signature spacing / parameter separator differs from canonical form"
459                            .to_string(),
460                    before: Some(line.clone()),
461                    after: Some(rewritten.clone()),
462                });
463            }
464            rewritten
465        })
466        .collect()
467}
468
469fn normalize_effect_declaration_blocks_tracked(
470    lines: Vec<String>,
471    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
472    line_offset: Option<&[usize]>,
473) -> Vec<String> {
474    let mut out = Vec::with_capacity(lines.len());
475    let mut i = 0usize;
476
477    while i < lines.len() {
478        let line = &lines[i];
479        let trimmed = line.trim();
480        if !trimmed.starts_with("! [") {
481            out.push(line.clone());
482            i += 1;
483            continue;
484        }
485
486        let indent_len = line.chars().take_while(|c| *c == ' ').count();
487        let indent = " ".repeat(indent_len);
488        let mut inner = String::new();
489        let mut consumed = 0usize;
490        let mut found_close = false;
491
492        while i + consumed < lines.len() {
493            let current = &lines[i + consumed];
494            let current_trimmed = current.trim();
495            let segment = if consumed == 0 {
496                current_trimmed.trim_start_matches("! [")
497            } else {
498                current_trimmed
499            };
500
501            if let Some(before_close) = segment.strip_suffix(']') {
502                if !inner.is_empty() && !before_close.trim().is_empty() {
503                    inner.push(' ');
504                }
505                inner.push_str(before_close.trim());
506                found_close = true;
507                consumed += 1;
508                break;
509            }
510
511            if !inner.is_empty() && !segment.trim().is_empty() {
512                inner.push(' ');
513            }
514            inner.push_str(segment.trim());
515            consumed += 1;
516        }
517
518        if !found_close {
519            out.push(line.clone());
520            i += 1;
521            continue;
522        }
523
524        let effects: Vec<String> = if inner.trim().is_empty() {
525            vec![]
526        } else {
527            inner
528                .split(',')
529                .map(str::trim)
530                .filter(|part| !part.is_empty())
531                .map(ToString::to_string)
532                .collect()
533        };
534
535        let original_block: Vec<String> = lines[i..i + consumed].to_vec();
536        let rewritten_block = format_block_effect_declaration(&indent, &effects);
537        if original_block != rewritten_block {
538            let source_line = line_offset
539                .and_then(|off| off.get(i))
540                .copied()
541                .unwrap_or(i + 1);
542            let rule = {
543                let mut sorted = effects.clone();
544                sorted.sort();
545                if effects != sorted {
546                    "effects-unsorted"
547                } else {
548                    "effects-reshape"
549                }
550            };
551            let message = match rule {
552                "effects-unsorted" => {
553                    "effect list out of order; formatter sorts alphabetically".to_string()
554                }
555                _ => "effect declaration reshaped to canonical form".to_string(),
556            };
557            violations.push(aver::diagnostics::model::FormatViolation {
558                line: source_line,
559                col: 1,
560                rule,
561                message,
562                before: Some(original_block.join(" | ")),
563                after: Some(rewritten_block.join(" | ")),
564            });
565        }
566        out.extend(rewritten_block);
567        i += consumed;
568    }
569
570    out
571}
572
/// Classification of a top-level source block, used when pairing `verify`
/// blocks with the functions they target.
#[derive(Clone, Debug, PartialEq, Eq)]
enum BlockKind {
    /// A function definition; carries the function's name.
    Fn(String),
    /// A `verify` block; carries the name of the function it refers to.
    Verify(String),
    /// Any other top-level content.
    Other,
}
579
/// One top-level chunk of source as produced by `split_top_level_blocks`.
#[derive(Clone, Debug, PartialEq, Eq)]
struct TopBlock {
    /// The block's lines joined with `\n`, without trailing blank lines.
    text: String,
    /// What kind of block this is.
    kind: BlockKind,
    /// 1-based line on which the block starts.
    start_line: usize,
}
586
/// Block classification derived from the parsed AST.
#[derive(Default)]
struct FormatAstInfo {
    /// Block kind keyed by the line number the parser reports for the
    /// item; looked up against 1-based block start lines.
    kind_by_line: HashMap<usize, BlockKind>,
}
591
592fn classify_block(header_line: &str) -> BlockKind {
593    let trimmed = header_line.trim();
594    if let Some(rest) = trimmed.strip_prefix("fn ") {
595        let name = rest
596            .split(['(', ' ', '\t'])
597            .next()
598            .unwrap_or_default()
599            .to_string();
600        if !name.is_empty() {
601            return BlockKind::Fn(name);
602        }
603    }
604    if let Some(rest) = trimmed.strip_prefix("verify ") {
605        let name = rest
606            .split([' ', '\t'])
607            .next()
608            .unwrap_or_default()
609            .to_string();
610        if !name.is_empty() {
611            return BlockKind::Verify(name);
612        }
613    }
614    BlockKind::Other
615}
616
/// Returns `true` when `line` begins a top-level item: it is non-empty,
/// does not start with a space or tab, and is not a `//` comment.
fn is_top_level_start(line: &str) -> bool {
    match line.chars().next() {
        None | Some(' ') | Some('\t') => false,
        Some(_) => !line.trim_start().starts_with("//"),
    }
}
626
627fn split_top_level_blocks(lines: &[String], ast_info: Option<&FormatAstInfo>) -> Vec<TopBlock> {
628    if lines.is_empty() {
629        return Vec::new();
630    }
631
632    let starts: Vec<usize> = lines
633        .iter()
634        .enumerate()
635        .filter_map(|(idx, line)| is_top_level_start(line).then_some(idx))
636        .collect();
637
638    if starts.is_empty() {
639        let text = lines.join("\n").trim_end_matches('\n').to_string();
640        if text.is_empty() {
641            return Vec::new();
642        }
643        return vec![TopBlock {
644            text,
645            kind: BlockKind::Other,
646            start_line: 1,
647        }];
648    }
649
650    let mut blocks = Vec::new();
651
652    // Preserve preamble comments/metadata before first top-level declaration.
653    let first = starts[0];
654    if first > 0 {
655        let mut pre = lines[..first].to_vec();
656        while pre.last().is_some_and(|l| l.is_empty()) {
657            pre.pop();
658        }
659        if !pre.is_empty() {
660            blocks.push(TopBlock {
661                text: pre.join("\n"),
662                kind: BlockKind::Other,
663                start_line: 1,
664            });
665        }
666    }
667
668    for (i, start) in starts.iter().enumerate() {
669        let end = starts.get(i + 1).copied().unwrap_or(lines.len());
670        let mut segment = lines[*start..end].to_vec();
671        while segment.last().is_some_and(|l| l.is_empty()) {
672            segment.pop();
673        }
674        if segment.is_empty() {
675            continue;
676        }
677        let header = segment[0].clone();
678        let start_line = *start + 1;
679        let kind = ast_info
680            .and_then(|info| info.kind_by_line.get(&start_line).cloned())
681            .unwrap_or_else(|| classify_block(&header));
682        blocks.push(TopBlock {
683            text: segment.join("\n"),
684            kind,
685            start_line,
686        });
687    }
688
689    blocks
690}
691
692fn reorder_verify_blocks_tracked(
693    blocks: Vec<TopBlock>,
694    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
695) -> Vec<TopBlock> {
696    let verify_blocks: Vec<TopBlock> = blocks
697        .iter()
698        .filter(|b| matches!(b.kind, BlockKind::Verify(_)))
699        .cloned()
700        .collect();
701
702    if verify_blocks.is_empty() {
703        return blocks;
704    }
705
706    // Remember the original position (0-based index in `blocks`) of
707    // each verify block so we can flag a violation if it ends up moving.
708    let mut original_positions: HashMap<(String, usize), usize> = HashMap::new();
709    for (pos, block) in blocks.iter().enumerate() {
710        if let BlockKind::Verify(name) = &block.kind {
711            original_positions.insert((name.clone(), block.start_line), pos);
712        }
713    }
714
715    let mut by_fn: HashMap<String, Vec<usize>> = HashMap::new();
716    for (idx, block) in verify_blocks.iter().enumerate() {
717        if let BlockKind::Verify(name) = &block.kind {
718            by_fn.entry(name.clone()).or_default().push(idx);
719        }
720    }
721
722    let mut used = vec![false; verify_blocks.len()];
723    let mut out = Vec::new();
724
725    for block in blocks {
726        match block.kind.clone() {
727            BlockKind::Verify(_) => {}
728            BlockKind::Fn(name) => {
729                out.push(block);
730                if let Some(indices) = by_fn.remove(&name) {
731                    for idx in indices {
732                        used[idx] = true;
733                        out.push(verify_blocks[idx].clone());
734                    }
735                }
736            }
737            BlockKind::Other => out.push(block),
738        }
739    }
740
741    for (idx, block) in verify_blocks.iter().enumerate() {
742        if !used[idx] {
743            out.push(block.clone());
744        }
745    }
746
747    // Any verify block whose final position (in `out`) differs from its
748    // original position (in `blocks`) is a violation — the formatter
749    // moved it. Key by (name, start_line) to disambiguate duplicates.
750    for (new_pos, block) in out.iter().enumerate() {
751        if let BlockKind::Verify(name) = &block.kind {
752            let key = (name.clone(), block.start_line);
753            if let Some(&orig_pos) = original_positions.get(&key)
754                && orig_pos != new_pos
755            {
756                violations.push(aver::diagnostics::model::FormatViolation {
757                    line: block.start_line,
758                    col: 1,
759                    rule: "verify-misplaced",
760                    message: format!(
761                        "verify block '{}' should be placed immediately after its function",
762                        name
763                    ),
764                    before: None,
765                    after: None,
766                });
767            }
768        }
769    }
770
771    out
772}
773
774fn parse_ast_info_checked(source: &str) -> Result<FormatAstInfo, String> {
775    let mut lexer = Lexer::new(source);
776    let tokens = lexer.tokenize().map_err(|e| e.to_string())?;
777    let mut parser = Parser::new(tokens);
778    let items = parser.parse().map_err(|e| e.to_string())?;
779
780    let mut info = FormatAstInfo::default();
781    for item in items {
782        match item {
783            TopLevel::FnDef(fd) => {
784                info.kind_by_line
785                    .insert(fd.line, BlockKind::Fn(fd.name.clone()));
786            }
787            TopLevel::Verify(vb) => {
788                info.kind_by_line
789                    .insert(vb.line, BlockKind::Verify(vb.fn_name.clone()));
790            }
791            _ => {}
792        }
793    }
794    Ok(info)
795}
796
797/// Normalize source lines and accumulate per-rule format violations.
798///
799/// Each violation references the **original** 1-based source line,
800/// tracked through `normalize_leading_indent_tracked`. Rules further
801/// downstream still operate on `Vec<String>` today and remain silent
802/// contributors to the `violations` accumulator — migration is
803/// incremental, one rule at a time.
804fn normalize_source_lines_tracked(
805    source: &str,
806    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
807) -> Vec<String> {
808    let normalized = source.replace("\r\n", "\n").replace('\r', "\n");
809
810    let mut lines = Vec::new();
811    // Track original source line per position so downstream tracked
812    // passes can keep accurate violation coordinates. Per-line rules
813    // preserve count; reshape rules break this map and fall back to
814    // their own heuristics.
815    let mut line_offset: Vec<usize> = Vec::new();
816    for (idx, raw) in normalized.split('\n').enumerate() {
817        let trimmed = raw.trim_end_matches([' ', '\t']);
818        if trimmed.len() != raw.len() {
819            violations.push(aver::diagnostics::model::FormatViolation {
820                line: idx + 1,
821                col: trimmed.len() + 1,
822                rule: "trailing-whitespace",
823                message: "trailing whitespace".to_string(),
824                before: None,
825                after: None,
826            });
827        }
828        let (line, violation) = normalize_leading_indent_tracked(trimmed, idx + 1);
829        if let Some(v) = violation {
830            violations.push(v);
831        }
832        lines.push(line);
833        line_offset.push(idx + 1);
834    }
835
836    let lines = normalize_effect_declaration_blocks_tracked(lines, violations, Some(&line_offset));
837    let lines = normalize_function_header_effects_tracked(lines, violations, Some(&line_offset));
838    let lines = normalize_module_intent_blocks_tracked(lines, violations, Some(&line_offset));
839    normalize_inline_decision_fields_tracked(lines, violations, Some(&line_offset))
840}
841
842fn normalize_module_intent_blocks_tracked(
843    lines: Vec<String>,
844    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
845    line_offset: Option<&[usize]>,
846) -> Vec<String> {
847    let before = lines.clone();
848    let after = normalize_module_intent_blocks_impl(lines);
849    if before != after {
850        // Find first differing input line and flag it.
851        let diff_idx = before
852            .iter()
853            .zip(&after)
854            .position(|(a, b)| a != b)
855            .unwrap_or(0);
856        let source_line = line_offset
857            .and_then(|off| off.get(diff_idx))
858            .copied()
859            .unwrap_or(diff_idx + 1);
860        violations.push(aver::diagnostics::model::FormatViolation {
861            line: source_line,
862            col: 1,
863            rule: "module-intent-reshape",
864            message: "module intent block reshaped to canonical multiline form".to_string(),
865            before: None,
866            after: None,
867        });
868    }
869    after
870}
871
/// Reshape multi-line module `intent` strings into canonical form.
///
/// Inside a module header (from an unindented `module ` line until the
/// next unindented line that is neither blank nor a `//` comment), an
/// indented `intent = "..."` followed by one or more deeper-indented lines
/// that start with `"` is rewritten as a bare `intent =` line followed by
/// every string fragment on its own line, indented 4 spaces further.
/// All other lines are copied through unchanged.
fn normalize_module_intent_blocks_impl(lines: Vec<String>) -> Vec<String> {
    /// Number of leading space characters on `line`.
    fn leading_spaces(line: &str) -> usize {
        line.chars().take_while(|c| *c == ' ').count()
    }

    /// If `lines[at]` is an inline `intent = "..."` with quoted
    /// continuation lines, return all fragments (first one included).
    fn intent_fragments(lines: &[String], at: usize, indent: usize) -> Option<Vec<String>> {
        let first = lines[at][indent..].strip_prefix("intent =")?.trim_start();
        if !first.starts_with('"') {
            return None;
        }
        let continuation = lines[at + 1..]
            .iter()
            .take_while(|next| leading_spaces(next) > indent && next.trim().starts_with('"'))
            .map(|next| next.trim().to_string());
        let fragments: Vec<String> = std::iter::once(first.to_string())
            .chain(continuation)
            .collect();
        // A lone inline string stays as it is.
        (fragments.len() > 1).then_some(fragments)
    }

    let mut out = Vec::with_capacity(lines.len());
    let mut in_module_header = false;
    let mut cursor = 0usize;

    while let Some(line) = lines.get(cursor) {
        let trimmed = line.trim();
        let indent = leading_spaces(line);

        if indent == 0 {
            if trimmed.starts_with("module ") {
                in_module_header = true;
            } else if !trimmed.is_empty() && !trimmed.starts_with("//") {
                // Any other unindented content ends the module header.
                in_module_header = false;
            }
        } else if in_module_header {
            if let Some(fragments) = intent_fragments(&lines, cursor, indent) {
                let consumed = fragments.len();
                out.push(format!("{}intent =", " ".repeat(indent)));
                let fragment_indent = " ".repeat(indent + 4);
                out.extend(
                    fragments
                        .into_iter()
                        .map(|fragment| format!("{fragment_indent}{fragment}")),
                );
                cursor += consumed;
                continue;
            }
        }

        out.push(line.clone());
        cursor += 1;
    }

    out
}
935
936/// Collapse internal blank-line runs to at most 2, strip leading/trailing
937/// blanks. `block_start_line` is the 1-based source line of the block's
938/// first line so violations point back at the original source.
939fn normalize_internal_blank_runs_tracked(
940    text: &str,
941    block_start_line: usize,
942    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
943) -> String {
944    let mut out = Vec::new();
945    let mut blank_run = 0usize;
946    let mut run_start_idx: Option<usize> = None;
947    for (rel_idx, raw) in text.split('\n').enumerate() {
948        if raw.is_empty() {
949            if blank_run == 0 {
950                run_start_idx = Some(rel_idx);
951            }
952            blank_run += 1;
953            if blank_run <= 2 {
954                out.push(String::new());
955            }
956        } else {
957            if blank_run > 2
958                && let Some(start) = run_start_idx
959            {
960                let line = block_start_line.saturating_add(start).max(1);
961                violations.push(aver::diagnostics::model::FormatViolation {
962                    line,
963                    col: 1,
964                    rule: "excess-blank",
965                    message: format!(
966                        "{} consecutive blank lines; formatter collapses to 2",
967                        blank_run
968                    ),
969                    before: None,
970                    after: None,
971                });
972            }
973            blank_run = 0;
974            run_start_idx = None;
975            out.push(raw.to_string());
976        }
977    }
978    while out.first().is_some_and(|l| l.is_empty()) {
979        out.remove(0);
980    }
981    while out.last().is_some_and(|l| l.is_empty()) {
982        out.pop();
983    }
984    out.join("\n")
985}
986
/// Field names recognised at the start of a `name =` entry when detecting and
/// splitting decision fields that share one line.
const DECISION_FIELDS: [&str; 6] = [
    "date",
    "author",
    "reason",
    "chosen",
    "rejected",
    "impacts",
];
988
989fn starts_with_decision_field(content: &str) -> bool {
990    DECISION_FIELDS
991        .iter()
992        .any(|field| content.starts_with(&format!("{field} =")))
993}
994
995fn find_next_decision_field_boundary(s: &str) -> Option<usize> {
996    let mut best: Option<usize> = None;
997    for field in DECISION_FIELDS {
998        let needle = format!(" {field} =");
999        let mut search_from = 0usize;
1000        while let Some(rel) = s[search_from..].find(&needle) {
1001            let idx = search_from + rel;
1002            // Require at least two spaces before the next field marker, so
1003            // normal single-space tokens don't split accidentally.
1004            let spaces_before = s[..idx].chars().rev().take_while(|c| *c == ' ').count();
1005            // `needle` starts at one of the separating spaces, so include it.
1006            let total_separator_spaces = spaces_before + 1;
1007            if total_separator_spaces >= 2 {
1008                let field_start = idx + 1;
1009                best = Some(best.map_or(field_start, |cur| cur.min(field_start)));
1010                break;
1011            }
1012            search_from = idx + 1;
1013        }
1014    }
1015    best
1016}
1017
1018fn split_inline_decision_fields(content: &str) -> Vec<String> {
1019    if !starts_with_decision_field(content) {
1020        return vec![content.to_string()];
1021    }
1022    let mut out = Vec::new();
1023    let mut rest = content.trim_end().to_string();
1024    while let Some(idx) = find_next_decision_field_boundary(&rest) {
1025        let left = rest[..idx].trim_end().to_string();
1026        if left.is_empty() {
1027            break;
1028        }
1029        out.push(left);
1030        rest = rest[idx..].trim_start().to_string();
1031    }
1032    if !rest.is_empty() {
1033        out.push(rest.trim_end().to_string());
1034    }
1035    if out.is_empty() {
1036        vec![content.to_string()]
1037    } else {
1038        out
1039    }
1040}
1041
1042fn normalize_inline_decision_fields_tracked(
1043    lines: Vec<String>,
1044    violations: &mut Vec<aver::diagnostics::model::FormatViolation>,
1045    line_offset: Option<&[usize]>,
1046) -> Vec<String> {
1047    let before = lines.clone();
1048    let after = normalize_inline_decision_fields_impl(lines);
1049    if before != after {
1050        let diff_idx = before
1051            .iter()
1052            .zip(&after)
1053            .position(|(a, b)| a != b)
1054            .unwrap_or(0);
1055        let source_line = line_offset
1056            .and_then(|off| off.get(diff_idx))
1057            .copied()
1058            .unwrap_or(diff_idx + 1);
1059        violations.push(aver::diagnostics::model::FormatViolation {
1060            line: source_line,
1061            col: 1,
1062            rule: "decision-inline",
1063            message: "decision fields should each live on their own line".to_string(),
1064            before: None,
1065            after: None,
1066        });
1067    }
1068    after
1069}
1070
1071fn normalize_inline_decision_fields_impl(lines: Vec<String>) -> Vec<String> {
1072    let mut out = Vec::with_capacity(lines.len());
1073    let mut in_decision = false;
1074
1075    for line in lines {
1076        let trimmed = line.trim();
1077        let indent = line.chars().take_while(|c| *c == ' ').count();
1078
1079        if indent == 0 && trimmed.starts_with("decision ") {
1080            in_decision = true;
1081            out.push(line);
1082            continue;
1083        }
1084
1085        if in_decision && indent == 0 && !trimmed.is_empty() && !trimmed.starts_with("//") {
1086            in_decision = false;
1087        }
1088
1089        if in_decision && trimmed.is_empty() {
1090            continue;
1091        }
1092
1093        if in_decision && indent > 0 {
1094            let content = &line[indent..];
1095            let parts = split_inline_decision_fields(content);
1096            if parts.len() > 1 {
1097                for part in parts {
1098                    out.push(format!("{}{}", " ".repeat(indent), part));
1099                }
1100                continue;
1101            }
1102        }
1103
1104        out.push(line);
1105    }
1106
1107    out
1108}
1109
1110/// Format `source` and return the rewritten text plus a list of
1111/// [`FormatViolation`]s — one per rule that fired on a specific
1112/// location. Etap A: violations Vec is allocated but rules don't yet
1113/// populate it; callers must not claim precise line ranges.
1114/// Subsequent commits migrate each `normalize_*` rule to push to this
1115/// vec as they rewrite.
1116pub fn try_format_source(
1117    source: &str,
1118) -> Result<(String, Vec<aver::diagnostics::model::FormatViolation>), String> {
1119    let mut violations: Vec<aver::diagnostics::model::FormatViolation> = Vec::new();
1120
1121    if !source.is_empty() && !source.ends_with('\n') {
1122        let last_line = source.lines().count().max(1);
1123        violations.push(aver::diagnostics::model::FormatViolation {
1124            line: last_line,
1125            col: source.lines().last().map(str::len).unwrap_or(0) + 1,
1126            rule: "missing-final-newline",
1127            message: "file must end with a single newline".to_string(),
1128            before: None,
1129            after: None,
1130        });
1131    }
1132
1133    let lines = normalize_source_lines_tracked(source, &mut violations);
1134    let normalized = lines.join("\n");
1135    let ast_info = parse_ast_info_checked(&normalized)?;
1136
1137    // 3) Split into top-level blocks and co-locate verify blocks under their functions.
1138    let blocks = split_top_level_blocks(&lines, Some(&ast_info));
1139    let reordered = reorder_verify_blocks_tracked(blocks, &mut violations);
1140
1141    // 4) Rejoin with one blank line between top-level blocks.
1142    let mut non_empty_blocks = Vec::new();
1143    for block in reordered {
1144        let text =
1145            normalize_internal_blank_runs_tracked(&block.text, block.start_line, &mut violations);
1146        let text = text.trim_matches('\n').to_string();
1147        if !text.is_empty() {
1148            non_empty_blocks.push(text);
1149        }
1150    }
1151
1152    if non_empty_blocks.is_empty() {
1153        return Ok(("\n".to_string(), violations));
1154    }
1155    let mut out = non_empty_blocks.join("\n\n");
1156    out.push('\n');
1157    Ok((out, violations))
1158}
1159
/// Test-only convenience wrapper around [`try_format_source`] that returns
/// just the formatted text.
///
/// # Panics
///
/// Panics when `source` is rejected by [`try_format_source`].
#[cfg(test)]
pub fn format_source(source: &str) -> String {
    try_format_source(source)
        .map(|(formatted, _violations)| formatted)
        .unwrap_or_else(|err| panic!("format_source received invalid Aver source: {err}"))
}
1167
#[cfg(test)]
mod tests {
    use super::{format_source, try_format_source};

    /// Asserts that formatting `input` produces exactly `expected`.
    #[track_caller]
    fn assert_formats_to(input: &str, expected: &str) {
        assert_eq!(format_source(input), expected);
    }

    /// Asserts that `input` is already in canonical form.
    #[track_caller]
    fn assert_unchanged(input: &str) {
        assert_formats_to(input, input);
    }

    // CRLF endings and trailing spaces/tabs are removed.
    #[test]
    fn normalizes_line_endings_and_trailing_ws() {
        assert_formats_to(
            "module A\r\n    fn x() -> Int   \r\n        1\t \r\n",
            "module A\n    fn x() -> Int\n        1\n",
        );
    }

    // Leading tabs become four spaces; escapes inside string literals are untouched.
    #[test]
    fn converts_leading_tabs_only() {
        assert_formats_to(
            "\tfn x() -> String\n\t\t\"a\\tb\"\n",
            "    fn x() -> String\n        \"a\\tb\"\n",
        );
    }

    // Several blank lines between top-level blocks shrink to one.
    #[test]
    fn collapses_long_blank_runs() {
        assert_formats_to(
            "module A\n\n\n\nfn x() -> Int\n    1\n",
            "module A\n\nfn x() -> Int\n    1\n",
        );
    }

    // Extra trailing newlines are dropped and blocks are separated by one blank line.
    #[test]
    fn keeps_single_final_newline() {
        assert_formats_to(
            "module A\nfn x() -> Int\n    1\n\n\n",
            "module A\n\nfn x() -> Int\n    1\n",
        );
    }

    // The removed `= expr` body syntax is reported as an error, not reformatted.
    #[test]
    fn rejects_removed_eq_expr_syntax() {
        let outcome = try_format_source("fn x() -> Int\n    = 1\n");
        let err = outcome.expect_err("old '= expr' syntax should fail");
        assert!(
            err.contains("no longer use '= expr'"),
            "unexpected error: {}",
            err
        );
    }

    // Each verify block is moved to sit right after the function it names.
    #[test]
    fn moves_verify_directly_under_function() {
        let input = r#"module Demo

fn a(x: Int) -> Int
    x + 1

fn b(x: Int) -> Int
    x + 2

verify a
    a(1) => 2

verify b
    b(1) => 3
"#;
        let expected = r#"module Demo

fn a(x: Int) -> Int
    x + 1

verify a
    a(1) => 2

fn b(x: Int) -> Int
    x + 2

verify b
    b(1) => 3
"#;
        assert_formats_to(input, expected);
    }

    // A verify block with no matching function stays where it is.
    #[test]
    fn leaves_orphan_verify_at_end() {
        assert_unchanged(
            r#"module Demo

verify missing
    missing(1) => 2
"#,
        );
    }

    // A single-string intent is not expanded into block form.
    #[test]
    fn keeps_inline_module_intent_inline() {
        let input = r#"module Demo
    intent = "Inline intent."
    exposes [x]
fn x() -> Int
    1
"#;
        let expected = r#"module Demo
    intent = "Inline intent."
    exposes [x]

fn x() -> Int
    1
"#;
        assert_formats_to(input, expected);
    }

    // An intent continued on a second line is rewritten as an `intent =` block.
    #[test]
    fn expands_multiline_module_intent_to_block() {
        let input = r#"module Demo
    intent = "First line."
        "Second line."
    exposes [x]
fn x() -> Int
    1
"#;
        let expected = r#"module Demo
    intent =
        "First line."
        "Second line."
    exposes [x]

fn x() -> Int
    1
"#;
        assert_formats_to(input, expected);
    }

    // Two decision fields sharing a line end up on separate lines.
    #[test]
    fn splits_inline_decision_fields_to_separate_lines() {
        let input = r#"module Demo
    intent = "x"
    exposes [main]

decision D
    date = "2026-03-02"
    chosen = "A"    rejected = ["B"]
    impacts = [main]
"#;
        let expected = r#"module Demo
    intent = "x"
    exposes [main]

decision D
    date = "2026-03-02"
    chosen = "A"
    rejected = ["B"]
    impacts = [main]
"#;
        assert_formats_to(input, expected);
    }

    // A one-line `?` description is left as written.
    #[test]
    fn keeps_inline_function_description_inline() {
        assert_unchanged(
            r#"fn add(a: Int, b: Int) -> Int
    ? "Adds two numbers."
    a + b
"#,
        );
    }

    // Short effect lists are sorted but remain on a single line.
    #[test]
    fn keeps_short_effect_lists_inline() {
        let input = r#"fn apply(f: Fn(Int) -> Int ! [Console.warn, Console.print], x: Int) -> Int
    ! [Http.post, Console.print, Http.get, Console.warn]
    f(x)
"#;
        let expected = r#"fn apply(f: Fn(Int) -> Int ! [Console.print, Console.warn], x: Int) -> Int
    ! [Console.print, Console.warn, Http.get, Http.post]
    f(x)
"#;
        assert_formats_to(input, expected);
    }

    // A medium-length, already sorted effect list stays inline.
    #[test]
    fn keeps_medium_effect_lists_inline_when_they_fit() {
        assert_unchanged(
            r#"fn run() -> Unit
    ! [Args, Console, Disk, Http, Random, Tcp, Terminal, Time]
    Unit
"#,
        );
    }

    // A long effect list is broken across lines, sorted and grouped by namespace.
    #[test]
    fn expands_long_effect_lists_to_multiline_alphabetical_groups() {
        let input = r#"fn main() -> Unit
    ! [Args.get, Console.print, Console.warn, Time.now, Disk.makeDir, Disk.exists, Disk.readText, Disk.writeText, Disk.appendText]
    Unit
"#;
        let expected = r#"fn main() -> Unit
    ! [
        Args.get,
        Console.print, Console.warn,
        Disk.appendText, Disk.exists, Disk.makeDir, Disk.readText, Disk.writeText,
        Time.now,
    ]
    Unit
"#;
        assert_formats_to(input, expected);
    }

    // Effects inside a function-type parameter are sorted in place.
    #[test]
    fn sorts_function_type_effects_inline() {
        let input = r#"fn useHandler(handler: Fn(Int) -> Result<String, String> ! [Time.now, Args.get, Console.warn, Console.print, Disk.readText], value: Int) -> Unit
    handler(value)
"#;
        let expected = r#"fn useHandler(handler: Fn(Int) -> Result<String, String> ! [Args.get, Console.print, Console.warn, Disk.readText, Time.now], value: Int) -> Unit
    handler(value)
"#;
        assert_formats_to(input, expected);
    }

    // Long effect lists inside a function type are sorted but never wrapped.
    #[test]
    fn keeps_long_function_type_effects_inline() {
        let input = r#"fn apply(handler: Fn(Int) -> Int ! [Time.now, Args.get, Console.warn, Console.print, Disk.readText], value: Int) -> Int
    handler(value)
"#;
        let expected = r#"fn apply(handler: Fn(Int) -> Int ! [Args.get, Console.print, Console.warn, Disk.readText, Time.now], value: Int) -> Int
    handler(value)
"#;
        assert_formats_to(input, expected);
    }
}