1use std::collections::BTreeMap;
10
/// Options controlling optional formatter behaviour.
///
/// The `Default` value leaves every optional transform switched off.
#[derive(Clone, Debug, Default)]
pub struct Opts {
    /// When `true`, the body of a `+++` front-matter block is parsed as TOML
    /// and re-emitted with its top-level keys in sorted order. When `false`,
    /// or when the body is not valid TOML, the body only has trailing
    /// whitespace stripped.
    pub sort_frontmatter: bool,
}
18
/// Classification assigned to every input line by `scan_regions`.
///
/// The region decides which transform `apply_transforms` runs on the line and
/// whether blank lines inside it may be collapsed or trimmed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Region {
    /// Ordinary text: anything not claimed by one of the regions below.
    Default,
    /// The `+++` line that opens front matter on the first line of the input.
    FrontmatterOpen,
    /// A line between the opening and closing `+++` lines.
    FrontmatterBody,
    /// The `+++` line that closes front matter.
    FrontmatterClose,
    /// A line whose content starts with three backticks and opens a fence.
    CodeFenceOpen,
    /// A line inside an open code fence.
    CodeFenceBody,
    /// The bare three-backtick line that closes a code fence.
    CodeFenceClose,
    /// Any line of a `/* ... */` comment, including its first and last line.
    BlockComment,
    /// An `@t` table directive line.
    TableDirective,
    /// A `|`-prefixed line directly following a table directive.
    TableRow,
}
32
33impl Region {
34 fn collapsible(self) -> bool {
38 matches!(self, Region::Default)
39 }
40}
41
42pub fn format(source: &str, opts: &Opts) -> String {
43 let source = source.strip_prefix('\u{feff}').unwrap_or(source);
46
47 if source.is_empty() {
48 return String::new();
49 }
50
51 let normalized = normalize_line_endings(source);
53
54 let raw_lines: Vec<&str> = normalized.split('\n').collect();
57
58 let lines: Vec<&str> = if raw_lines.last() == Some(&"") {
61 raw_lines[..raw_lines.len() - 1].to_vec()
62 } else {
63 raw_lines
64 };
65
66 if lines.is_empty() {
67 return String::new();
68 }
69
70 let regions = scan_regions(&lines);
71 debug_assert_eq!(regions.len(), lines.len());
72
73 let transformed = apply_transforms(&lines, ®ions, opts);
74 let collapsed = collapse_blanks(&transformed.lines, &transformed.regions);
75 let trimmed = trim_blank_edges(&collapsed.lines, &collapsed.regions);
76
77 if trimmed.is_empty() {
78 return String::new();
79 }
80 let mut out = trimmed.join("\n");
81 out.push('\n');
82 out
83}
84
/// Returns `s` with every `\r\n` pair and every remaining lone `\r` replaced
/// by a single `\n`.
fn normalize_line_endings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            out.push(ch);
            continue;
        }
        // A CR directly followed by LF is one line break, not two.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        out.push('\n');
    }
    out
}
90
91fn scan_regions(lines: &[&str]) -> Vec<Region> {
92 let mut regions = vec![Region::Default; lines.len()];
93 let mut i = 0;
94
95 if !lines.is_empty() && lines[0].trim_end() == "+++" {
97 let mut close_idx = None;
98 for j in 1..lines.len() {
99 if lines[j].trim_end() == "+++" {
100 close_idx = Some(j);
101 break;
102 }
103 }
104 if let Some(j) = close_idx {
105 regions[0] = Region::FrontmatterOpen;
106 for k in 1..j {
107 regions[k] = Region::FrontmatterBody;
108 }
109 regions[j] = Region::FrontmatterClose;
110 i = j + 1;
111 }
112 }
115
116 while i < lines.len() {
117 let line = lines[i];
118 let (indent_len, _) = leading_indent(line);
119 let trimmed = &line[indent_len..];
120 let trimmed = trimmed.trim_end_matches(|c: char| c == ' ' || c == '\t');
121
122 if trimmed.starts_with("```") {
123 regions[i] = Region::CodeFenceOpen;
124 i += 1;
125 while i < lines.len() {
126 let inner = lines[i];
127 let (i_indent, _) = leading_indent(inner);
128 let inner_trim =
129 inner[i_indent..].trim_end_matches(|c: char| c == ' ' || c == '\t');
130 if inner_trim == "```" {
131 regions[i] = Region::CodeFenceClose;
132 i += 1;
133 break;
134 }
135 regions[i] = Region::CodeFenceBody;
136 i += 1;
137 }
138 continue;
139 }
140
141 if trimmed.starts_with("/*") {
142 regions[i] = Region::BlockComment;
143 if trimmed.ends_with("*/") && trimmed.len() >= 4 {
145 i += 1;
146 continue;
147 }
148 i += 1;
149 while i < lines.len() {
150 regions[i] = Region::BlockComment;
151 let inner = lines[i].trim_end_matches(|c: char| c == ' ' || c == '\t');
152 if inner.ends_with("*/") {
153 i += 1;
154 break;
155 }
156 i += 1;
157 }
158 continue;
159 }
160
161 if is_table_directive(trimmed) {
162 regions[i] = Region::TableDirective;
163 i += 1;
164 while i < lines.len() {
165 let (j_indent, _) = leading_indent(lines[i]);
166 let body = &lines[i][j_indent..];
167 if body.starts_with('|') {
168 regions[i] = Region::TableRow;
169 i += 1;
170 } else {
171 break;
172 }
173 }
174 continue;
175 }
176
177 i += 1;
178 }
179
180 regions
181}
182
/// Returns `true` when `trimmed` is a table directive: exactly `@t`, or `@t`
/// immediately followed by a space or an opening parenthesis.
fn is_table_directive(trimmed: &str) -> bool {
    match trimmed.strip_prefix("@t") {
        Some(rest) => rest.is_empty() || rest.starts_with(' ') || rest.starts_with('('),
        None => false,
    }
}
186
/// Splits off the run of spaces and tabs at the start of `line`.
///
/// Returns the length of that run in bytes together with the run itself.
fn leading_indent(line: &str) -> (usize, &str) {
    // Spaces and tabs are single-byte, so the byte offset of the first other
    // character equals the number of indentation bytes.
    let width = line
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or(line.len());
    (width, line.split_at(width).0)
}
196
197struct PassResult {
198 lines: Vec<String>,
199 regions: Vec<Region>,
200}
201
202fn apply_transforms(lines: &[&str], regions: &[Region], opts: &Opts) -> PassResult {
203 let mut out_lines: Vec<String> = Vec::with_capacity(lines.len());
204 let mut out_regions: Vec<Region> = Vec::with_capacity(lines.len());
205
206 let mut i = 0;
207 while i < lines.len() {
208 match regions[i] {
209 Region::Default => {
210 out_lines.push(transform_brief_line(lines[i]));
211 out_regions.push(Region::Default);
212 i += 1;
213 }
214 Region::FrontmatterOpen => {
215 out_lines.push("+++".to_string());
216 out_regions.push(Region::FrontmatterOpen);
217 i += 1;
218 }
219 Region::FrontmatterClose => {
220 out_lines.push("+++".to_string());
221 out_regions.push(Region::FrontmatterClose);
222 i += 1;
223 }
224 Region::FrontmatterBody => {
225 let start = i;
226 while i < lines.len() && regions[i] == Region::FrontmatterBody {
227 i += 1;
228 }
229 let body_slice = &lines[start..i];
230 let processed = process_frontmatter_body(body_slice, opts);
231 for l in processed {
232 out_lines.push(l);
233 out_regions.push(Region::FrontmatterBody);
234 }
235 }
236 Region::CodeFenceOpen => {
237 out_lines.push(strip_trailing_ws(lines[i]).to_string());
240 out_regions.push(Region::CodeFenceOpen);
241 i += 1;
242 }
243 Region::CodeFenceClose => {
244 out_lines.push(strip_trailing_ws(lines[i]).to_string());
245 out_regions.push(Region::CodeFenceClose);
246 i += 1;
247 }
248 Region::CodeFenceBody => {
249 out_lines.push(lines[i].to_string());
253 out_regions.push(Region::CodeFenceBody);
254 i += 1;
255 }
256 Region::BlockComment => {
257 out_lines.push(strip_trailing_ws(lines[i]).to_string());
259 out_regions.push(Region::BlockComment);
260 i += 1;
261 }
262 Region::TableDirective => {
263 out_lines.push(transform_brief_line(lines[i]));
264 out_regions.push(Region::TableDirective);
265 i += 1;
266 }
267 Region::TableRow => {
268 let start = i;
269 while i < lines.len() && regions[i] == Region::TableRow {
270 i += 1;
271 }
272 let formatted = format_table(&lines[start..i]);
273 for l in formatted {
274 out_lines.push(l);
275 out_regions.push(Region::TableRow);
276 }
277 }
278 }
279 }
280
281 PassResult {
282 lines: out_lines,
283 regions: out_regions,
284 }
285}
286
287fn transform_brief_line(line: &str) -> String {
290 let stripped = strip_trailing_ws(line);
291 detab_indent(stripped)
292}
293
/// Returns `line` without any trailing spaces, tabs or carriage returns.
fn strip_trailing_ws(line: &str) -> &str {
    const TRAILING: &[char] = &[' ', '\t', '\r'];
    line.trim_end_matches(TRAILING)
}
297
298fn detab_indent(line: &str) -> String {
303 let (n, _) = leading_indent(line);
304 if n == 0 {
305 return line.to_string();
306 }
307 let prefix = &line[..n];
308 let rest = &line[n..];
309 let mut out = String::with_capacity(line.len() + 4);
310 for c in prefix.chars() {
311 if c == '\t' {
312 out.push(' ');
313 out.push(' ');
314 } else {
315 out.push(c);
316 }
317 }
318 out.push_str(rest);
319 out
320}
321
322fn process_frontmatter_body(body_lines: &[&str], opts: &Opts) -> Vec<String> {
323 if !opts.sort_frontmatter {
324 return body_lines
325 .iter()
326 .map(|l| strip_trailing_ws(l).to_string())
327 .collect();
328 }
329
330 let body: String = body_lines.join("\n");
331 let parsed: Result<toml::Table, _> = toml::from_str(&body);
332 let table = match parsed {
333 Ok(t) => t,
334 Err(_) => {
335 return body_lines
338 .iter()
339 .map(|l| strip_trailing_ws(l).to_string())
340 .collect();
341 }
342 };
343
344 let sorted: BTreeMap<String, toml::Value> = table.into_iter().collect();
349 let mut wrap: toml::Table = toml::Table::new();
350 for (k, v) in sorted {
351 wrap.insert(k, v);
352 }
353 let serialized = match toml::to_string(&wrap) {
354 Ok(s) => s,
355 Err(_) => {
356 return body_lines
357 .iter()
358 .map(|l| strip_trailing_ws(l).to_string())
359 .collect();
360 }
361 };
362
363 let trimmed = serialized.trim_end_matches('\n');
365 trimmed.split('\n').map(|s| s.to_string()).collect()
366}
367
368fn format_table(rows: &[&str]) -> Vec<String> {
369 if rows.is_empty() {
370 return Vec::new();
371 }
372
373 struct Parsed {
374 indent: String,
375 cells: Vec<String>,
376 }
377
378 let parsed: Vec<Parsed> = rows
379 .iter()
380 .map(|line| {
381 let stripped = strip_trailing_ws(line);
382 let (n, _) = leading_indent(stripped);
383 let indent_raw = &stripped[..n];
384 let indent: String = indent_raw
387 .chars()
388 .flat_map(|c| {
389 if c == '\t' {
390 vec![' ', ' '].into_iter()
391 } else {
392 vec![c].into_iter()
393 }
394 })
395 .collect();
396 let body = &stripped[n..];
397 let cells = parse_table_cells(body);
398 Parsed { indent, cells }
399 })
400 .collect();
401
402 let max_cols = parsed.iter().map(|p| p.cells.len()).max().unwrap_or(0);
403 if max_cols == 0 {
404 return parsed.iter().map(|p| format!("{}|", p.indent)).collect();
405 }
406
407 let mut widths = vec![0usize; max_cols];
408 for p in &parsed {
409 for (i, cell) in p.cells.iter().enumerate() {
410 let w = cell.chars().count();
411 if w > widths[i] {
412 widths[i] = w;
413 }
414 }
415 }
416
417 parsed
418 .iter()
419 .map(|p| {
420 let mut out = p.indent.clone();
421 out.push('|');
422 for (idx, cell) in p.cells.iter().enumerate() {
423 out.push(' ');
424 out.push_str(cell);
425 let pad = widths[idx].saturating_sub(cell.chars().count());
426 if idx + 1 < p.cells.len() {
427 for _ in 0..pad {
428 out.push(' ');
429 }
430 out.push(' ');
431 out.push('|');
432 }
433 }
437 out
438 })
439 .collect()
440}
441
/// Splits the body of a table row (the text after its indentation) into
/// trimmed cell strings.
///
/// Trailing spaces and tabs, any trailing `|` characters and one leading `|`
/// are removed before splitting on `|`. Empty cells at the end of the row are
/// dropped, so a row made only of pipes and whitespace yields no cells.
/// Empty cells between non-empty ones are kept.
fn parse_table_cells(body: &str) -> Vec<String> {
    let trimmed = body
        .trim_end_matches(|c: char| c == ' ' || c == '\t')
        .trim_end_matches('|');
    let inner = trimmed.strip_prefix('|').unwrap_or(trimmed);

    let mut cells: Vec<String> = inner
        .split('|')
        .map(|cell| cell.trim().to_string())
        .collect();

    // A trailing empty cell would be rendered as a line ending in a space.
    // The next formatting pass strips that space and re-parses the row with
    // one cell fewer, so the output would change between passes. Dropping the
    // empty tail here produces that final form on the first pass.
    while cells.last().map_or(false, |cell| cell.is_empty()) {
        cells.pop();
    }
    cells
}
454
455fn collapse_blanks(lines: &[String], regions: &[Region]) -> PassResult {
456 let mut out_lines = Vec::with_capacity(lines.len());
457 let mut out_regions = Vec::with_capacity(lines.len());
458 let mut prev_was_blank = false;
459 for (line, region) in lines.iter().zip(regions.iter()) {
460 let is_blank = line.trim().is_empty();
461 if is_blank && region.collapsible() {
462 if prev_was_blank {
463 continue;
464 }
465 prev_was_blank = true;
466 out_lines.push(String::new());
468 out_regions.push(*region);
469 } else {
470 prev_was_blank = false;
471 out_lines.push(line.clone());
472 out_regions.push(*region);
473 }
474 }
475 PassResult {
476 lines: out_lines,
477 regions: out_regions,
478 }
479}
480
481fn trim_blank_edges(lines: &[String], regions: &[Region]) -> Vec<String> {
482 let mut start = 0;
483 while start < lines.len() && lines[start].trim().is_empty() && regions[start].collapsible() {
484 start += 1;
485 }
486 let mut end = lines.len();
487 while end > start && lines[end - 1].trim().is_empty() && regions[end - 1].collapsible() {
488 end -= 1;
489 }
490 lines[start..end].to_vec()
491}
492
/// Outcome of [`check`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CheckResult {
    /// Formatting the input reproduces it exactly.
    Unchanged,
    /// Formatting the input yields different text.
    WouldChange,
}
498
499pub fn check(source: &str, opts: &Opts) -> CheckResult {
500 if format(source, opts) == source {
501 CheckResult::Unchanged
502 } else {
503 CheckResult::WouldChange
504 }
505}
506
#[cfg(test)]
mod tests {
    use super::*;

    /// Formats `s` with default options.
    fn fmt(s: &str) -> String {
        format(s, &Opts::default())
    }

    #[test]
    fn empty_input_stays_empty() {
        assert_eq!(fmt(""), "");
    }

    #[test]
    fn single_line_gains_trailing_newline() {
        assert_eq!(fmt("hello"), "hello\n");
    }

    #[test]
    fn strips_trailing_whitespace() {
        assert_eq!(fmt("hello \nworld\t \n"), "hello\nworld\n");
    }

    #[test]
    fn collapses_runs_of_blank_lines() {
        assert_eq!(fmt("a\n\n\n\nb\n"), "a\n\nb\n");
    }

    #[test]
    fn trims_leading_and_trailing_blank_lines() {
        assert_eq!(fmt("\n\nhello\n\n\n"), "hello\n");
    }

    #[test]
    fn normalizes_crlf_to_lf() {
        assert_eq!(fmt("a\r\nb\r\n"), "a\nb\n");
    }

    #[test]
    fn normalizes_bare_cr_to_lf() {
        assert_eq!(fmt("a\rb\r"), "a\nb\n");
    }

    #[test]
    fn strips_leading_bom() {
        assert_eq!(fmt("\u{feff}hello\n"), "hello\n");
    }

    #[test]
    fn replaces_leading_tabs_with_two_spaces() {
        // Each leading tab expands to exactly two spaces.
        assert_eq!(fmt("\t- item\n"), "  - item\n");
        assert_eq!(fmt("\t\t- item\n"), "    - item\n");
    }

    #[test]
    fn preserves_emphasis_markers_verbatim() {
        let src = "*bold* and _underline_ and /italic/ and ~strike~\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn preserves_inline_shortcode_arg_order() {
        let src = "see @link(href: \"x\", title: \"Y\")\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn preserves_code_fence_body_verbatim() {
        let src = "```rust\n fn x ( ) { } \n```\n";
        let out = fmt(src);
        // Leading and trailing whitespace inside the fence must survive.
        assert!(out.contains(" fn x ( ) { } "));
    }

    #[test]
    fn aligns_table_columns() {
        let src = "@t\n| Header | B\n| longcell | y\n| z | other\n";
        let out = fmt(src);
        // The first column is padded to the width of "longcell"; the last
        // column of each row is left unpadded.
        let expected = "@t\n| Header   | B\n| longcell | y\n| z        | other\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn table_alignment_handles_indent() {
        let src = " @t\n | A | B\n | longer | y\n";
        let out = fmt(src);
        // Indentation is kept per row; "A" is padded to the width of "longer".
        let expected = " @t\n | A      | B\n | longer | y\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn table_with_args() {
        let src = "@t(align: [\"left\", \"right\"])\n| A | B\n| 1 | 22\n";
        let out = fmt(src);
        let expected = "@t(align: [\"left\", \"right\"])\n| A | B\n| 1 | 22\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn frontmatter_passthrough_by_default() {
        let src = "+++\nz = 1\na = 2\n+++\n# Doc\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn frontmatter_sort_when_opted_in() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nz = 1\na = 2\n+++\n# Doc\n";
        let out = format(src, &opts);
        // Isolate the text between the opening and closing markers.
        let header_end = out.find("+++\n").unwrap() + 4;
        let between = &out[header_end..];
        let close = between.find("+++").unwrap();
        let body = &between[..close];
        let a_pos = body.find("a = ").unwrap();
        let z_pos = body.find("z = ").unwrap();
        assert!(a_pos < z_pos, "frontmatter not sorted: {:?}", body);
    }

    #[test]
    fn frontmatter_with_invalid_toml_unchanged_body() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nfoo === 1\n+++\n";
        let out = format(src, &opts);
        assert!(out.contains("foo === 1"));
    }

    #[test]
    fn frontmatter_unterminated_left_alone() {
        let src = "+++\nfoo = 1\n";
        let out = fmt(src);
        assert!(out.contains("+++"));
        assert!(out.contains("foo = 1"));
    }

    #[test]
    fn block_comment_body_preserved() {
        let src = "/*\n multi\n line\n*/\nbody\n";
        let out = fmt(src);
        assert!(out.contains(" multi"));
        assert!(out.contains(" line"));
    }

    #[test]
    fn does_not_collapse_blank_lines_inside_code_fence() {
        let src = "```rust\nfn x() {\n\n\n}\n```\n";
        let out = fmt(src);
        assert!(out.contains("fn x() {\n\n\n}"));
    }

    #[test]
    fn does_not_reorder_paragraphs() {
        let src = "third\n\nfirst\n\nsecond\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn idempotent_on_already_formatted() {
        let src = "# Hello\n\nThis is a paragraph.\n\n@t\n| A | B\n| 1 | 2\n";
        let once = fmt(src);
        let twice = fmt(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn idempotent_on_unformatted() {
        let inputs = [
            " # heading \n\n\n\nbody\n",
            "@t\n| a | b\n| longercell | y\n",
            "+++\nz = 1\na = 2\n+++\n# x\n",
            "```rust\n fn x() {}\n```\n",
            "/*\n comment\n*/\n# heading\n",
            "\thello\n\t\tworld\n",
            "",
        ];
        for src in &inputs {
            let once = fmt(src);
            let twice = fmt(&once);
            assert_eq!(once, twice, "not idempotent for: {:?}", src);
        }
    }

    #[test]
    fn idempotent_with_sort_frontmatter() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nz = 1\na = 2\nm = \"x\"\n+++\nbody\n";
        let once = format(src, &opts);
        let twice = format(&once, &opts);
        assert_eq!(once, twice);
    }

    #[test]
    fn check_returns_unchanged_for_canonical_input() {
        let src = "hello\n";
        assert_eq!(check(src, &Opts::default()), CheckResult::Unchanged);
    }

    #[test]
    fn check_returns_would_change_for_dirty_input() {
        let src = "hello \n";
        assert_eq!(check(src, &Opts::default()), CheckResult::WouldChange);
    }

    #[test]
    fn nested_list_indentation_preserved() {
        let src = "- top\n - nested\n - deeper\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn hard_break_backslash_preserved_after_trailing_ws_strip() {
        let src = "line one \\ \nline two\n";
        let out = fmt(src);
        assert_eq!(out, "line one \\\nline two\n");
    }

    #[test]
    fn empty_table_row_handled() {
        let src = "@t\n|\n";
        let out = fmt(src);
        assert!(out.starts_with("@t\n|"));
    }

    #[test]
    fn unicode_in_table_cells_aligns_by_codepoint() {
        let src = "@t\n| α | b\n| longer | y\n";
        let out = fmt(src);
        // "α" counts as one column, so it receives five padding spaces to
        // match "longer" before the separating space and pipe.
        assert!(out.contains("| α      | b"));
        assert!(out.contains("| longer | y"));
    }

    #[test]
    fn comment_lines_stay_in_place() {
        let src = "// a comment\n# heading\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn block_shortcode_unchanged() {
        let src = "@callout(kind: warning)\nhello\n@end\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn fmt_idempotent_on_dl_example() {
        let src = "@dl\nTerm 1\n: Definition of term 1.\nTerm 2\n: Definition of term 2.\n@end\n";
        let once = fmt(src);
        let twice = fmt(&once);
        assert_eq!(once, twice, "fmt is not idempotent on @dl");
        assert_eq!(once, src, "fmt rewrote canonical @dl source");
    }
}