use std::collections::BTreeMap;
/// Options accepted by [`format`] and [`check`].
///
/// `Opts::default()` leaves every optional behaviour switched off.
#[derive(Debug, Clone, Default)]
pub struct Opts {
/// When `true`, the frontmatter body is parsed as TOML and re-serialized
/// with its top-level keys in sorted order. If the body does not parse (or
/// cannot be serialized again) it is emitted with only trailing whitespace
/// stripped, exactly as when this flag is `false`.
pub sort_frontmatter: bool,
}
/// Classification assigned to each input line by `scan_regions`.
///
/// The region decides which rewrite `apply_transforms` applies to the line
/// and whether blank lines at that position may be collapsed or trimmed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Region {
    /// Ordinary text that belongs to no special construct.
    Default,
    /// The opening `+++` line of a terminated frontmatter block.
    FrontmatterOpen,
    /// A line between the frontmatter delimiters.
    FrontmatterBody,
    /// The closing `+++` line of the frontmatter block.
    FrontmatterClose,
    /// A line whose content starts with a triple backtick and opens a fence.
    CodeFenceOpen,
    /// A line inside a code fence; emitted verbatim.
    CodeFenceBody,
    /// The bare triple-backtick line that closes a fence.
    CodeFenceClose,
    /// Any line of a `/* ... */` comment, including its first and last line.
    BlockComment,
    /// An `@t` table directive line.
    TableDirective,
    /// A `|`-prefixed row directly following a table directive.
    TableRow,
}

impl Region {
    /// Reports whether blank lines in this region may be merged or trimmed.
    ///
    /// Only [`Region::Default`] qualifies; every other region keeps its blank
    /// lines untouched.
    fn collapsible(self) -> bool {
        self == Region::Default
    }
}
/// Formats `source` and returns the canonical text.
///
/// The pipeline is:
/// 1. strip one leading byte-order mark,
/// 2. normalize `\r\n` and bare `\r` to `\n`,
/// 3. classify every line with `scan_regions`,
/// 4. rewrite lines per region with `apply_transforms`,
/// 5. collapse runs of blank lines and trim blank lines at both ends
///    (both only in collapsible regions),
/// 6. join the result with `\n` and terminate it with a single `\n`.
///
/// Returns an empty string when `source` is empty (after the BOM is removed)
/// or when no lines survive trimming.
pub fn format(source: &str, opts: &Opts) -> String {
    let source = source.strip_prefix('\u{feff}').unwrap_or(source);
    if source.is_empty() {
        return String::new();
    }

    let normalized = normalize_line_endings(source);
    let mut lines: Vec<&str> = normalized.split('\n').collect();
    // A terminating newline yields one empty trailing piece; it is not a line.
    if lines.last() == Some(&"") {
        lines.pop();
    }
    if lines.is_empty() {
        return String::new();
    }

    let regions = scan_regions(&lines);
    debug_assert_eq!(regions.len(), lines.len());

    // The region slice is passed by reference alongside the lines.
    let transformed = apply_transforms(&lines, &regions, opts);
    let collapsed = collapse_blanks(&transformed.lines, &transformed.regions);
    let trimmed = trim_blank_edges(&collapsed.lines, &collapsed.regions);
    if trimmed.is_empty() {
        return String::new();
    }

    let mut out = trimmed.join("\n");
    out.push('\n');
    out
}
/// Converts every `\r\n` pair and every remaining lone `\r` into `\n`.
fn normalize_line_endings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\r' {
            out.push(ch);
            continue;
        }
        // A CR followed by LF is one line break: swallow the LF as well.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        out.push('\n');
    }
    out
}
/// Assigns a [`Region`] to every line; the result has one entry per line.
///
/// Frontmatter is recognised only when line 0 is `+++` (ignoring trailing
/// whitespace) and a later `+++` line closes it. After that the scanner
/// walks the remaining lines looking for, in order of precedence: a code
/// fence, a block comment, and a table directive followed by `|` rows.
/// Unterminated fences and comments extend to the end of the input.
fn scan_regions(lines: &[&str]) -> Vec<Region> {
    fn is_blank_char(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let total = lines.len();
    let mut regions = vec![Region::Default; total];
    let mut cursor = 0;

    let opens_frontmatter = lines
        .first()
        .map_or(false, |first| first.trim_end() == "+++");
    if opens_frontmatter {
        let closing = lines
            .iter()
            .skip(1)
            .position(|l| l.trim_end() == "+++")
            .map(|offset| offset + 1);
        // Without a closing delimiter the opening `+++` stays ordinary text.
        if let Some(close) = closing {
            regions[0] = Region::FrontmatterOpen;
            for slot in &mut regions[1..close] {
                *slot = Region::FrontmatterBody;
            }
            regions[close] = Region::FrontmatterClose;
            cursor = close + 1;
        }
    }

    while cursor < total {
        // Line content with surrounding spaces and tabs removed.
        let content = lines[cursor].trim_matches(is_blank_char);

        if content.starts_with("```") {
            regions[cursor] = Region::CodeFenceOpen;
            cursor += 1;
            // Only a bare triple backtick closes the fence.
            let closing = lines[cursor..]
                .iter()
                .position(|l| l.trim_matches(is_blank_char) == "```");
            let body_end = closing.map_or(total, |offset| cursor + offset);
            for slot in &mut regions[cursor..body_end] {
                *slot = Region::CodeFenceBody;
            }
            cursor = body_end;
            if closing.is_some() {
                regions[cursor] = Region::CodeFenceClose;
                cursor += 1;
            }
        } else if content.starts_with("/*") {
            // `/*/` is not a complete comment, hence the length requirement.
            let closed_on_same_line = content.len() >= 4 && content.ends_with("*/");
            regions[cursor] = Region::BlockComment;
            cursor += 1;
            if !closed_on_same_line {
                let terminator = lines[cursor..]
                    .iter()
                    .position(|l| l.trim_end_matches(is_blank_char).ends_with("*/"));
                // The terminating line itself is part of the comment.
                let comment_end = terminator.map_or(total, |offset| cursor + offset + 1);
                for slot in &mut regions[cursor..comment_end] {
                    *slot = Region::BlockComment;
                }
                cursor = comment_end;
            }
        } else if is_table_directive(content) {
            regions[cursor] = Region::TableDirective;
            cursor += 1;
            while cursor < total
                && lines[cursor]
                    .trim_start_matches(is_blank_char)
                    .starts_with('|')
            {
                regions[cursor] = Region::TableRow;
                cursor += 1;
            }
        } else {
            cursor += 1;
        }
    }

    regions
}
/// Returns `true` when `trimmed` is the `@t` directive: either exactly `@t`,
/// or `@t` immediately followed by a space or an opening parenthesis.
fn is_table_directive(trimmed: &str) -> bool {
    match trimmed.strip_prefix("@t") {
        Some(rest) => rest.is_empty() || rest.starts_with(' ') || rest.starts_with('('),
        None => false,
    }
}
/// Splits off the leading run of spaces and tabs.
///
/// Returns the run's length in bytes together with the run itself. Only the
/// ASCII space and tab count as indentation.
fn leading_indent(line: &str) -> (usize, &str) {
    let width = line
        .find(|c: char| c != ' ' && c != '\t')
        .unwrap_or(line.len());
    (width, &line[..width])
}
/// Output of one formatting pass: the rewritten lines plus the region of
/// each line. Both passes that build this value push to the two vectors in
/// lockstep, so they have the same length.
struct PassResult {
// Rewritten line text, without line terminators.
lines: Vec<String>,
// Region of the line at the same index in `lines`.
regions: Vec<Region>,
}
/// Rewrites every line according to its region.
///
/// * `Default` and `TableDirective` lines lose trailing whitespace and have
///   tabs in their indentation expanded.
/// * Frontmatter delimiters are emitted as a bare `+++`.
/// * Fence delimiters and block-comment lines only lose trailing whitespace.
/// * Code-fence bodies are copied verbatim.
/// * Consecutive `FrontmatterBody` / `TableRow` lines are handed over as one
///   run to `process_frontmatter_body` / `format_table`, which may return a
///   different number of lines.
///
/// Every output line is tagged with the region it came from.
fn apply_transforms(lines: &[&str], regions: &[Region], opts: &Opts) -> PassResult {
    let mut result = PassResult {
        lines: Vec::with_capacity(lines.len()),
        regions: Vec::with_capacity(lines.len()),
    };

    let mut pos = 0;
    while pos < lines.len() {
        let kind = regions[pos];
        let line = lines[pos];

        let rewritten = match kind {
            Region::FrontmatterBody | Region::TableRow => {
                // Gather the whole run of same-region lines before rewriting.
                let mut end = pos + 1;
                while end < lines.len() && regions[end] == kind {
                    end += 1;
                }
                let run = &lines[pos..end];
                let block = if kind == Region::TableRow {
                    format_table(run)
                } else {
                    process_frontmatter_body(run, opts)
                };
                for out_line in block {
                    result.lines.push(out_line);
                    result.regions.push(kind);
                }
                pos = end;
                continue;
            }
            Region::FrontmatterOpen | Region::FrontmatterClose => "+++".to_string(),
            Region::Default | Region::TableDirective => transform_brief_line(line),
            Region::CodeFenceOpen | Region::CodeFenceClose | Region::BlockComment => {
                strip_trailing_ws(line).to_string()
            }
            Region::CodeFenceBody => line.to_string(),
        };

        result.lines.push(rewritten);
        result.regions.push(kind);
        pos += 1;
    }

    result
}
/// Canonicalizes an ordinary line: trailing whitespace is removed first,
/// then tabs in the leading indentation are expanded.
fn transform_brief_line(line: &str) -> String {
    detab_indent(strip_trailing_ws(line))
}
/// Returns `line` without its trailing spaces, tabs and carriage returns.
fn strip_trailing_ws(line: &str) -> &str {
    const TRAILING: &[char] = &[' ', '\t', '\r'];
    line.trim_end_matches(TRAILING)
}
/// Expands each tab in the leading indentation to two spaces.
///
/// Only the leading run of spaces and tabs is rewritten; tabs that appear
/// after the first other character are left as they are.
fn detab_indent(line: &str) -> String {
    let (indent_len, indent) = leading_indent(line);
    let mut expanded = indent.replace('\t', "  ");
    expanded.push_str(&line[indent_len..]);
    expanded
}
/// Produces the output lines for the frontmatter body.
///
/// With `opts.sort_frontmatter` off, each line only loses trailing
/// whitespace. With it on, the body is parsed as a TOML table, its top-level
/// entries are re-inserted in sorted key order and the table is serialized
/// again. If parsing or serialization fails, the whitespace-stripped
/// original lines are returned instead.
///
/// NOTE(review): the sorted path re-serializes from parsed values, so
/// anything not held in those values (comments, original spacing) is
/// presumably not carried over - confirm against the `toml` crate.
fn process_frontmatter_body(body_lines: &[&str], opts: &Opts) -> Vec<String> {
    // Shared fallback: keep the lines, dropping only trailing whitespace.
    let passthrough = || -> Vec<String> {
        body_lines
            .iter()
            .map(|line| strip_trailing_ws(line).to_string())
            .collect()
    };

    if !opts.sort_frontmatter {
        return passthrough();
    }

    let joined = body_lines.join("\n");
    let table: toml::Table = match toml::from_str(&joined) {
        Ok(table) => table,
        Err(_) => return passthrough(),
    };

    // Going through a BTreeMap fixes the key order before re-insertion.
    let ordered: BTreeMap<String, toml::Value> = table.into_iter().collect();
    let mut rebuilt = toml::Table::new();
    for (key, value) in ordered {
        rebuilt.insert(key, value);
    }

    match toml::to_string(&rebuilt) {
        Ok(text) => text
            .trim_end_matches('\n')
            .split('\n')
            .map(String::from)
            .collect(),
        Err(_) => passthrough(),
    }
}
/// Re-renders a run of table rows with aligned columns.
///
/// Each row keeps its own indentation (tabs expanded to two spaces). Every
/// cell except the last one in its row is padded to the widest cell of that
/// column, measured in `char`s, and followed by ` |`. The last cell of a row
/// is written without padding and without a closing pipe.
fn format_table(rows: &[&str]) -> Vec<String> {
    if rows.is_empty() {
        return Vec::new();
    }

    // One `(indent, cells)` pair per input row.
    let mut parsed_rows: Vec<(String, Vec<String>)> = Vec::with_capacity(rows.len());
    for row in rows {
        let clean = strip_trailing_ws(row);
        let (indent_len, indent_raw) = leading_indent(clean);
        let indent = indent_raw.replace('\t', "  ");
        let cells = parse_table_cells(&clean[indent_len..]);
        parsed_rows.push((indent, cells));
    }

    let column_count = parsed_rows
        .iter()
        .map(|(_, cells)| cells.len())
        .max()
        .unwrap_or(0);
    if column_count == 0 {
        return parsed_rows
            .into_iter()
            .map(|(indent, _)| indent + "|")
            .collect();
    }

    let mut widths = vec![0usize; column_count];
    for (_, cells) in &parsed_rows {
        for (width, cell) in widths.iter_mut().zip(cells) {
            *width = (*width).max(cell.chars().count());
        }
    }

    parsed_rows
        .iter()
        .map(|(indent, cells)| {
            let mut rendered = format!("{}|", indent);
            let last_col = cells.len().saturating_sub(1);
            for (col, cell) in cells.iter().enumerate() {
                rendered.push(' ');
                rendered.push_str(cell);
                if col < last_col {
                    let pad = widths[col].saturating_sub(cell.chars().count());
                    // Padding plus the single space that precedes the pipe.
                    rendered.extend(std::iter::repeat(' ').take(pad + 1));
                    rendered.push('|');
                }
            }
            rendered
        })
        .collect()
}
/// Splits a table row (indentation already removed) into trimmed cells.
///
/// The leading `|` and any trailing `|` characters are delimiters, not
/// cells. Empty cells at the end of the row are dropped, so a row with no
/// content at all (`|`, `| |`) yields an empty vector. Empty cells that are
/// followed by a non-empty cell are kept.
///
/// Trailing empty cells have to go because the table writer emits the last
/// cell of a row without a closing pipe: an empty last cell would come out
/// as a dangling space, and the next formatting pass would strip it and
/// parse one cell fewer. Dropping them here makes the first pass produce
/// the same row a repeated pass would converge to.
fn parse_table_cells(body: &str) -> Vec<String> {
    let without_closing = body
        .trim_end_matches(|c: char| c == ' ' || c == '\t')
        .trim_end_matches('|');
    let inner = without_closing
        .strip_prefix('|')
        .unwrap_or(without_closing);

    let mut cells: Vec<String> = inner
        .split('|')
        .map(|cell| cell.trim().to_string())
        .collect();
    while cells.last().map_or(false, |cell| cell.is_empty()) {
        cells.pop();
    }
    cells
}
/// Squeezes every run of blank lines in a collapsible region down to one
/// empty line.
///
/// A line counts as blank when it is empty after `str::trim`. Blank lines in
/// non-collapsible regions are copied unchanged and end the current run.
fn collapse_blanks(lines: &[String], regions: &[Region]) -> PassResult {
    let mut kept = PassResult {
        lines: Vec::with_capacity(lines.len()),
        regions: Vec::with_capacity(lines.len()),
    };

    let mut in_blank_run = false;
    for (text, &kind) in lines.iter().zip(regions) {
        let collapsible_blank = text.trim().is_empty() && kind.collapsible();
        if collapsible_blank && in_blank_run {
            // Second or later blank of the run: drop it.
            continue;
        }
        in_blank_run = collapsible_blank;
        kept.lines.push(if collapsible_blank {
            String::new()
        } else {
            text.clone()
        });
        kept.regions.push(kind);
    }

    kept
}
/// Removes blank lines in collapsible regions from the start and the end of
/// the document and returns the lines in between.
fn trim_blank_edges(lines: &[String], regions: &[Region]) -> Vec<String> {
    let droppable = |idx: usize| lines[idx].trim().is_empty() && regions[idx].collapsible();

    let first_kept = (0..lines.len())
        .find(|&idx| !droppable(idx))
        .unwrap_or(lines.len());
    // One past the last line that has to stay; equals `first_kept` if none does.
    let end_kept = (first_kept..lines.len())
        .rev()
        .find(|&idx| !droppable(idx))
        .map_or(first_kept, |idx| idx + 1);

    lines[first_kept..end_kept].to_vec()
}
/// Outcome of [`check`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CheckResult {
/// Formatting the input yields exactly the input.
Unchanged,
/// Formatting the input yields different text.
WouldChange,
}
/// Reports whether [`format`] would alter `source`.
///
/// The comparison is against the raw input, so a leading byte-order mark or
/// non-`\n` line endings already count as a change.
pub fn check(source: &str, opts: &Opts) -> CheckResult {
    let formatted = format(source, opts);
    if formatted != source {
        return CheckResult::WouldChange;
    }
    CheckResult::Unchanged
}
// Unit tests for the formatter. Expected strings spell out the exact
// whitespace the formatter emits (two spaces per leading tab, padded table
// columns), so every space inside these literals is significant.
#[cfg(test)]
mod tests {
    use super::*;

    /// Formats `s` with default options.
    fn fmt(s: &str) -> String {
        format(s, &Opts::default())
    }

    #[test]
    fn empty_input_stays_empty() {
        assert_eq!(fmt(""), "");
    }

    #[test]
    fn single_line_gains_trailing_newline() {
        assert_eq!(fmt("hello"), "hello\n");
    }

    #[test]
    fn strips_trailing_whitespace() {
        assert_eq!(fmt("hello \nworld\t \n"), "hello\nworld\n");
    }

    #[test]
    fn collapses_runs_of_blank_lines() {
        assert_eq!(fmt("a\n\n\n\nb\n"), "a\n\nb\n");
    }

    #[test]
    fn trims_leading_and_trailing_blank_lines() {
        assert_eq!(fmt("\n\nhello\n\n\n"), "hello\n");
    }

    #[test]
    fn normalizes_crlf_to_lf() {
        assert_eq!(fmt("a\r\nb\r\n"), "a\nb\n");
    }

    #[test]
    fn normalizes_bare_cr_to_lf() {
        assert_eq!(fmt("a\rb\r"), "a\nb\n");
    }

    #[test]
    fn strips_leading_bom() {
        assert_eq!(fmt("\u{feff}hello\n"), "hello\n");
    }

    #[test]
    fn replaces_leading_tabs_with_two_spaces() {
        // One tab becomes two spaces, two tabs become four.
        assert_eq!(fmt("\t- item\n"), "  - item\n");
        assert_eq!(fmt("\t\t- item\n"), "    - item\n");
    }

    #[test]
    fn preserves_emphasis_markers_verbatim() {
        let src = "*bold* and _underline_ and /italic/ and ~strike~\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn preserves_inline_shortcode_arg_order() {
        let src = "see @link(href: \"x\", title: \"Y\")\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn preserves_code_fence_body_verbatim() {
        let src = "```rust\n fn x ( ) { } \n```\n";
        let out = fmt(src);
        assert!(out.contains(" fn x ( ) { } "));
    }

    #[test]
    fn aligns_table_columns() {
        let src = "@t\n| Header | B\n| longcell | y\n| z | other\n";
        let out = fmt(src);
        // Column 0 is padded to the width of "longcell"; the last cell of
        // each row is never padded.
        let expected = "@t\n| Header   | B\n| longcell | y\n| z        | other\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn table_alignment_handles_indent() {
        let src = " @t\n | A | B\n | longer | y\n";
        let out = fmt(src);
        let expected = " @t\n | A      | B\n | longer | y\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn table_with_args() {
        let src = "@t(align: [\"left\", \"right\"])\n| A | B\n| 1 | 22\n";
        let out = fmt(src);
        let expected = "@t(align: [\"left\", \"right\"])\n| A | B\n| 1 | 22\n";
        assert_eq!(out, expected);
    }

    #[test]
    fn frontmatter_passthrough_by_default() {
        let src = "+++\nz = 1\na = 2\n+++\n# Doc\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn frontmatter_sort_when_opted_in() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nz = 1\na = 2\n+++\n# Doc\n";
        let out = format(src, &opts);
        let header_end = out.find("+++\n").unwrap() + 4;
        let between = &out[header_end..];
        let close = between.find("+++").unwrap();
        let body = &between[..close];
        let a_pos = body.find("a = ").unwrap();
        let z_pos = body.find("z = ").unwrap();
        assert!(a_pos < z_pos, "frontmatter not sorted: {:?}", body);
    }

    #[test]
    fn frontmatter_with_invalid_toml_unchanged_body() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nfoo === 1\n+++\n";
        let out = format(src, &opts);
        assert!(out.contains("foo === 1"));
    }

    #[test]
    fn frontmatter_unterminated_left_alone() {
        let src = "+++\nfoo = 1\n";
        let out = fmt(src);
        assert!(out.contains("+++"));
        assert!(out.contains("foo = 1"));
    }

    #[test]
    fn block_comment_body_preserved() {
        let src = "/*\n multi\n line\n*/\nbody\n";
        let out = fmt(src);
        assert!(out.contains(" multi"));
        assert!(out.contains(" line"));
    }

    #[test]
    fn does_not_collapse_blank_lines_inside_code_fence() {
        let src = "```rust\nfn x() {\n\n\n}\n```\n";
        let out = fmt(src);
        assert!(out.contains("fn x() {\n\n\n}"));
    }

    #[test]
    fn does_not_reorder_paragraphs() {
        let src = "third\n\nfirst\n\nsecond\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn idempotent_on_already_formatted() {
        let src = "# Hello\n\nThis is a paragraph.\n\n@t\n| A | B\n| 1 | 2\n";
        let once = fmt(src);
        let twice = fmt(&once);
        assert_eq!(once, twice);
    }

    #[test]
    fn idempotent_on_unformatted() {
        let inputs = [
            " # heading \n\n\n\nbody\n",
            "@t\n| a | b\n| longercell | y\n",
            "+++\nz = 1\na = 2\n+++\n# x\n",
            "```rust\n fn x() {}\n```\n",
            "/*\n comment\n*/\n# heading\n",
            "\thello\n\t\tworld\n",
            "",
        ];
        for src in &inputs {
            let once = fmt(src);
            let twice = fmt(&once);
            assert_eq!(once, twice, "not idempotent for: {:?}", src);
        }
    }

    #[test]
    fn idempotent_with_sort_frontmatter() {
        let opts = Opts {
            sort_frontmatter: true,
        };
        let src = "+++\nz = 1\na = 2\nm = \"x\"\n+++\nbody\n";
        let once = format(src, &opts);
        let twice = format(&once, &opts);
        assert_eq!(once, twice);
    }

    #[test]
    fn check_returns_unchanged_for_canonical_input() {
        let src = "hello\n";
        assert_eq!(check(src, &Opts::default()), CheckResult::Unchanged);
    }

    #[test]
    fn check_returns_would_change_for_dirty_input() {
        let src = "hello \n";
        assert_eq!(check(src, &Opts::default()), CheckResult::WouldChange);
    }

    #[test]
    fn nested_list_indentation_preserved() {
        let src = "- top\n - nested\n - deeper\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn hard_break_backslash_preserved_after_trailing_ws_strip() {
        let src = "line one \\ \nline two\n";
        let out = fmt(src);
        assert_eq!(out, "line one \\\nline two\n");
    }

    #[test]
    fn empty_table_row_handled() {
        let src = "@t\n|\n";
        let out = fmt(src);
        assert!(out.starts_with("@t\n|"));
    }

    #[test]
    fn unicode_in_table_cells_aligns_by_codepoint() {
        let src = "@t\n| α | b\n| longer | y\n";
        let out = fmt(src);
        // "α" is one code point, so it is padded by five to match "longer".
        assert!(out.contains("| α      | b"));
        assert!(out.contains("| longer | y"));
    }

    #[test]
    fn comment_lines_stay_in_place() {
        let src = "// a comment\n# heading\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn block_shortcode_unchanged() {
        let src = "@callout(kind: warning)\nhello\n@end\n";
        assert_eq!(fmt(src), src);
    }

    #[test]
    fn fmt_idempotent_on_dl_example() {
        let src = "@dl\nTerm 1\n: Definition of term 1.\nTerm 2\n: Definition of term 2.\n@end\n";
        let once = fmt(src);
        let twice = fmt(&once);
        assert_eq!(once, twice, "fmt is not idempotent on @dl");
        assert_eq!(once, src, "fmt rewrote canonical @dl source");
    }
}