use anyhow::Result;
use clap::Parser;
use fancy_regex::{Captures, Regex};
use lazy_static::lazy_static;
use log::debug;
use markdown_table_formatter::format_tables;
use std::{
fs::File,
io::{self, Read, Write},
path::PathBuf,
};
// Command-line arguments of the `mdformat` binary, parsed through clap's derive API.
// Plain `//` comments are used here on purpose: clap's derive picks up `///` doc
// comments as help text, so doc comments would alter the `--help` output.
#[derive(Parser)]
#[command(
name = "mdformat",
version,
about = "Formats Markdown code with consistent empty lines and spacing"
)]
struct CliArgs {
// Optional positional path of the Markdown file to read; `main` reads stdin when it is absent.
input: Option<PathBuf>,
// `-o` / `--output`: optional path to write the result to; `main` writes to stdout when it is absent.
#[arg(short, long)]
output: Option<PathBuf>,
// `-i` / `--indent`: a `usize` defaulting to 4. It is parsed but not read by any code
// visible in this file.
#[arg(short, long, default_value_t = 4, value_parser = clap::value_parser!(usize))]
indent: usize,
}
/// Entry point: reads Markdown, formats it and writes the result.
///
/// The input comes from the positional path when one is given, otherwise from stdin.
/// The formatted text goes to the `--output` path when one is given, otherwise to stdout.
///
/// # Errors
///
/// Propagates any I/O error raised while opening, reading, creating or writing.
fn main() -> Result<()> {
    let args = CliArgs::parse();

    // Slurp the whole document; the formatter works on the complete text.
    let mut content = String::new();
    if let Some(path) = &args.input {
        File::open(path)?.read_to_string(&mut content)?;
    } else {
        io::stdin().read_to_string(&mut content)?;
    }

    let formatted = format_markdown(&content);
    let bytes = formatted.as_bytes();

    if let Some(path) = &args.output {
        File::create(path)?.write_all(bytes)?;
    } else {
        io::stdout().write_all(bytes)?;
    }

    Ok(())
}
/// Formats a whole Markdown document and returns the result.
///
/// Steps, in order:
/// 1. surrounding whitespace of the document is trimmed and every line loses its
///    trailing whitespace;
/// 2. the lines are passed through `format_lines` and joined with `\n`;
/// 3. the joined text is handed to `format_tables`;
/// 4. a final `\n` is appended unless the text already ends with one.
fn format_markdown(text: &str) -> String {
    let trimmed_lines: Vec<&str> = text.trim().lines().map(str::trim_end).collect();
    let joined = format_lines(trimmed_lines).join("\n");

    let mut output = format_tables(&joined);
    if !output.ends_with('\n') {
        output.push('\n');
    }
    output
}
/// Classification of a single input line, as decided by [`get_line_state`].
///
/// The type is a plain field-less enum, so it is `Copy`; existing `.clone()` calls
/// keep working.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum LineState {
    /// Any line that matches none of the other categories.
    Normal,
    /// A line starting with `|`.
    Table,
    /// The opening fence of a fenced code block.
    CodeStart,
    /// The closing fence of a fenced code block.
    CodeEnd,
    /// A line inside a fenced code block (blank lines included).
    Code,
    /// An empty line outside of a code block.
    Empty,
    /// A line starting with `#`.
    Header,
}

/// Classifies `line` given the state of the line that precedes it.
///
/// While inside a fenced code block (`prev_state` is `CodeStart` or `Code`) the only
/// thing that matters is whether `line` is a fence: a fence yields `CodeEnd`, anything
/// else (even an empty line) yields `Code`.
///
/// Outside a code block the checks run in this order: empty line, code fence, `#`
/// prefix (header), `|` prefix (table); everything else is `Normal`. Note that any
/// leading `#` counts as a header, with or without a following space.
///
/// A fence may be indented by up to three spaces, as permitted by CommonMark; this keeps
/// code blocks nested under list items from being reformatted as prose.
fn get_line_state(line: &str, prev_state: LineState) -> LineState {
    let fence = is_code_fence(line);

    if matches!(prev_state, LineState::CodeStart | LineState::Code) {
        return if fence {
            LineState::CodeEnd
        } else {
            LineState::Code
        };
    }

    if line.is_empty() {
        LineState::Empty
    } else if fence {
        LineState::CodeStart
    } else if line.starts_with('#') {
        LineState::Header
    } else if line.starts_with('|') {
        LineState::Table
    } else {
        LineState::Normal
    }
}

/// Returns `true` when `line` starts with three backticks after at most three spaces.
fn is_code_fence(line: &str) -> bool {
    // Spaces are single-byte ASCII, so `indent` is always a valid char boundary.
    let indent = line.bytes().take_while(|&b| b == b' ').count();
    indent <= 3 && line[indent..].starts_with("```")
}
/// Normalises blank lines around blocks and formats the text of non-code lines.
///
/// Each line is classified with `get_line_state`, then:
/// * a blank line is inserted before a normal line that follows a table or a closing
///   fence, before an opening fence or a table that does not follow a blank line
///   (consecutive table rows stay together), and for the first of a run of empty lines;
/// * normal, table and header lines go through `format_line`; code lines and fences are
///   copied verbatim; empty lines emit nothing beyond the blank line above;
/// * a header is always followed by a blank line and is then treated as an empty line,
///   so blank lines after it in the input are not duplicated.
///
/// The state before the first line is `Normal`, so a table or an opening fence on the
/// very first line is preceded by a blank line.
fn format_lines(lines: Vec<&str>) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut prev = LineState::Normal;

    for &line in &lines {
        let state = get_line_state(line, prev.clone());
        debug!("{:?}: {}", state, line);

        // Decide first whether a separating blank line is required.
        let blank_before = match state {
            LineState::Normal => prev == LineState::Table || prev == LineState::CodeEnd,
            LineState::CodeStart => prev != LineState::Empty,
            LineState::Table => prev != LineState::Table && prev != LineState::Empty,
            LineState::Empty => prev != LineState::Empty,
            LineState::Code | LineState::CodeEnd | LineState::Header => false,
        };
        if blank_before {
            out.push(String::new());
        }

        // Then emit the line itself.
        match state {
            LineState::Normal | LineState::Table | LineState::Header => {
                out.push(format_line(line));
            }
            LineState::CodeStart | LineState::Code | LineState::CodeEnd => {
                out.push(line.to_string());
            }
            LineState::Empty => {}
        }

        // A header carries its own trailing blank line and counts as `Empty` afterwards.
        prev = if state == LineState::Header {
            out.push(String::new());
            LineState::Empty
        } else {
            state
        };
    }

    out
}
/// Formats one non-code line of Markdown.
///
/// This is a thin wrapper that delegates to `format_text`; `format_lines` calls it for
/// normal, table and header lines.
fn format_line(line: &str) -> String {
format_text(line)
}
/// Applies the inline spacing rules to `text`.
///
/// Each rule is run twice. Regex matches do not overlap, so one pass can miss a boundary
/// whose neighbouring character was consumed by the previous match (for example the
/// `2谢` boundary in `好2谢`); the second pass picks those up.
fn format_text(text: &str) -> String {
    let cjk_spaced = add_spaces_between_cjk_ascii(&add_spaces_between_cjk_ascii(text));
    add_space_around_code_spans(&add_space_around_code_spans(&cjk_spaced))
}
// Regexes shared by the inline formatting helpers; `lazy_static!` compiles each one on
// first use. The patterns are literals, so `unwrap` can only fail on a programming error.
lazy_static! {
// Matches a Han-script character immediately followed by an ASCII letter or digit
// (groups 1 and 2), or the reverse order (groups 3 and 4). Only the Han script is
// covered by `\p{sc=Han}`.
static ref RE_CJK: Regex =
Regex::new(r"(\p{sc=Han})([a-zA-Z0-9])|([a-zA-Z0-9])(\p{sc=Han})").unwrap();
// Group 2 is a backtick-delimited span with no backtick inside. Groups 1 and 3 capture
// at most one directly adjacent character that is neither a backtick nor whitespace,
// and are empty when there is no such character.
static ref RE_CODE_SPAN: Regex = Regex::new(r"([^`\s]?)(`[^`]*`)([^`\s]?)").unwrap();
}
/// Inserts one space at every boundary `RE_CJK` matches, i.e. between a Han character
/// and a directly adjacent ASCII letter or digit, in either order.
///
/// A single call handles non-overlapping matches only; `format_text` calls it twice.
fn add_spaces_between_cjk_ascii(text: &str) -> String {
    let spaced = RE_CJK.replace_all(text, |caps: &Captures| {
        // Exactly one alternative matched: groups 1+2 (Han, ASCII) or 3+4 (ASCII, Han).
        let (left, right) = match caps.get(1) {
            Some(han) => (han.as_str(), &caps[2]),
            None => (caps.get(3).unwrap().as_str(), &caps[4]),
        };
        format!("{} {}", left, right)
    });
    spaced.into_owned()
}
/// Separates an inline code span from a directly adjacent character with one space.
///
/// `RE_CODE_SPAN` captures the span plus at most one neighbouring character on each side
/// that is neither whitespace nor a backtick. A space is inserted on each side where
/// such a neighbour was captured; a span with no captured neighbour is left unchanged.
fn add_space_around_code_spans(text: &str) -> String {
    let spaced = RE_CODE_SPAN.replace_all(text, |caps: &Captures| {
        // All three groups always participate in a match (1 and 3 may be empty).
        let before = caps.get(1).unwrap().as_str();
        let code = caps.get(2).unwrap().as_str();
        let after = caps.get(3).unwrap().as_str();
        debug!("before: [{}], code: [{}], after: [{}]", before, code, after);

        match (before.is_empty(), after.is_empty()) {
            (true, true) => code.to_string(),
            (true, false) => format!("{} {}", code, after),
            (false, true) => format!("{} {}", before, code),
            (false, false) => format!("{} {} {}", before, code, after),
        }
    });
    spaced.into_owned()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A table is aligned by `format_tables`. The leading blank line is expected: a table
    /// on the first line is preceded by a blank line because `format_lines` starts in the
    /// `Normal` state.
    #[test]
    fn test_align_table() {
        let fmt_md = format_markdown("|a|b|\n|---|---|\n| column 1 | column 2 |");
        assert_eq!(
            fmt_md,
            "\n| a | b |\n| -------- | -------- |\n| column 1 | column 2 |\n"
        );
    }

    /// Every header is followed by exactly one blank line.
    #[test]
    fn test_insert_empty_line_for_title() {
        let fmt_md = format_markdown("# title 1\n## title2\n### title3");
        assert_eq!(fmt_md, "# title 1\n\n## title2\n\n### title3\n");
    }

    /// A table is separated from surrounding text by blank lines.
    #[test]
    fn test_insert_empty_line_for_table() {
        let fmt_md = format_markdown(
            "# title 1\ntext\n| aaa | bbb |\n| --- | --- |\n| 123 | 456 |\nline text",
        );
        assert_eq!(
            fmt_md,
            "# title 1\n\ntext\n\n| aaa | bbb |\n| --- | --- |\n| 123 | 456 |\n\nline text\n"
        );
    }

    /// Spaces are inserted between Han and ASCII alphanumerics and around code spans.
    #[test]
    fn test_insert_space() {
        let fmt_md = format_markdown("# 123你好2谢谢hello`你好call function()`$text谢谢$谢谢");
        assert_eq!(
            fmt_md,
            "# 123 你好 2 谢谢 hello `你好 call function()` $text 谢谢$谢谢\n"
        );
    }

    /// Runs of empty (or whitespace-only) lines collapse into a single blank line.
    #[test]
    fn test_join_empty_lines() {
        let fmt_md = format_markdown("line1\n\n\nline2\n\n \n \nline3");
        assert_eq!(fmt_md, "line1\n\nline2\n\nline3\n");
    }

    /// A fenced code block is kept verbatim and separated from surrounding text by one
    /// blank line on each side. Escaped strings are used instead of raw strings so the
    /// blank lines in the expectation are explicit.
    #[test]
    fn test_code_block() {
        let input = "pre text\n```\n$ brew install ripgrep\n```\nafter text\n";
        let fmt_md = format_markdown(input);
        assert_eq!(
            fmt_md,
            "pre text\n\n```\n$ brew install ripgrep\n```\n\nafter text\n"
        );
    }

    /// Code spans are separated from directly adjacent characters on both sides.
    #[test]
    fn test_code_span() {
        // `try_init` instead of `init`: it does not panic when a logger is already set.
        let _ = env_logger::try_init();
        let input = "`start`ignored `by` your `.gitignore`/`.ignore`/`.rgignore` files`end`";
        let fmt_md = format_markdown(input);
        assert_eq!(
            fmt_md,
            "`start` ignored `by` your `.gitignore` / `.ignore` / `.rgignore` files `end`\n"
        );
    }
}