use super::ast::Node;
use super::tokens::{Token, TokenKind};
fn is_continuation_op(tok: &Token) -> bool {
matches!(tok.kind, TokenKind::Pipe | TokenKind::Comma)
|| (tok.kind == TokenKind::Operator && matches!(tok.value.as_str(), "&&" | "||"))
}
use super::trivia::TriviaKind;
use std::fmt;
#[derive(Debug, Clone)]
pub struct FormatOptions {
pub max_width: usize,
pub indent_width: usize,
}
impl Default for FormatOptions {
fn default() -> Self {
Self {
max_width: 100,
indent_width: 4,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatError {
Syntax { errors: Vec<String> },
Unsafe { reason: String },
}
impl fmt::Display for FormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
FormatError::Syntax { errors } => {
write!(f, "input has syntax errors: {}", errors.join("; "))
}
FormatError::Unsafe { reason } => {
write!(f, "formatter safety check failed: {reason}")
}
}
}
}
impl std::error::Error for FormatError {}
pub fn format_source(src: &str) -> Result<String, FormatError> {
format_source_with(src, &FormatOptions::default())
}
pub fn format_source_with(src: &str, options: &FormatOptions) -> Result<String, FormatError> {
let lexed = super::lex(src);
if !lexed.errors.is_empty() {
let errors = lexed.errors.iter().map(|e| e.to_string()).collect();
return Err(FormatError::Syntax { errors });
}
let parsed = super::parse_tokens(src, lexed.tokens);
if !parsed.errors.is_empty() {
let errors = parsed.errors.iter().map(|e| e.message.clone()).collect();
return Err(FormatError::Syntax { errors });
}
let doc = Emitter::new(&parsed.tokens).file();
let mut out = print(&doc, options);
if !out.is_empty() && !out.ends_with('\n') {
out.push('\n');
if verify_equivalent(&parsed.tokens, &parsed.script, &out).is_ok() {
return Ok(out);
}
out.pop();
}
verify_equivalent(&parsed.tokens, &parsed.script, &out)?;
Ok(out)
}
fn verify_equivalent(
input_tokens: &[Token],
input_root: &Node,
output: &str,
) -> Result<(), FormatError> {
let out_lexed = super::lex(output);
if !out_lexed.errors.is_empty() {
return Err(FormatError::Unsafe {
reason: "formatted output no longer parses cleanly".into(),
});
}
let significant = |tokens: &[Token]| -> Vec<(TokenKind, String)> {
tokens
.iter()
.filter(|t| t.kind != TokenKind::Eof)
.map(|t| (t.kind, t.value.clone()))
.collect()
};
if significant(input_tokens) != significant(&out_lexed.tokens) {
return Err(FormatError::Unsafe {
reason: "token sequence changed".into(),
});
}
let out_parsed = super::parse_tokens(output, out_lexed.tokens);
if !out_parsed.errors.is_empty() {
return Err(FormatError::Unsafe {
reason: "formatted output no longer parses cleanly".into(),
});
}
if !same_shape(input_root, &out_parsed.script) {
return Err(FormatError::Unsafe {
reason: "tree structure changed".into(),
});
}
Ok(())
}
fn same_shape(a: &Node, b: &Node) -> bool {
a.label() == b.label() && {
let (ca, cb) = (a.children(), b.children());
ca.len() == cb.len() && ca.iter().zip(&cb).all(|(x, y)| same_shape(x, y))
}
}
#[derive(Debug, Clone)]
enum Doc {
Text(String),
Raw(String),
Line,
SoftLine,
HardLine,
BlankLine,
Group(Vec<Doc>),
Indent(Vec<Doc>),
}
#[derive(Clone, Copy, PartialEq)]
enum Mode {
Flat,
Break,
}
fn print(doc: &Doc, options: &FormatOptions) -> String {
let mut out = String::new();
let mut col = 0usize;
let mut stack: Vec<(usize, Mode, &Doc)> = vec![(0, Mode::Break, doc)];
let mut fits_stack: Vec<&Doc> = Vec::new();
let newline = |out: &mut String, col: &mut usize, indent: usize| {
while out.ends_with(' ') {
out.pop();
}
out.push('\n');
let pad = indent * options.indent_width;
out.extend(std::iter::repeat_n(' ', pad));
*col = pad;
};
while let Some((indent, mode, d)) = stack.pop() {
match d {
Doc::Text(s) => {
out.push_str(s);
col += s.chars().count();
}
Doc::Raw(s) => {
out.push_str(s);
col = match s.rfind('\n') {
Some(i) => s[i + 1..].chars().count(),
None => col + s.chars().count(),
};
}
Doc::Line => match mode {
Mode::Flat => {
out.push(' ');
col += 1;
}
Mode::Break => newline(&mut out, &mut col, indent),
},
Doc::SoftLine => {
if mode == Mode::Break {
newline(&mut out, &mut col, indent);
}
}
Doc::HardLine => newline(&mut out, &mut col, indent),
Doc::BlankLine => {
while out.ends_with(' ') {
out.pop();
}
out.push('\n');
newline(&mut out, &mut col, indent);
}
Doc::Group(items) => {
let chosen = if mode == Mode::Flat
|| fits(
options.max_width.saturating_sub(col),
items,
&mut fits_stack,
) {
Mode::Flat
} else {
Mode::Break
};
for item in items.iter().rev() {
stack.push((indent, chosen, item));
}
}
Doc::Indent(items) => {
for item in items.iter().rev() {
stack.push((indent + 1, mode, item));
}
}
}
}
out
}
fn fits<'a>(remaining: usize, docs: &'a [Doc], stack: &mut Vec<&'a Doc>) -> bool {
let mut budget = remaining as isize;
stack.clear();
stack.extend(docs.iter().rev());
while let Some(d) = stack.pop() {
match d {
Doc::Text(s) => {
if (s.len() as isize) > budget {
return false;
}
budget -= s.chars().count() as isize;
}
Doc::Raw(s) => {
if s.contains('\n') || (s.len() as isize) > budget {
return false;
}
budget -= s.chars().count() as isize;
}
Doc::Line => budget -= 1,
Doc::SoftLine => {}
Doc::HardLine | Doc::BlankLine => return false,
Doc::Group(items) | Doc::Indent(items) => {
for item in items.iter().rev() {
stack.push(item);
}
}
}
if budget < 0 {
return false;
}
}
true
}
#[derive(Debug, Clone)]
enum GapItem {
Newline,
Comment(String, bool, bool, bool),
GluedContinuation(String),
}
struct Gap {
items: Vec<GapItem>,
adjacent: bool,
}
struct GapInfo {
comments: Vec<(usize, String, bool, bool, bool)>,
newlines_before_token: usize,
}
impl Gap {
fn newline_count(&self) -> usize {
self.items
.iter()
.filter(|i| matches!(i, GapItem::Newline))
.count()
}
fn clear_comments(&mut self) {
self.items.retain(|i| matches!(i, GapItem::Newline));
}
fn info(&self) -> GapInfo {
let mut comments = Vec::new();
let mut newlines = 0usize;
for item in &self.items {
match item {
GapItem::Newline => newlines += 1,
GapItem::Comment(text, block, gl, gr) => {
comments.push((newlines, text.clone(), *block, *gl, *gr));
newlines = 0;
}
GapItem::GluedContinuation(text) => {
comments.push((newlines, text.clone(), true, true, true));
newlines = 0;
}
}
}
GapInfo {
comments,
newlines_before_token: newlines,
}
}
}
struct Emitter<'a> {
tokens: &'a [Token],
gaps: Vec<Gap>,
pos: usize,
structured: bool,
}
impl<'a> Emitter<'a> {
fn new(tokens: &'a [Token]) -> Self {
let mut gaps = Vec::with_capacity(tokens.len());
for (i, tok) in tokens.iter().enumerate() {
let mut raw: Vec<(&TriviaKind, &str, super::span::Span)> = Vec::new();
if i > 0 {
for t in &tokens[i - 1].trailing {
raw.push((&t.kind, &t.text, t.span));
}
}
for t in &tok.leading {
raw.push((&t.kind, &t.text, t.span));
}
let mut items = Vec::new();
let mut left_end: Option<usize> = (i > 0).then(|| tokens[i - 1].span.end);
for (idx, (kind, text, span)) in raw.iter().enumerate() {
let right_start = raw
.get(idx + 1)
.map(|(_, _, s)| s.start)
.unwrap_or(tok.span.start);
let glued_left = left_end == Some(span.start);
let glued_right = span.end == right_start;
match kind {
TriviaKind::Newline => items.push(GapItem::Newline),
TriviaKind::LineComment => {
items.push(GapItem::Comment(
(*text).to_string(),
false,
glued_left,
glued_right,
));
}
TriviaKind::BlockComment => {
items.push(GapItem::Comment(
(*text).to_string(),
true,
glued_left,
glued_right,
));
}
TriviaKind::LineContinuation => {
if glued_left && glued_right {
items.push(GapItem::GluedContinuation((*text).to_string()));
}
}
TriviaKind::Whitespace => {}
}
left_end = match kind {
TriviaKind::Whitespace | TriviaKind::Newline => None,
_ => Some(span.end),
};
}
let adjacent = i > 0
&& tokens[i - 1].trailing.is_empty()
&& tok.leading.is_empty()
&& tokens[i - 1].span.end == tok.span.start;
gaps.push(Gap { items, adjacent });
}
Self {
structured: brackets_balanced(tokens),
tokens,
gaps,
pos: 0,
}
}
fn file(mut self) -> Doc {
let body = self.level(None);
Doc::Group(body)
}
fn cur(&self) -> &Token {
&self.tokens[self.pos]
}
fn at_closer_of(&self, closer: Option<TokenKind>) -> bool {
match closer {
Some(kind) => self.cur().kind == kind,
None => false,
}
}
fn push_break(out: &mut Vec<Doc>, newlines: usize, glued: bool, emitted: bool) {
if !emitted {
return;
}
out.push(match newlines {
0 if glued => Doc::Text(String::new()),
0 => Doc::Text(" ".into()),
1 => Doc::HardLine,
_ => Doc::BlankLine,
});
}
fn level(&mut self, closer: Option<TokenKind>) -> Vec<Doc> {
let mut out: Vec<Doc> = Vec::new();
let mut emitted = false;
loop {
let info = self.gaps[self.pos].info();
if !info.comments.is_empty() {
self.gaps[self.pos].clear_comments();
}
let mut token_glued = false;
for (newlines, text, block, glued_left, glued_right) in &info.comments {
Self::push_break(&mut out, *newlines, *glued_left, emitted);
out.push(comment_doc(text, *block));
emitted = true;
token_glued = *glued_right;
}
if self.cur().kind == TokenKind::Eof || self.at_closer_of(closer) {
break;
}
Self::push_break(&mut out, info.newlines_before_token, token_glued, emitted);
let line = self.line(closer);
out.push(Doc::Group(line));
emitted = true;
}
out
}
fn line(&mut self, closer: Option<TokenKind>) -> Vec<Doc> {
let mut out: Vec<Doc> = Vec::new();
let mut first = true;
loop {
let tok_kind = self.cur().kind;
if tok_kind == TokenKind::Eof || self.at_closer_of(closer) {
break;
}
if !first && self.gaps[self.pos].newline_count() > 0 {
if !is_continuation_op(&self.tokens[self.pos - 1]) {
break; }
}
if !first {
out.push(self.separator());
}
first = false;
match tok_kind {
TokenKind::LParen
| TokenKind::LBracket
| TokenKind::LBrace
| TokenKind::DollarParen
| TokenKind::AtParen
| TokenKind::AtBrace
if self.structured =>
{
let doc = self.bracketed();
out.push(doc);
}
_ => {
out.push(token_doc(self.cur()));
self.pos += 1;
}
}
}
out
}
fn bracketed(&mut self) -> Doc {
let open_value = self.cur().value.clone();
let close_kind = match self.cur().kind {
TokenKind::LParen | TokenKind::DollarParen | TokenKind::AtParen => TokenKind::RParen,
TokenKind::LBracket => TokenKind::RBracket,
_ => TokenKind::RBrace,
};
let spaced = matches!(self.cur().kind, TokenKind::LBrace | TokenKind::AtBrace);
self.pos += 1;
let opener_break = self.gaps[self.pos].newline_count() > 0;
let inner = self.level(Some(close_kind));
let has_close = self.cur().kind == close_kind;
if has_close {
self.pos += 1; }
let edge = || -> Doc {
if opener_break {
Doc::HardLine
} else if spaced {
Doc::Line
} else {
Doc::SoftLine
}
};
let mut docs = vec![Doc::Text(open_value)];
if !inner.is_empty() {
let mut indented = vec![edge()];
indented.extend(inner);
docs.push(Doc::Indent(indented));
docs.push(edge());
}
if has_close {
docs.push(Doc::Text(close_token_value(close_kind)));
}
Doc::Group(docs)
}
fn separator(&mut self) -> Doc {
let gap = &self.gaps[self.pos];
let info = gap.info();
if !info.comments.is_empty() {
let mut pieces: Vec<Doc> = Vec::new();
let mut last_glued_right = false;
for (_, text, block, glued_left, glued_right) in &info.comments {
if !*glued_left {
pieces.push(Doc::Text(" ".into()));
}
pieces.push(comment_doc(text, *block));
last_glued_right = *glued_right;
}
if !last_glued_right {
pieces.push(Doc::Text(" ".into()));
}
return Doc::Group(pieces);
}
if gap.adjacent {
return Doc::Text(String::new());
}
let prev = &self.tokens[self.pos - 1];
let cur = self.cur();
if matches!(cur.kind, TokenKind::Comma | TokenKind::Semicolon) {
return Doc::Text(String::new());
}
if is_continuation_op(prev) {
return Doc::Indent(vec![Doc::Line]);
}
if prev.kind == TokenKind::Semicolon {
return Doc::Line;
}
Doc::Text(" ".into())
}
}
fn brackets_balanced(tokens: &[Token]) -> bool {
let mut stack: Vec<TokenKind> = Vec::new();
for tok in tokens {
match tok.kind {
TokenKind::LParen | TokenKind::DollarParen | TokenKind::AtParen => {
stack.push(TokenKind::RParen)
}
TokenKind::LBracket => stack.push(TokenKind::RBracket),
TokenKind::LBrace | TokenKind::AtBrace => stack.push(TokenKind::RBrace),
TokenKind::RParen | TokenKind::RBracket | TokenKind::RBrace
if stack.pop() != Some(tok.kind) =>
{
return false;
}
_ => {}
}
}
stack.is_empty()
}
fn token_doc(tok: &Token) -> Doc {
if tok.value.contains('\n') {
Doc::Raw(tok.value.clone())
} else {
Doc::Text(tok.value.clone())
}
}
fn comment_doc(text: &str, block: bool) -> Doc {
if block && text.contains('\n') {
Doc::Raw(text.to_string())
} else {
Doc::Text(text.to_string())
}
}
fn close_token_value(kind: TokenKind) -> String {
match kind {
TokenKind::RParen => ")",
TokenKind::RBracket => "]",
_ => "}",
}
.to_string()
}
#[cfg(test)]
mod tests {
use super::*;
fn fmt(src: &str) -> String {
format_source(src).unwrap_or_else(|e| panic!("format failed for {src:?}: {e}"))
}
#[test]
fn normalizes_spacing_and_indentation() {
assert_eq!(fmt("ls -la\n"), "ls -la\n");
assert_eq!(
fmt("if ($x) {\nWrite-Output $x\n}\n"),
"if ($x) {\n Write-Output $x\n}\n"
);
assert_eq!(
fmt("if ($x) {\n\t\t\tls\n pwd\n}\n"),
"if ($x) {\n ls\n pwd\n}\n"
);
}
#[test]
fn collapses_blank_lines_to_one() {
assert_eq!(fmt("ls\n\n\n\npwd\n"), "ls\n\npwd\n");
}
#[test]
fn keeps_adjacent_tokens_glued() {
assert_eq!(fmt("$x.Length\n"), "$x.Length\n");
assert_eq!(fmt("$a[0]\n"), "$a[0]\n");
assert_eq!(fmt("[int]::MaxValue\n"), "[int]::MaxValue\n");
}
#[test]
fn comments_survive_in_place() {
let src = "# header\nls # trailing\n\n# standalone\npwd\n";
assert_eq!(fmt(src), "# header\nls # trailing\n\n# standalone\npwd\n");
let block = "<#\n multi\n line\n#>\nls\n";
assert_eq!(fmt(block), block);
}
#[test]
fn here_strings_and_verbatim_args_untouched() {
let here = "$x = @'\n raw spacing\n'@\n";
assert_eq!(fmt(here), here);
let verbatim = "icacls --% C:\\Program Files\\* /grant x # not a comment\n";
assert_eq!(fmt(verbatim), verbatim);
}
#[test]
fn long_pipelines_break_at_pipes() {
let src = "Get-ChildItem -Path C:\\some\\deep\\path -Recurse | Where-Object { $_.Length -gt 1000000 } | Sort-Object Length | Select-Object -First 10\n";
let out = format_source_with(
src,
&FormatOptions {
max_width: 60,
indent_width: 4,
},
)
.unwrap();
assert!(
out.lines().count() > 1,
"expected a wrapped pipeline:\n{out}"
);
for line in out.lines() {
assert!(line.chars().count() <= 60 || line.contains('\''), "{line}");
}
assert!(out.contains("|\n"));
}
#[test]
fn short_blocks_stay_flat_long_ones_break() {
assert_eq!(
fmt("gci | Where-Object { $_.x }\n"),
"gci | Where-Object { $_.x }\n"
);
let out = format_source_with(
"gci | Where-Object { $_.VeryLongPropertyName -gt $someThreshold }\n",
&FormatOptions {
max_width: 30,
indent_width: 4,
},
)
.unwrap();
assert!(out.contains("{\n"), "expected block to break:\n{out}");
}
#[test]
fn continuations_are_joined() {
assert_eq!(fmt("ls `\n -la `\n /tmp\n"), "ls -la /tmp\n");
}
#[test]
fn refuses_syntax_errors() {
match format_source("'unterminated\n") {
Err(FormatError::Syntax { .. }) => {}
other => panic!("expected Syntax error, got {other:?}"),
}
}
#[test]
fn wrapped_continuations_with_here_strings_are_idempotent() {
for src in [
"cmd @\"\nx\n\"@ | bar\n",
"$x | Get-Item @\"\ny\n\"@\n",
"foo @\"\nh\n\"@ | bar | baz\n",
"$a && cmd @\"\nx\n\"@\n",
] {
let once = format_source(src).unwrap();
let twice = format_source(&once).unwrap();
assert_eq!(once, twice, "not idempotent: {src:?}");
}
let comma = "a, @\"\nx\n\"@, b\n";
assert!(
format_source(comma).is_err(),
"comma+here-string should decline"
);
}
#[test]
fn semicolon_separated_statements_are_idempotent() {
for src in [
"0; -d @\"\nx\n\"@\n",
"$x = 1; Get-Item @\"\ndata\n\"@\n",
"$a = 1; $b = 2\n",
"ls; cd; pwd\n",
"foo | bar; baz\n",
] {
let once = format_source(src).unwrap();
let twice = format_source(&once).unwrap();
assert_eq!(once, twice, "not idempotent: {src:?}");
}
}
#[test]
fn idempotent_over_corpus() {
let corpus = [
"function Get-Thing {\n param([string]$Name)\n process { $Name }\n}\n",
"$h = @{ a = 1; b = @(2, 3) }\n",
"try { risky } catch [System.Exception] { recover } finally { done }\n",
"foreach ($f in gci) {\n Write-Host $f.Name # each\n}\n",
"switch ($x) {\n 1 { 'one' }\n default { 'other' }\n}\n",
"\"interpolated $x and $($y.Prop) end\"\n",
"Test-Path $p && ls || Write-Error 'no'\n",
"$x = $a ?? $b; $i++\n",
];
for src in corpus {
let once = fmt(src);
let twice = fmt(&once);
assert_eq!(once, twice, "not idempotent for {src:?}");
}
}
#[test]
fn empty_and_comment_only_inputs() {
assert_eq!(fmt(""), "");
assert_eq!(fmt("# only a comment"), "# only a comment\n");
assert_eq!(fmt(" \n\n \n"), "");
}
#[test]
fn pathological_glue_is_preserved() {
let cases = [
"Y?c<_#2]_-//_[`\na0ab/`+c;X<$b`(/&,2#*#'&",
"b_2]#\"'}`,b&/{|/a|,0+? ;.0]&c)--?[`*(=/-c",
"&Z``?>_+-]`'a|1*c1 .?}*:#'1]@'+Y",
];
for src in cases {
assert_eq!(
fmt(src),
format!("{src}\n"),
"pathological glue was altered"
);
}
}
#[test]
fn fuzz_no_panics_accepted_inputs_idempotent() {
fn next(s: &mut u64) -> u64 {
*s ^= *s << 13;
*s ^= *s >> 7;
*s ^= *s << 17;
*s
}
let charset: Vec<char> = "$@{}()[]| ;,.\"'`#-=+*/<>?:&\nabcXYZ012_".chars().collect();
let mut state: u64 = 0x5EED;
let mut accepted = 0;
for _ in 0..600 {
let len = (next(&mut state) % 40) as usize;
let src: String = (0..len)
.map(|_| charset[(next(&mut state) as usize) % charset.len()])
.collect();
if let Ok(once) = format_source(&src) {
accepted += 1;
let twice = format_source(&once).unwrap_or_else(|e| {
panic!("accepted output failed to reformat ({e}): {src:?}")
});
assert_eq!(once, twice, "not idempotent for {src:?}");
}
}
assert!(accepted > 10, "fuzz accepted too few inputs: {accepted}");
}
}