use std::fmt::Write as _;
use rlsp_fmt::{
Doc, FormatOptions, concat, flat_alt, format as fmt_format, group, hard_line, indent, join,
line, text,
};
use rlsp_yaml_parser::CollectionStyle;
use rlsp_yaml_parser::node::{Document, Node};
use rlsp_yaml_parser::{Chomp, ScalarStyle, Span};
use crate::server::YamlVersion;
/// A comment-only line captured from the original text before formatting.
#[derive(Debug, Clone)]
struct Comment {
/// Zero-based index of the line in the original text that holds the comment.
line: usize,
/// Comment text, starting at its `#` and running to the end of the line.
text: String,
}
/// Locates the start of a YAML comment on a single line.
///
/// Returns the byte offset of the `#` together with the comment text (from the
/// `#` to the end of the line), or `None` when the line carries no comment.
///
/// A `#` starts a comment only when it lies outside quoted scalars and is
/// either the first character of the line or preceded by whitespace.
///
/// A quote character opens a quoted scalar only at the start of a token (line
/// start, after whitespace, or after one of `[`, `{`, `,`, `:`). An apostrophe
/// in the middle of a plain scalar (`don't`) therefore does not hide a comment
/// that follows it.
fn find_comment_on_line(line: &str) -> Option<(usize, String)> {
    let mut in_single = false;
    let mut in_double = false;
    // Character immediately before the one being examined (`None` at line start).
    let mut prev: Option<char> = None;
    let mut chars = line.char_indices().peekable();
    while let Some((byte_pos, c)) = chars.next() {
        match c {
            '\'' if in_single => {
                if matches!(chars.peek(), Some((_, '\''))) {
                    // `''` is an escaped quote inside a single-quoted scalar.
                    chars.next();
                } else {
                    in_single = false;
                }
            }
            '"' if in_double => in_double = false,
            '\\' if in_double => {
                // Skip the escaped character so `\"` does not close the scalar.
                chars.next();
            }
            '\'' | '"' if !in_single && !in_double => {
                let at_token_start = match prev {
                    None => true,
                    Some(p) => p.is_whitespace() || matches!(p, '[' | '{' | ',' | ':'),
                };
                if at_token_start {
                    if c == '\'' {
                        in_single = true;
                    } else {
                        in_double = true;
                    }
                }
            }
            '#' if !in_single && !in_double => {
                let starts_comment = match prev {
                    None => true,
                    Some(p) => p.is_whitespace(),
                };
                if starts_comment {
                    return Some((byte_pos, line[byte_pos..].to_string()));
                }
            }
            _ => {}
        }
        prev = Some(c);
    }
    None
}
/// Returns the line's content with any trailing comment removed and the
/// surrounding whitespace trimmed.
///
/// Comment-only and blank lines produce an empty signature.
fn content_signature(line: &str) -> String {
    let content = match find_comment_on_line(line) {
        Some((comment_start, _)) => &line[..comment_start],
        None => line,
    };
    content.trim().to_string()
}
/// One line of the original text that was not consumed as a comment or blank
/// line, together with what preceded it.
struct ContentEntry {
/// `content_signature` of the original line; used to match it to a formatted line.
signature: String,
/// Number of blank lines to re-insert before this line (the producer stores 0 or 1).
blank_lines_before: usize,
/// Comment lines that preceded this line; an empty string stands for a blank line.
leading: Vec<String>,
}
/// Computes the largest zero-based line index occupied by any node in `docs`.
///
/// Scalars and aliases contribute their start line (converted from the
/// one-based `loc.start.line`); literal and folded scalars additionally add
/// the number of lines in their value minus one. Returns `None` when no
/// document contributes a line.
fn last_content_line_from_ast(docs: &[Document<Span>]) -> Option<usize> {
    /// Largest line index reached by `node` or any of its descendants.
    fn deepest_line(node: &Node<Span>) -> Option<usize> {
        match node {
            Node::Scalar {
                style: ScalarStyle::Literal(_) | ScalarStyle::Folded(_),
                loc,
                value,
                ..
            } => {
                let first_line = loc.start.line.saturating_sub(1);
                let extra_lines = value.lines().count().saturating_sub(1);
                Some(first_line + extra_lines)
            }
            Node::Scalar { loc, .. } | Node::Alias { loc, .. } => {
                Some(loc.start.line.saturating_sub(1))
            }
            Node::Mapping { entries, .. } => entries
                .iter()
                .flat_map(|(key, value)| [key, value])
                .filter_map(deepest_line)
                .max(),
            Node::Sequence { items, .. } => items.iter().filter_map(deepest_line).max(),
        }
    }

    docs.iter()
        .filter_map(|doc| deepest_line(&doc.root))
        .max()
}
/// Finds the zero-based index of the last line in `original` that counts as
/// content.
///
/// Blank lines never count. A comment-only line counts only when its index is
/// a key of `line_to_comment`; every other non-blank line counts.
fn last_content_line_idx(
    original: &str,
    line_to_comment: &std::collections::HashMap<usize, &Comment>,
) -> Option<usize> {
    let mut last = None;
    for (idx, line) in original.lines().enumerate() {
        if line.trim().is_empty() {
            continue;
        }
        let comment_only = line.trim_start().starts_with('#');
        if comment_only && !line_to_comment.contains_key(&idx) {
            continue;
        }
        last = Some(idx);
    }
    last
}
/// Re-inserts comments and blank lines from `original` into `formatted`.
///
/// The original text is first scanned into a list of `ContentEntry` values, each
/// carrying the comment lines and blank-line marker that preceded it. The formatted
/// lines are then walked in order and matched against those entries by
/// `content_signature`; on a match the entry's blank line and leading comments are
/// emitted before the formatted line. Comment lines still pending after the last
/// entry are appended at the end of the output.
///
/// * `comments` - comments keyed by their zero-based line index in `original`.
/// * `last_content_hint` - optional index that is combined (via `max`) with the
///   last content line found by scanning the text.
///
/// The returned text always ends with a newline.
fn attach_comments(
original: &str,
formatted: &str,
comments: &[Comment],
last_content_hint: Option<usize>,
) -> String {
let line_to_comment: std::collections::HashMap<usize, &Comment> =
comments.iter().map(|c| (c.line, c)).collect();
// Take the later of the text-derived index and the hint; fall back to the hint
// alone when the text scan finds no content line.
let last_content_idx = last_content_line_idx(original, &line_to_comment)
.map(|t| last_content_hint.map_or(t, |h| t.max(h)))
.or(last_content_hint);
let mut entries: Vec<ContentEntry> = Vec::new();
let mut pending_leading: Vec<String> = Vec::new();
let mut pending_blanks: usize = 0;
let mut first_entry = true;
// Pass 1: classify every original line as known comment, blank line, trailing
// comment, or content entry.
for (idx, line) in original.lines().enumerate() {
if let Some(comment) = line_to_comment.get(&idx) {
// A run of blank lines before a comment collapses to one empty marker.
if pending_blanks > 0 {
pending_leading.push(String::new());
}
pending_blanks = 0;
pending_leading.push(comment.text.clone());
} else if line.is_empty() {
// Only exactly-empty lines count here; whitespace-only lines fall through
// to the entry branch below.
pending_blanks += 1;
} else if line.trim_start().starts_with('#')
&& last_content_idx.is_some_and(|last| idx > last)
{
// Comment-only line after the last content line: queue it as trailing.
if pending_blanks > 0 {
pending_leading.push(String::new());
}
pending_blanks = 0;
pending_leading.push(line.trim().to_string());
} else {
// Everything else becomes an entry. Comment-only lines that reach this
// branch get an empty signature, because the signature drops the comment.
entries.push(ContentEntry {
signature: content_signature(line),
blank_lines_before: if first_entry {
0
} else {
// At most one blank line is preserved between entries.
pending_blanks.min(1)
},
leading: std::mem::take(&mut pending_leading),
});
first_entry = false;
pending_blanks = 0;
}
}
// Whatever is still queued had no following entry to attach to.
let trailing_leading = pending_leading;
let mut result_lines: Vec<String> = Vec::new();
let mut entry_iter = entries.iter();
let mut next_entry = entry_iter.next();
// Pass 2: walk the formatted lines and consume entries in order.
for fmt_line in formatted.lines() {
let fmt_sig = content_signature(fmt_line);
if fmt_sig.is_empty() {
// Formatted line is blank or comment-only: pair it with an empty-signature
// entry if one is next, restoring that entry's blank line first.
if matches!(next_entry, Some(e) if e.signature.is_empty()) {
if let Some(e) = next_entry {
if e.blank_lines_before > 0 {
result_lines.push(String::new());
}
}
next_entry = entry_iter.next();
}
result_lines.push(fmt_line.to_string());
continue;
}
// Skip empty-signature entries that have no formatted counterpart, remembering
// whether any of them was preceded by a blank line.
let mut carried_blanks = 0usize;
while matches!(next_entry, Some(e) if e.signature.is_empty()) {
if let Some(e) = next_entry {
carried_blanks = carried_blanks.max(e.blank_lines_before);
}
next_entry = entry_iter.next();
}
if let Some(entry) = next_entry {
if entry.signature == fmt_sig {
// Leading comments are re-indented to the formatted line's indentation.
let indent_len = fmt_line.len() - fmt_line.trim_start().len();
let indent_str = " ".repeat(indent_len);
let last_is_blank = result_lines.last().is_some_and(String::is_empty);
// Avoid emitting two consecutive blank lines.
if (entry.blank_lines_before > 0 || carried_blanks > 0) && !last_is_blank {
result_lines.push(String::new());
}
for lc in &entry.leading {
if lc.is_empty() {
result_lines.push(String::new());
} else {
result_lines.push(format!("{indent_str}{lc}"));
}
}
result_lines.push(fmt_line.to_string());
next_entry = entry_iter.next();
continue;
}
}
// No match: keep the formatted line and leave the current entry unconsumed.
result_lines.push(fmt_line.to_string());
}
// Trailing comments are appended as stored, without added indentation.
for lc in &trailing_leading {
if lc.is_empty() {
result_lines.push(String::new());
} else {
result_lines.push(lc.clone());
}
}
let mut out = result_lines.join("\n");
if !out.ends_with('\n') {
out.push('\n');
}
out
}
/// Options that control how `format_yaml` renders a document.
#[derive(Debug, Clone)]
#[expect(
clippy::struct_excessive_bools,
reason = "each bool is a distinct, well-named formatting option; a flags enum would add complexity for no benefit"
)]
pub struct YamlFormatOptions {
/// Forwarded to the layout engine as `FormatOptions::print_width`.
pub print_width: usize,
/// Forwarded to the layout engine as `FormatOptions::tab_width`; also written as
/// the explicit indentation indicator of block scalars that need one.
pub tab_width: usize,
/// Forwarded to the layout engine as `FormatOptions::use_tabs`.
pub use_tabs: bool,
/// Prefer single quotes when a scalar is emitted quoted.
pub single_quote: bool,
/// Emit flow mappings as `{ a: 1 }` (true) or `{a: 1}` (false).
pub bracket_spacing: bool,
/// YAML version used to decide which words are reserved and must stay quoted.
pub yaml_version: YamlVersion,
/// Render every non-empty collection in block style, regardless of its source style.
pub format_enforce_block_style: bool,
/// Remove duplicate mapping keys before formatting, keeping the last occurrence.
pub format_remove_duplicate_keys: bool,
}
impl Default for YamlFormatOptions {
/// Returns the default options: 80-column width, 2-space indentation without
/// tabs, double quotes, spaced flow-mapping braces, YAML 1.2, and both the
/// block-style enforcement and duplicate-key removal switched off.
fn default() -> Self {
Self {
print_width: 80,
tab_width: 2,
use_tabs: false,
single_quote: false,
bracket_spacing: true,
yaml_version: YamlVersion::V1_2,
format_enforce_block_style: false,
format_remove_duplicate_keys: false,
}
}
}
/// Formats YAML source text according to `options`.
///
/// Text that fails to parse is returned unchanged, and text that parses to
/// zero documents yields an empty string. Otherwise every document is rendered
/// (with `---` / `...` markers where required), the result is laid out by the
/// formatting engine, and comments and blank lines from the input are
/// re-attached. The output always ends with a newline.
#[must_use]
pub fn format_yaml(text_input: &str, options: &YamlFormatOptions) -> String {
    let mut documents: Vec<Document<Span>> = match rlsp_yaml_parser::load(text_input) {
        Ok(parsed) => parsed,
        Err(_) => return text_input.to_string(),
    };
    if documents.is_empty() {
        return String::new();
    }

    if options.format_remove_duplicate_keys {
        for doc in &mut documents {
            dedup_mapping_keys(&mut doc.root);
        }
    }

    let mut parts: Vec<Doc> = Vec::new();
    for (index, doc) in documents.iter().enumerate() {
        // Every document after the first needs a `---` separator; the first one
        // only keeps it when it was written explicitly.
        if index > 0 || doc.explicit_start {
            if !parts.is_empty() {
                parts.push(hard_line());
            }
            parts.push(text("---"));
            parts.push(hard_line());
        }
        parts.push(node_to_doc(&doc.root, options, false));
        if doc.explicit_end {
            parts.push(hard_line());
            parts.push(text("..."));
        }
    }

    let layout = FormatOptions {
        print_width: options.print_width,
        tab_width: options.tab_width,
        use_tabs: options.use_tabs,
    };
    let mut rendered = fmt_format(&concat(parts), &layout);
    if !rendered.ends_with('\n') {
        rendered.push('\n');
    }

    let prefix_comments = extract_doc_prefix_comments(text_input);
    let last_content_hint = last_content_line_from_ast(&documents);
    attach_comments(text_input, &rendered, &prefix_comments, last_content_hint)
}
/// Collects the comment-only lines that open `text`.
///
/// Blank lines are skipped. Scanning stops at the first line that is neither
/// blank nor a comment-only line.
fn extract_doc_prefix_comments(text: &str) -> Vec<Comment> {
    let mut prefix = Vec::new();
    for (line_idx, line) in text.lines().enumerate() {
        if line.trim().is_empty() {
            continue;
        }
        match find_comment_on_line(line) {
            // Only whitespace may precede the `#` for the line to be comment-only.
            Some((start, comment_text)) if line[..start].trim().is_empty() => {
                prefix.push(Comment {
                    line: line_idx,
                    text: comment_text,
                });
            }
            _ => break,
        }
    }
    prefix
}
/// Converts one AST node into a layout `Doc`.
///
/// * Scalars are re-emitted in a style derived from their original style and
///   content (block, quoted or plain), prefixed by their tag and anchor.
/// * Mappings and sequences are delegated to `mapping_to_doc` /
///   `sequence_to_doc`, with anchor/tag properties prepended.
/// * Aliases are emitted as `*name`.
///
/// `in_key` is forwarded to `string_to_doc`, which uses it to decide whether
/// the `single_quote` preference applies.
#[expect(
    clippy::too_many_lines,
    reason = "comprehensive match over all node variants"
)]
fn node_to_doc(node: &Node<Span>, options: &YamlFormatOptions, in_key: bool) -> Doc {
    match node {
        Node::Scalar {
            value,
            style,
            anchor,
            tag,
            ..
        } => {
            // Core-schema tags are kept only on empty values (as `!!suffix`);
            // other tags are always kept and followed by a space when a value
            // comes after them.
            let tag_prefix = tag.as_ref().and_then(|t| {
                if is_core_schema_tag(t) {
                    if value.is_empty() {
                        let suffix = t.trim_start_matches("tag:yaml.org,2002:");
                        Some(format!("!!{suffix}"))
                    } else {
                        None
                    }
                } else {
                    let formatted = format_tag(t);
                    if value.is_empty() {
                        Some(formatted)
                    } else {
                        Some(format!("{formatted} "))
                    }
                }
            });
            let scalar_doc = match style {
                ScalarStyle::Literal(_) | ScalarStyle::Folded(_) => {
                    // A non-empty line made only of spaces/tabs and starting with a
                    // space is re-emitted inside a double-quoted scalar instead of
                    // block style.
                    let has_problematic_whitespace_line = !value.is_empty()
                        && value.lines().filter(|l| !l.is_empty()).any(|l| {
                            l.starts_with(' ') && l.chars().all(|c| c == ' ' || c == '\t')
                        });
                    if has_problematic_whitespace_line {
                        text(format!("\"{}\"", escape_double_quoted(value)))
                    } else {
                        repr_block_to_doc(value, *style, options.tab_width)
                    }
                }
                ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted => {
                    if requires_double_quoting(value) {
                        text(format!("\"{}\"", escape_double_quoted(value)))
                    } else if needs_quoting(value, options.yaml_version) {
                        // Keep the original quote style when quotes must stay.
                        if matches!(style, ScalarStyle::DoubleQuoted) {
                            text(format!("\"{}\"", escape_double_quoted(value)))
                        } else {
                            text(format!("'{}'", value.replace('\'', "''")))
                        }
                    } else {
                        string_to_doc(value, options, in_key)
                    }
                }
                ScalarStyle::Plain => {
                    if requires_double_quoting(value) {
                        text(format!("\"{}\"", escape_double_quoted(value)))
                    } else if needs_quoting(value, options.yaml_version) {
                        // The value was written plain in the source, so it is
                        // re-emitted verbatim rather than quoted.
                        text(value.clone())
                    } else {
                        string_to_doc(value, options, in_key)
                    }
                }
            };
            // A tag on an empty value replaces the scalar text entirely.
            let doc = if let Some(ref prefix) = tag_prefix {
                if value.is_empty() {
                    text(prefix.clone())
                } else {
                    concat(vec![text(prefix.clone()), scalar_doc])
                }
            } else {
                scalar_doc
            };
            if let Some(name) = anchor {
                // Only an untagged, empty *plain* scalar renders as nothing. Empty
                // quoted or block scalars still render `""`, `''` or a block header
                // and need a separating space, otherwise that text would become part
                // of the anchor name.
                let renders_nothing = value.is_empty()
                    && tag_prefix.is_none()
                    && matches!(style, ScalarStyle::Plain);
                if renders_nothing {
                    concat(vec![text(format!("&{name}")), doc])
                } else {
                    concat(vec![text(format!("&{name} ")), doc])
                }
            } else {
                doc
            }
        }
        Node::Mapping {
            entries,
            style,
            anchor,
            tag,
            ..
        } => {
            let doc = mapping_to_doc(entries, *style, options);
            // An empty mapping is always rendered as `{}`, so its properties must
            // stay on the same line even when block style is enforced.
            let effective_style = if entries.is_empty() {
                CollectionStyle::Flow
            } else if options.format_enforce_block_style {
                CollectionStyle::Block
            } else {
                *style
            };
            prepend_collection_properties(doc, anchor.as_deref(), tag.as_deref(), effective_style)
        }
        Node::Sequence {
            items,
            style,
            anchor,
            tag,
            ..
        } => {
            let doc = sequence_to_doc(items, *style, options);
            // An empty sequence is always rendered as `[]`; see the mapping case.
            let effective_style = if items.is_empty() {
                CollectionStyle::Flow
            } else if options.format_enforce_block_style {
                CollectionStyle::Block
            } else {
                *style
            };
            prepend_collection_properties(doc, anchor.as_deref(), tag.as_deref(), effective_style)
        }
        Node::Alias { name, .. } => text(format!("*{name}")),
    }
}
/// Reports whether `tag` lives in the `tag:yaml.org,2002:` namespace.
fn is_core_schema_tag(tag: &str) -> bool {
    tag.strip_prefix("tag:yaml.org,2002:").is_some()
}
/// Renders a tag for output: tags that already start with `!` are returned
/// as-is, anything else is wrapped in the verbatim form `!<...>`.
fn format_tag(tag: &str) -> String {
    match tag.chars().next() {
        Some('!') => tag.to_string(),
        _ => format!("!<{tag}>"),
    }
}
fn prepend_collection_properties(
doc: Doc,
anchor: Option<&str>,
tag: Option<&str>,
style: CollectionStyle,
) -> Doc {
let tag_prefix = tag.and_then(|t| {
if is_core_schema_tag(t) {
None
} else {
Some(format_tag(t))
}
});
let props = match (anchor, tag_prefix.as_deref()) {
(Some(name), Some(t)) => Some(format!("&{name} {t}")),
(Some(name), None) => Some(format!("&{name}")),
(None, Some(t)) => Some(t.to_string()),
(None, None) => None,
};
let Some(props_str) = props else {
return doc;
};
match style {
CollectionStyle::Block => {
concat(vec![text(props_str), hard_line(), doc])
}
CollectionStyle::Flow => {
concat(vec![text(format!("{props_str} ")), doc])
}
}
}
/// Renders a string scalar as plain, single-quoted or double-quoted text.
///
/// * Strings that do not need quoting stay plain, unless `single_quote` is set
///   and the string is not a mapping key.
/// * When quotes are emitted, single quotes are used only if `single_quote` is
///   set and the string contains no `'`; otherwise the string is double-quoted
///   and escaped. This also covers the `single_quote` preference on strings
///   that would not otherwise need quotes, so `don't` becomes `"don't"` rather
///   than the invalid `'don't'`.
fn string_to_doc(s: &str, options: &YamlFormatOptions, in_key: bool) -> Doc {
    let must_quote = needs_quoting(s, options.yaml_version);
    if !must_quote && (in_key || !options.single_quote) {
        return text(s.to_string());
    }
    if options.single_quote && !s.contains('\'') {
        text(format!("'{s}'"))
    } else {
        text(format!("\"{}\"", escape_double_quoted(s)))
    }
}
/// Decides whether `s` would be misread if written as a plain scalar.
///
/// Returns `true` for:
/// * empty or multi-line strings, and strings with leading or trailing whitespace;
/// * reserved words (`null`, `~`, booleans; plus `yes`/`no`/`on`/`off` in their
///   listed casings when `version` is YAML 1.1);
/// * anything `looks_like_number` accepts;
/// * strings starting with an indicator character (which includes `-`, so
///   `- item` and `---` are covered);
/// * strings containing `": "` or `" #"`, strings ending in `:` (a trailing
///   colon would be read as a mapping-value indicator), and the document end
///   marker `...`.
fn needs_quoting(s: &str, version: YamlVersion) -> bool {
    if s.is_empty()
        || s.contains('\n')
        || s.starts_with(char::is_whitespace)
        || s.ends_with(char::is_whitespace)
    {
        return true;
    }
    let always_reserved = matches!(
        s,
        "null" | "~" | "true" | "false" | "Null" | "NULL" | "True" | "TRUE" | "False" | "FALSE"
    );
    let v1_1_reserved = version == YamlVersion::V1_1
        && matches!(
            s,
            "yes" | "no" | "on" | "off" | "Yes" | "No" | "On" | "Off" | "YES" | "NO" | "ON" | "OFF"
        );
    let starts_with_indicator = s.starts_with(|c: char| {
        matches!(
            c,
            ':' | '#'
                | '&'
                | '*'
                | '?'
                | '|'
                | '-'
                | '<'
                | '>'
                | '='
                | '!'
                | '%'
                | '@'
                | '`'
                | '{'
                | '}'
                | '['
                | ']'
                | '"'
                | '\''
        )
    });
    always_reserved
        || v1_1_reserved
        || looks_like_number(s)
        || starts_with_indicator
        || s.contains(": ")
        || s.ends_with(':')
        || s.contains(" #")
        || s == "..."
}
/// Reports whether `s` would resolve to a number if written as a plain scalar.
///
/// Recognised forms:
/// * anything Rust parses as `i64` or `f64`;
/// * hexadecimal (`0x1F`) and octal (`0o17`) integers;
/// * `.inf` / `.Inf` / `.INF` with an optional `+` or `-` sign;
/// * `.nan` / `.NaN` / `.NAN`.
fn looks_like_number(s: &str) -> bool {
    if s.parse::<i64>().is_ok() || s.parse::<f64>().is_ok() {
        return true;
    }
    if let Some(digits) = s.strip_prefix("0x") {
        if !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return true;
        }
    }
    if let Some(digits) = s.strip_prefix("0o") {
        if !digits.is_empty() && digits.bytes().all(|b| matches!(b, b'0'..=b'7')) {
            return true;
        }
    }
    // Infinity may carry a sign; not-a-number may not.
    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    matches!(unsigned, ".inf" | ".Inf" | ".INF") || matches!(s, ".nan" | ".NaN" | ".NAN")
}
/// Reports whether `s` contains a character that can only be written inside a
/// double-quoted scalar.
///
/// That is the case for backslashes, C0 control characters (U+0000-U+001F),
/// DEL and the C1 control characters (U+007F-U+009F, which includes NEL), and
/// the line/paragraph separators U+2028 and U+2029.
fn requires_double_quoting(s: &str) -> bool {
    s.chars().any(|c| {
        matches!(
            c,
            '\\' | '\u{0000}'..='\u{001F}' | '\u{007F}'..='\u{009F}' | '\u{2028}' | '\u{2029}'
        )
    })
}
/// Escapes `s` for use inside a YAML double-quoted scalar (without the
/// surrounding quotes).
///
/// Characters with a short YAML escape use it (`\n`, `\t`, `\N`, `\_`, ...).
/// Remaining C0 controls, DEL and the remaining C1 controls are written as
/// `\xNN` because they are not printable in YAML. Everything else is copied
/// unchanged.
fn escape_double_quoted(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\x00' => out.push_str("\\0"),
            '\x07' => out.push_str("\\a"),
            '\x08' => out.push_str("\\b"),
            '\t' => out.push_str("\\t"),
            '\n' => out.push_str("\\n"),
            '\x0B' => out.push_str("\\v"),
            '\x0C' => out.push_str("\\f"),
            '\r' => out.push_str("\\r"),
            '\x1B' => out.push_str("\\e"),
            '\u{0085}' => out.push_str("\\N"),
            '\u{00A0}' => out.push_str("\\_"),
            '\u{2028}' => out.push_str("\\L"),
            '\u{2029}' => out.push_str("\\P"),
            // NEL (U+0085) is handled above, so this arm only sees the controls
            // that have no short escape.
            '\u{0000}'..='\u{001F}' | '\u{007F}'..='\u{009F}' => {
                // Writing to a `String` cannot fail.
                let _ = write!(out, "\\x{:02X}", u32::from(ch));
            }
            c => out.push(c),
        }
    }
    out
}
/// Renders a block scalar (literal `|` or folded `>`) as a header followed by
/// indented content lines.
///
/// The header carries the chomping indicator of `style`. When the first
/// non-empty line starts with a space or consists only of whitespace, the
/// header also carries `tab_width` as an explicit indentation indicator.
///
/// Literal scalars emit one indented line per non-empty line of `s`. Folded
/// scalars additionally emit separator lines between content lines so that
/// folding reproduces the original line breaks.
fn repr_block_to_doc(s: &str, style: ScalarStyle, tab_width: usize) -> Doc {
    /// One content line of the scalar, placed on its own indented line.
    fn content_line(line_text: &str) -> Doc {
        indent(concat(vec![hard_line(), text(line_text.to_string())]))
    }

    let indicator = match style {
        ScalarStyle::Literal(Chomp::Clip) => "|",
        ScalarStyle::Literal(Chomp::Strip) => "|-",
        ScalarStyle::Literal(Chomp::Keep) => "|+",
        ScalarStyle::Folded(Chomp::Clip) => ">",
        ScalarStyle::Folded(Chomp::Strip) => ">-",
        ScalarStyle::Folded(Chomp::Keep) => ">+",
        ScalarStyle::Plain | ScalarStyle::SingleQuoted | ScalarStyle::DoubleQuoted => "",
    };
    let first_content = s.lines().find(|l| !l.is_empty());
    let explicit_indent = first_content
        .is_some_and(|l| l.starts_with(' ') || l.chars().all(char::is_whitespace));
    let header = if explicit_indent && !indicator.is_empty() {
        let (block_char, chomp_char) = indicator.split_at(1);
        format!("{block_char}{tab_width}{chomp_char}")
    } else {
        indicator.to_string()
    };

    let mut parts = vec![text(header)];
    if matches!(style, ScalarStyle::Folded(_)) {
        // Ignore the empty segment produced by a single trailing newline.
        let body = s.strip_suffix('\n').unwrap_or(s);
        let mut blank_run: usize = 0;
        let mut previous: Option<&str> = None;
        for segment in body.split('\n') {
            if segment.is_empty() {
                blank_run += 1;
                continue;
            }
            if let Some(prev) = previous {
                // Next to a line that starts with a space or tab, only the blank
                // lines themselves are emitted; otherwise one extra separator is
                // added.
                let more_indented =
                    prev.starts_with([' ', '\t']) || segment.starts_with([' ', '\t']);
                let separators = if more_indented {
                    blank_run
                } else {
                    blank_run + 1
                };
                parts.extend((0..separators).map(|_| hard_line()));
            }
            blank_run = 0;
            parts.push(content_line(segment));
            previous = Some(segment);
        }
    } else {
        parts.extend(s.lines().filter(|l| !l.is_empty()).map(content_line));
    }
    concat(parts)
}
/// Renders a mapping's entries.
///
/// An empty mapping is always `{}`. Otherwise the mapping is rendered in block
/// style (one `key: value` pair per line) when block style is enforced or the
/// source style is block, and in flow style in every other case.
fn mapping_to_doc(
    entries: &[(Node<Span>, Node<Span>)],
    style: CollectionStyle,
    options: &YamlFormatOptions,
) -> Doc {
    if entries.is_empty() {
        return text("{}");
    }
    let as_block =
        options.format_enforce_block_style || matches!(style, CollectionStyle::Block);
    if !as_block {
        return flow_mapping_to_doc(entries, options);
    }
    let pairs: Vec<Doc> = entries
        .iter()
        .map(|(key, value)| key_value_to_doc(key, value, options))
        .collect();
    join(&hard_line(), pairs)
}
/// Renders a mapping in flow style as a group of comma-separated `key: value`
/// pairs between braces.
///
/// `bracket_spacing` selects `{ ... }` versus `{...}`. Keys for which
/// `key_needs_space_before_colon` holds are separated from their value by
/// ` : ` instead of `: `.
fn flow_mapping_to_doc(entries: &[(Node<Span>, Node<Span>)], options: &YamlFormatOptions) -> Doc {
    let mut pairs: Vec<Doc> = Vec::with_capacity(entries.len());
    for (key, value) in entries {
        let colon = if key_needs_space_before_colon(key) {
            " : "
        } else {
            ": "
        };
        pairs.push(concat(vec![
            node_to_doc(key, options, true),
            text(colon),
            node_to_doc(value, options, false),
        ]));
    }

    let separator = concat(vec![text(","), line()]);
    let body = indent(concat(vec![
        flat_alt(text(""), line()),
        join(&separator, pairs),
    ]));
    let (open, close) = match options.bracket_spacing {
        true => ("{ ", " }"),
        false => ("{", "}"),
    };
    group(concat(vec![
        text(open),
        body,
        flat_alt(text(""), line()),
        text(close),
    ]))
}
/// Reports whether `key` must be written with the explicit `? key` form.
///
/// That is the case for non-empty mappings and sequences and for literal or
/// folded block scalars. Empty collections, plain or quoted scalars, and
/// aliases can be written inline.
const fn needs_explicit_key(key: &Node<Span>) -> bool {
    match key {
        Node::Mapping { entries, .. } => !entries.is_empty(),
        Node::Sequence { items, .. } => !items.is_empty(),
        Node::Scalar { style, .. } => {
            matches!(style, ScalarStyle::Literal(_) | ScalarStyle::Folded(_))
        }
        Node::Alias { .. } => false,
    }
}
/// Reports whether `key` is an untagged scalar with an empty value.
const fn is_empty_key(key: &Node<Span>) -> bool {
    match key {
        Node::Scalar {
            value, tag: None, ..
        } => value.is_empty(),
        Node::Scalar { .. }
        | Node::Mapping { .. }
        | Node::Sequence { .. }
        | Node::Alias { .. } => false,
    }
}
/// Reports whether a space must separate `key` from the following `:`.
///
/// True for alias keys and for tagged scalars whose value is empty.
const fn key_needs_space_before_colon(key: &Node<Span>) -> bool {
    match key {
        Node::Alias { .. } => true,
        Node::Scalar {
            value,
            tag: Some(_),
            ..
        } => value.is_empty(),
        Node::Scalar { .. } | Node::Mapping { .. } | Node::Sequence { .. } => false,
    }
}
fn explicit_key_to_doc(key: &Node<Span>, value: &Node<Span>, options: &YamlFormatOptions) -> Doc {
let key_doc = node_to_doc(key, options, true);
let value_is_empty = matches!(value, Node::Scalar { value, .. } if value.is_empty());
let question_line = concat(vec![text("? "), indent(key_doc)]);
let colon_line = if value_is_empty {
text(":")
} else {
let effective_style = |style: CollectionStyle| {
if options.format_enforce_block_style {
CollectionStyle::Block
} else {
style
}
};
match value {
Node::Mapping {
entries,
style,
anchor,
tag,
..
} if !entries.is_empty() && effective_style(*style) == CollectionStyle::Block => {
let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
let colon_prefix = match (anchor.as_ref(), user_tag) {
(Some(name), Some(t)) => format!(": &{name} {}", format_tag(t)),
(Some(name), None) => format!(": &{name}"),
(None, Some(t)) => format!(": {}", format_tag(t)),
(None, None) => ":".to_string(),
};
concat(vec![
text(colon_prefix),
indent(concat(vec![
hard_line(),
mapping_to_doc(entries, *style, options),
])),
])
}
Node::Sequence {
items,
style,
anchor,
tag,
..
} if !items.is_empty() && effective_style(*style) == CollectionStyle::Block => {
let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
let colon_prefix = match (anchor.as_ref(), user_tag) {
(Some(name), Some(t)) => format!(": &{name} {}", format_tag(t)),
(Some(name), None) => format!(": &{name}"),
(None, Some(t)) => format!(": {}", format_tag(t)),
(None, None) => ":".to_string(),
};
concat(vec![
text(colon_prefix),
indent(concat(vec![
hard_line(),
sequence_to_doc(items, *style, options),
])),
])
}
Node::Scalar { .. }
| Node::Mapping { .. }
| Node::Sequence { .. }
| Node::Alias { .. } => {
let value_doc = node_to_doc(value, options, false);
concat(vec![text(": "), value_doc])
}
}
};
let colon_line = if let Some(tc) = value.trailing_comment() {
concat(vec![colon_line, text(format!(" {tc}"))])
} else {
colon_line
};
concat(vec![question_line, hard_line(), colon_line])
}
/// Renders one entry of a block mapping.
///
/// * Keys that need the explicit form are delegated to `explicit_key_to_doc`.
/// * An empty, untagged key is written as a bare `:` followed by the value.
/// * Otherwise the entry is `key: value`; a non-empty collection rendered in
///   block style goes on indented lines below the key, with its anchor and
///   non-core tag kept on the key line.
///
/// Keys that need a space before the colon (aliases and tagged empty scalars)
/// get it in every form, including when the value carries an anchor or tag;
/// without it the colon would be read as part of the alias name.
///
/// The value's trailing comment is appended for ordinary keys, and the key's
/// leading comments are emitted on their own lines above the entry.
#[expect(
    clippy::too_many_lines,
    reason = "comprehensive match over all value variants"
)]
fn key_value_to_doc(key: &Node<Span>, value: &Node<Span>, options: &YamlFormatOptions) -> Doc {
    let effective_style = |style: CollectionStyle| {
        if options.format_enforce_block_style {
            CollectionStyle::Block
        } else {
            style
        }
    };
    let pair_doc = if needs_explicit_key(key) {
        explicit_key_to_doc(key, value, options)
    } else if is_empty_key(key) {
        let value_doc = node_to_doc(value, options, false);
        if matches!(value, Node::Scalar { value, .. } if value.is_empty()) {
            text(":")
        } else {
            concat(vec![text(": "), value_doc])
        }
    } else {
        let key_doc = node_to_doc(key, options, true);
        match value {
            Node::Mapping {
                entries,
                style,
                anchor,
                tag,
                ..
            } if !entries.is_empty() && effective_style(*style) == CollectionStyle::Block => {
                let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
                let bare_colon = if key_needs_space_before_colon(key) {
                    " :"
                } else {
                    ":"
                };
                // The properties follow the same colon form as the bare case.
                let colon = match (anchor.as_ref(), user_tag) {
                    (Some(name), Some(t)) => {
                        text(format!("{bare_colon} &{name} {}", format_tag(t)))
                    }
                    (Some(name), None) => text(format!("{bare_colon} &{name}")),
                    (None, Some(t)) => text(format!("{bare_colon} {}", format_tag(t))),
                    (None, None) => text(bare_colon),
                };
                concat(vec![
                    key_doc,
                    colon,
                    indent(concat(vec![
                        hard_line(),
                        mapping_to_doc(entries, *style, options),
                    ])),
                ])
            }
            Node::Sequence {
                items,
                style,
                anchor,
                tag,
                ..
            } if !items.is_empty() && effective_style(*style) == CollectionStyle::Block => {
                let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
                let bare_colon = if key_needs_space_before_colon(key) {
                    " :"
                } else {
                    ":"
                };
                // The properties follow the same colon form as the bare case.
                let colon = match (anchor.as_ref(), user_tag) {
                    (Some(name), Some(t)) => {
                        text(format!("{bare_colon} &{name} {}", format_tag(t)))
                    }
                    (Some(name), None) => text(format!("{bare_colon} &{name}")),
                    (None, Some(t)) => text(format!("{bare_colon} {}", format_tag(t))),
                    (None, None) => text(bare_colon),
                };
                concat(vec![
                    key_doc,
                    colon,
                    indent(concat(vec![
                        hard_line(),
                        sequence_to_doc(items, *style, options),
                    ])),
                ])
            }
            Node::Scalar { .. }
            | Node::Mapping { .. }
            | Node::Sequence { .. }
            | Node::Alias { .. } => {
                let value_doc = node_to_doc(value, options, false);
                let sep = if key_needs_space_before_colon(key) {
                    text(" : ")
                } else {
                    text(": ")
                };
                concat(vec![key_doc, sep, value_doc])
            }
        }
    };
    // Explicit keys append the trailing comment themselves; empty keys get none.
    let pair_doc = if !needs_explicit_key(key) && !is_empty_key(key) {
        if let Some(tc) = value.trailing_comment() {
            concat(vec![pair_doc, text(format!(" {tc}"))])
        } else {
            pair_doc
        }
    } else {
        pair_doc
    };
    let leading = key.leading_comments();
    if leading.is_empty() {
        pair_doc
    } else {
        let mut parts: Vec<Doc> = Vec::new();
        for lc in leading {
            parts.push(text(lc.clone()));
            parts.push(hard_line());
        }
        parts.push(pair_doc);
        concat(parts)
    }
}
/// Renders a sequence's items.
///
/// An empty sequence is always `[]`. Otherwise the sequence is rendered in
/// block style (one `- item` per line) when block style is enforced or the
/// source style is block, and in flow style in every other case.
fn sequence_to_doc(seq: &[Node<Span>], style: CollectionStyle, options: &YamlFormatOptions) -> Doc {
    if seq.is_empty() {
        return text("[]");
    }
    let as_block =
        options.format_enforce_block_style || matches!(style, CollectionStyle::Block);
    if !as_block {
        return flow_sequence_to_doc(seq, options);
    }
    let rows: Vec<Doc> = seq
        .iter()
        .map(|item| sequence_item_to_doc(item, options))
        .collect();
    join(&hard_line(), rows)
}
/// Renders a sequence in flow style as a group of comma-separated items
/// between square brackets.
fn flow_sequence_to_doc(seq: &[Node<Span>], options: &YamlFormatOptions) -> Doc {
    let mut rendered: Vec<Doc> = Vec::with_capacity(seq.len());
    for item in seq {
        rendered.push(node_to_doc(item, options, false));
    }

    let separator = concat(vec![text(","), line()]);
    let body = indent(concat(vec![
        flat_alt(text(""), line()),
        join(&separator, rendered),
    ]));
    group(concat(vec![
        text("["),
        body,
        flat_alt(text(""), line()),
        text("]"),
    ]))
}
fn sequence_item_to_doc(item: &Node<Span>, options: &YamlFormatOptions) -> Doc {
let effective_style = |style: CollectionStyle| {
if options.format_enforce_block_style {
CollectionStyle::Block
} else {
style
}
};
let item_doc = match item {
Node::Mapping {
entries,
style,
anchor,
tag,
..
} if !entries.is_empty() && effective_style(*style) == CollectionStyle::Block => {
let pairs: Vec<Doc> = entries
.iter()
.map(|(k, v)| key_value_to_doc(k, v, options))
.collect();
let inner = join(&hard_line(), pairs);
let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
let prefix = match (anchor.as_ref(), user_tag) {
(Some(name), Some(t)) => format!("&{name} {}", format_tag(t)),
(Some(name), None) => format!("&{name}"),
(None, Some(t)) => format_tag(t),
(None, None) => String::new(),
};
if prefix.is_empty() {
concat(vec![text("- "), indent(inner)])
} else {
concat(vec![
text("- "),
text(prefix),
indent(concat(vec![hard_line(), inner])),
])
}
}
Node::Sequence {
items,
style,
anchor,
tag,
..
} if !items.is_empty() && effective_style(*style) == CollectionStyle::Block => {
let user_tag = tag.as_ref().filter(|t| !is_core_schema_tag(t));
let prefix_doc = match (anchor.as_ref(), user_tag) {
(Some(name), Some(t)) => text(format!("&{name} {}", format_tag(t))),
(Some(name), None) => text(format!("&{name}")),
(None, Some(t)) => text(format_tag(t)),
(None, None) => text(String::new()),
};
concat(vec![
text("- "),
prefix_doc,
indent(concat(vec![
hard_line(),
sequence_to_doc(items, *style, options),
])),
])
}
Node::Scalar { .. } | Node::Mapping { .. } | Node::Sequence { .. } | Node::Alias { .. } => {
concat(vec![text("- "), node_to_doc(item, options, false)])
}
};
let item_doc = if let Some(tc) = item.trailing_comment() {
concat(vec![item_doc, text(format!(" {tc}"))])
} else {
item_doc
};
let leading = item.leading_comments();
if leading.is_empty() {
item_doc
} else {
let mut parts: Vec<Doc> = Vec::new();
for lc in leading {
parts.push(text(lc.clone()));
parts.push(hard_line());
}
parts.push(item_doc);
concat(parts)
}
}
/// Produces the string used to compare mapping keys during de-duplication.
///
/// Scalars compare by value and aliases by `*name`. Mapping and sequence keys
/// yield `None` and are therefore never treated as duplicates.
fn dedup_key_str(key: &Node<Span>) -> Option<String> {
    let comparable = match key {
        Node::Mapping { .. } | Node::Sequence { .. } => return None,
        Node::Alias { name, .. } => format!("*{name}"),
        Node::Scalar { value, .. } => value.clone(),
    };
    Some(comparable)
}
/// Removes duplicate keys from every mapping reachable from `node`.
///
/// Within a mapping the last occurrence of a key wins and the remaining
/// entries keep their relative order. Keys without a comparable form (see
/// `dedup_key_str`) are always kept. The values of surviving entries and all
/// sequence items are processed recursively.
fn dedup_mapping_keys(node: &mut Node<Span>) {
    use std::collections::HashSet;
    match node {
        Node::Mapping { entries, .. } => {
            // Scan back to front so the first sighting of a key is its last
            // occurrence in document order.
            let mut seen: HashSet<String> = HashSet::new();
            let mut keep = vec![true; entries.len()];
            for ((key, _), flag) in entries.iter().zip(keep.iter_mut()).rev() {
                if let Some(comparable) = dedup_key_str(key) {
                    *flag = seen.insert(comparable);
                }
            }

            let survivors = std::mem::take(entries)
                .into_iter()
                .zip(keep)
                .filter(|(_, kept)| *kept)
                .map(|(entry, _)| entry)
                .collect();
            *entries = survivors;

            entries
                .iter_mut()
                .for_each(|(_, value)| dedup_mapping_keys(value));
        }
        Node::Sequence { items, .. } => items.iter_mut().for_each(dedup_mapping_keys),
        Node::Scalar { .. } | Node::Alias { .. } => {}
    }
}
#[cfg(test)]
mod tests {
use rstest::rstest;
use super::*;
fn default_opts() -> YamlFormatOptions {
YamlFormatOptions::default()
}
#[rstest]
#[case::boolean_values("enabled: true\ndisabled: false\n", &["true", "false"] as &[&str])]
#[case::numeric_values("port: 8080\nratio: 0.5\n", &["8080", "0.5"])]
#[case::mapping_block_style("a: 1\nb: 2\n", &["a: 1", "b: 2"])]
#[case::flow_sequence_items("items:\n - a\n - b\n - c\n", &["a", "b", "c"])]
#[case::multi_document(
"key1: value1\n---\nkey2: value2\n",
&["key1: value1", "---", "key2: value2"]
)]
#[case::float_special_values(
"nan_val: .nan\ninf_val: .inf\nneg_inf_val: -.inf\n",
&[".nan", ".inf", "-.inf"]
)]
#[case::tagged_node("tagged: !mytag some_value\n", &["!mytag", "some_value"])]
#[case::literal_block_scalar(
"body: |\n line one\n line two\n",
&["|", "line one", "line two"]
)]
#[case::folded_block_scalar("body: >\n folded line\n", &[">", "folded line"])]
#[case::single_quoted_scalar_content("key: 'quoted value'\n", &["quoted value", "key:"])]
#[case::double_quoted_scalar_content("key: \"quoted value\"\n", &["quoted value", "key:"])]
fn format_yaml_multi_contains(#[case] input: &str, #[case] expected: &[&str]) {
let result = format_yaml(input, &default_opts());
for &s in expected {
assert!(result.contains(s), "{s:?} missing: {result:?}");
}
}
#[rstest]
#[case::newline_escaped("a\nb", "a\\nb")]
#[case::carriage_return_escaped("a\rb", "a\\rb")]
#[case::tab_escaped("a\tb", "a\\tb")]
#[case::double_quote_escaped("say \"hi\"", "say \\\"hi\\\"")]
#[case::backslash_escaped("a\\b", "a\\\\b")]
fn escape_double_quoted_escapes(#[case] input: &str, #[case] expected: &str) {
assert_eq!(escape_double_quoted(input), expected);
}
#[rstest]
#[case::on_v1_1("on", YamlVersion::V1_1)]
#[case::yes_v1_1("yes", YamlVersion::V1_1)]
#[case::off_v1_1("off", YamlVersion::V1_1)]
#[case::no_v1_1("no", YamlVersion::V1_1)]
#[case::true_v1_1("true", YamlVersion::V1_1)]
#[case::true_v1_2("true", YamlVersion::V1_2)]
#[case::null_v1_1("null", YamlVersion::V1_1)]
#[case::null_v1_2("null", YamlVersion::V1_2)]
#[case::uppercase_yes_v1_1("YES", YamlVersion::V1_1)]
#[case::empty_string_v1_1("", YamlVersion::V1_1)]
#[case::empty_string_v1_2("", YamlVersion::V1_2)]
#[case::numeric_123_v1_1("123", YamlVersion::V1_1)]
#[case::numeric_123_v1_2("123", YamlVersion::V1_2)]
#[case::numeric_3_14_v1_2("3.14", YamlVersion::V1_2)]
fn needs_quoting_returns_true(#[case] word: &str, #[case] version: YamlVersion) {
assert!(
needs_quoting(word, version),
"{word:?} should require quoting in {version:?}"
);
}
#[rstest]
#[case::on_v1_2("on", YamlVersion::V1_2)]
#[case::yes_v1_2("yes", YamlVersion::V1_2)]
#[case::off_v1_2("off", YamlVersion::V1_2)]
#[case::no_v1_2("no", YamlVersion::V1_2)]
#[case::uppercase_yes_v1_2("YES", YamlVersion::V1_2)]
fn needs_quoting_returns_false(#[case] word: &str, #[case] version: YamlVersion) {
assert!(
!needs_quoting(word, version),
"{word:?} should not require quoting in {version:?}"
);
}
#[test]
fn format_enforce_block_style_defaults_to_false() {
assert!(!YamlFormatOptions::default().format_enforce_block_style);
}
#[test]
fn format_remove_duplicate_keys_defaults_to_false() {
assert!(!YamlFormatOptions::default().format_remove_duplicate_keys);
}
#[test]
fn dedup_disabled_does_not_remove_duplicate_keys() {
let input = "key: 1\nkey: 2\n";
let result = format_yaml(input, &default_opts());
let count = result.matches("key:").count();
assert!(
count >= 2,
"both keys should remain when dedup disabled: {result:?}"
);
}
fn dedup_opts() -> YamlFormatOptions {
YamlFormatOptions {
format_remove_duplicate_keys: true,
..default_opts()
}
}
#[test]
fn dedup_single_duplicate_keeps_last() {
let result = format_yaml("key: 1\nkey: 2\n", &dedup_opts());
assert!(
result.contains("key: 2"),
"last occurrence missing: {result:?}"
);
assert!(
!result.contains("key: 1"),
"first occurrence should be removed: {result:?}"
);
}
#[test]
fn dedup_three_occurrences_keeps_only_last() {
let result = format_yaml("key: a\nkey: b\nkey: c\n", &dedup_opts());
assert!(
result.contains("key: c"),
"last occurrence missing: {result:?}"
);
assert!(
!result.contains("key: a"),
"first occurrence should be removed: {result:?}"
);
assert!(
!result.contains("key: b"),
"middle occurrence should be removed: {result:?}"
);
}
#[test]
fn dedup_unique_keys_unchanged() {
let result = format_yaml("a: 1\nb: 2\n", &dedup_opts());
assert!(result.contains("a: 1"), "a:1 missing: {result:?}");
assert!(result.contains("b: 2"), "b:2 missing: {result:?}");
}
#[test]
fn dedup_mixed_unique_and_duplicate() {
let result = format_yaml("a: 1\nb: 2\na: 3\n", &dedup_opts());
assert!(
result.contains("a: 3"),
"last a: should be present: {result:?}"
);
assert!(
result.contains("b: 2"),
"unique b: should be present: {result:?}"
);
assert!(
!result.contains("a: 1"),
"first a: should be removed: {result:?}"
);
}
#[test]
fn dedup_empty_mapping_unchanged() {
let result = format_yaml("map: {}\n", &dedup_opts());
assert!(
result.contains("{}"),
"empty mapping should be preserved: {result:?}"
);
}
#[test]
fn dedup_single_entry_mapping_unchanged() {
let result = format_yaml("key: value\n", &dedup_opts());
assert!(
result.contains("key: value"),
"single entry should be preserved: {result:?}"
);
}
#[test]
fn dedup_alias_key_duplicate_keeps_last() {
let input = "? *ref\n: value1\n? *ref\n: value2\n";
let result = format_yaml(input, &dedup_opts());
assert!(
result.contains("value2"),
"last alias-keyed value missing: {result:?}"
);
assert!(
!result.contains("value1"),
"first alias-keyed value should be removed: {result:?}"
);
}
#[test]
fn dedup_complex_mapping_key_no_panic() {
let input = "? {a: 1}\n: value\n";
let result = format_yaml(input, &dedup_opts());
let _ = result;
}
#[test]
fn dedup_complex_sequence_key_no_panic() {
let input = "? [1, 2]\n: value\n";
let result = format_yaml(input, &dedup_opts());
let _ = result;
}
/// Keys differing only in letter case (`Key` vs `key`) are expected to be
/// treated as distinct, so both entries must remain.
#[test]
fn dedup_case_sensitive_keys_both_kept() {
    let input = "Key: 1\nkey: 2\n";
    let result = format_yaml(input, &dedup_opts());

    let upper_kept = result.contains("Key: 1");
    let lower_kept = result.contains("key: 2");

    assert!(upper_kept, "Key:1 missing: {result:?}");
    assert!(lower_kept, "key:2 missing: {result:?}");
}
/// Duplicate keys inside a nested mapping: the outer key and the last inner
/// entry are expected to remain, the first inner entry is not.
#[test]
fn dedup_nested_mapping_removes_inner_duplicates() {
    let opts = dedup_opts();
    let result = format_yaml("outer:\n inner: 1\n inner: 2\n", &opts);

    let outer_present = result.contains("outer:");
    let last_inner_present = result.contains("inner: 2");
    let first_inner_present = result.contains("inner: 1");

    assert!(outer_present, "outer key missing: {result:?}");
    assert!(last_inner_present, "last inner should be kept: {result:?}");
    assert!(
        !first_inner_present,
        "first inner should be removed: {result:?}"
    );
}
/// Duplicate keys inside the items of a sequence: for each item the test
/// expects the later entry to remain and the earlier one to be gone.
#[test]
fn dedup_recurses_into_sequence_items() {
    let opts = dedup_opts();
    let result = format_yaml(
        "items:\n - key: 1\n key: 2\n - key: 3\n key: 4\n",
        &opts,
    );

    // Entries that must remain.
    let first_item_last = result.contains("key: 2");
    let second_item_last = result.contains("key: 4");
    // Entries that must be gone.
    let first_item_first = result.contains("key: 1");
    let second_item_first = result.contains("key: 3");

    assert!(
        first_item_last,
        "last key in first item missing: {result:?}"
    );
    assert!(
        second_item_last,
        "last key in second item missing: {result:?}"
    );
    assert!(
        !first_item_first,
        "first key in first item should be removed: {result:?}"
    );
    assert!(
        !second_item_first,
        "first key in second item should be removed: {result:?}"
    );
}
/// Duplicates at the innermost level of a three-level nesting: both ancestor
/// keys and the last `c` are expected to remain, the first `c` is not.
#[test]
fn dedup_deeply_nested_removes_innermost_duplicates() {
    let opts = dedup_opts();
    let result = format_yaml("a:\n b:\n c: 1\n c: 2\n", &opts);

    let has_a = result.contains("a:");
    let has_b = result.contains("b:");
    let has_last_c = result.contains("c: 2");
    let has_first_c = result.contains("c: 1");

    assert!(has_a, "a: missing: {result:?}");
    assert!(has_b, "b: missing: {result:?}");
    assert!(has_last_c, "last c: should be kept: {result:?}");
    assert!(!has_first_c, "first c: should be removed: {result:?}");
}
/// Duplicate keys inside a flow mapping: only the last occurrence is expected
/// in the output.
#[test]
fn dedup_flow_mapping_removes_duplicate() {
    let input = "{key: 1, key: 2}\n";
    let result = format_yaml(input, &dedup_opts());

    let last_kept = result.contains("key: 2");
    let first_kept = result.contains("key: 1");

    assert!(last_kept, "last occurrence missing: {result:?}");
    assert!(!first_kept, "first occurrence should be removed: {result:?}");
}
/// The entry that gets dropped carries a trailing comment; formatting must
/// still complete, keeping the last entry and removing the first.
#[test]
fn dedup_removed_entry_with_trailing_comment_no_crash() {
    let opts = dedup_opts();
    let result = format_yaml("key: 1 # this gets removed\nkey: 2\n", &opts);

    let last_kept = result.contains("key: 2");
    let first_kept = result.contains("key: 1");

    assert!(last_kept, "last occurrence missing: {result:?}");
    assert!(!first_kept, "first occurrence should be removed: {result:?}");
}
/// A comment line directly above the surviving duplicate must still be present
/// in the output alongside that entry.
#[test]
fn dedup_surviving_entry_leading_comment_preserved() {
    let opts = dedup_opts();
    let result = format_yaml("key: 1\n# keep this\nkey: 2\n", &opts);

    let survivor_present = result.contains("key: 2");
    let comment_present = result.contains("# keep this");

    assert!(survivor_present, "last occurrence missing: {result:?}");
    assert!(
        comment_present,
        "leading comment should be preserved: {result:?}"
    );
}
/// Two documents, each with its own duplicate key: the test expects each
/// document to keep its last entry, lose its first one, and the `---`
/// separator to remain.
#[test]
fn dedup_multi_document_per_document() {
    let opts = dedup_opts();
    let result = format_yaml("key: 1\nkey: 2\n---\nkey: 3\nkey: 4\n", &opts);

    // Presence checks.
    let doc1_last = result.contains("key: 2");
    let doc2_last = result.contains("key: 4");
    let separator = result.contains("---");
    // Absence checks.
    let doc1_first = result.contains("key: 1");
    let doc2_first = result.contains("key: 3");

    assert!(doc1_last, "last key in doc1 missing: {result:?}");
    assert!(doc2_last, "last key in doc2 missing: {result:?}");
    assert!(separator, "document separator missing: {result:?}");
    assert!(
        !doc1_first,
        "first key in doc1 should be removed: {result:?}"
    );
    assert!(
        !doc2_first,
        "first key in doc2 should be removed: {result:?}"
    );
}
/// Formatting already-formatted output again with `dedup_opts()` must yield
/// the same text.
#[test]
fn dedup_idempotent() {
    let first = format_yaml("key: 1\nkey: 2\n", &dedup_opts());
    // Second pass takes the first pass's output as its input.
    let second = format_yaml(&first, &dedup_opts());
    assert_eq!(first, second, "dedup not idempotent: {first:?}");
}
/// An anchor on a scalar value must round-trip unchanged under `default_opts()`.
#[test]
fn anchor_scalar_preserved() {
    let input = "key: &anchor value\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor on a block mapping value must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_block_mapping_preserved() {
    let input = "defaults: &defaults\n timeout: 30\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor on a block sequence value must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_block_sequence_preserved() {
    let input = "items: &mylist\n - a\n - b\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor placed on a flow mapping must still appear in the output; the
/// exact layout of the mapping is not checked here.
#[test]
fn anchor_flow_mapping_preserved() {
    let input = "key: &anchor {a: 1}\n";
    let result = format_yaml(input, &default_opts());
    let anchor_present = result.contains("&anchor");
    assert!(anchor_present, "anchor missing: {result:?}");
}
/// An anchor on a flow sequence must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_flow_sequence_preserved() {
    let input = "key: &anchor [a, b]\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor on a sequence item whose content is a block mapping must
/// round-trip unchanged under `default_opts()`.
#[test]
fn anchor_sequence_item_block_mapping_preserved() {
    let input = "items:\n - &item\n key: val\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor definition and a later alias referencing it (via a `<<` key)
/// must both still appear in the output.
#[test]
fn alias_reference_preserved() {
    let input = "defaults: &defaults\n timeout: 30\nservice:\n <<: *defaults\n";
    let result = format_yaml(input, &default_opts());

    let anchor_present = result.contains("&defaults");
    let alias_present = result.contains("*defaults");

    assert!(anchor_present, "anchor missing: {result:?}");
    assert!(alias_present, "alias missing: {result:?}");
}
/// Formatting a document containing an anchor and an alias twice must give
/// the same text both times.
#[test]
fn anchor_alias_idempotent() {
    let first = format_yaml(
        "defaults: &defaults\n timeout: 30\nservice:\n <<: *defaults\n",
        &default_opts(),
    );
    // Second pass takes the first pass's output as its input.
    let second = format_yaml(&first, &default_opts());
    assert_eq!(first, second, "anchor/alias not idempotent: {first:?}");
}
/// An anchor on a document-root scalar must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_on_top_level_scalar_preserved() {
    let input = "&doc hello\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchored sequence referenced by an alias elsewhere: the anchor, a
/// sequence item and the alias must all appear in the output.
#[test]
fn anchor_and_alias_round_trip_sequence() {
    let opts = default_opts();
    let result = format_yaml("base: &base\n - x\n - y\nextended:\n - *base\n", &opts);

    let anchor_present = result.contains("&base");
    let item_present = result.contains("- x");
    let alias_present = result.contains("*base");

    assert!(anchor_present, "anchor missing: {result:?}");
    assert!(item_present, "sequence item missing: {result:?}");
    assert!(alias_present, "alias missing: {result:?}");
}
/// A scalar carrying both an anchor and a tag: anchor, tag and value must be
/// present, and the anchor must occur in the text preceding the tag.
#[test]
fn anchor_before_tag_on_scalar() {
    let input = "item: &myanchor !mytag value\n";
    let result = format_yaml(input, &default_opts());

    let has_anchor = result.contains("&myanchor");
    let has_tag = result.contains("!mytag");
    let has_value = result.contains("value");
    assert!(has_anchor, "anchor missing: {result:?}");
    assert!(has_tag, "tag missing: {result:?}");
    assert!(has_value, "value missing: {result:?}");

    // Text up to the first `!mytag`; the whole output if the tag is absent.
    let before_tag = result
        .find("!mytag")
        .map_or(&result[..], |tag_pos| &result[..tag_pos]);
    let anchor_precedes_tag = before_tag.contains("&myanchor");
    assert!(
        anchor_precedes_tag,
        "anchor must precede tag per YAML spec §6.8.1: {result:?}"
    );
}
/// An anchored scalar followed by an inline comment: both the `&anchor value`
/// text and the comment must appear in the output.
#[test]
fn anchor_with_trailing_comment_preserved() {
    let input = "key: &anchor value # inline comment\n";
    let result = format_yaml(input, &default_opts());

    let anchored_value_present = result.contains("&anchor value");
    let comment_present = result.contains("# inline comment");

    assert!(anchored_value_present, "anchor+value missing: {result:?}");
    assert!(comment_present, "comment missing: {result:?}");
}
/// An anchor on an empty flow mapping must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_on_empty_flow_mapping_preserved() {
    let input = "empty: &empty {}\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// An anchor on an empty flow sequence must round-trip unchanged under
/// `default_opts()`.
#[test]
fn anchor_on_empty_flow_sequence_preserved() {
    let input = "empty: &empty []\n";
    let result = format_yaml(input, &default_opts());
    // Expected output is exactly the input text.
    assert_eq!(result, input);
}
/// Input without any anchor must not gain an `&` character in the output.
#[test]
fn no_spurious_anchor_when_none() {
    let input = "key: value\n";
    let result = format_yaml(input, &default_opts());
    let has_ampersand = result.contains('&');
    assert!(!has_ampersand, "spurious anchor in output: {result:?}");
}
/// A document written without `---` or `...` must keep its content and must
/// not gain either marker.
#[test]
fn bare_document_emits_no_markers() {
    let input = "key: value\n";
    let result = format_yaml(input, &default_opts());

    let content_present = result.contains("key: value");
    let start_marker = result.contains("---");
    let end_marker = result.contains("...");

    assert!(content_present, "content missing: {result:?}");
    assert!(!start_marker, "unexpected `---` in output: {result:?}");
    assert!(!end_marker, "unexpected `...` in output: {result:?}");
}
/// An explicit `---` document start marker in the input must appear in the
/// output.
#[test]
fn explicit_start_marker_preserved() {
    let input = "---\nkey: value\n";
    let result = format_yaml(input, &default_opts());
    let start_marker = result.contains("---");
    assert!(start_marker, "`---` missing from output: {result:?}");
}
/// An explicit `...` document end marker in the input must appear in the
/// output.
#[test]
fn explicit_end_marker_preserved() {
    let input = "key: value\n...\n";
    let result = format_yaml(input, &default_opts());
    let end_marker = result.contains("...");
    assert!(end_marker, "`...` missing from output: {result:?}");
}
/// A document with both an explicit start (`---`) and end (`...`) marker must
/// keep both in the output.
#[test]
fn both_markers_preserved() {
    let input = "---\nkey: value\n...\n";
    let result = format_yaml(input, &default_opts());

    let start_marker = result.contains("---");
    let end_marker = result.contains("...");

    assert!(start_marker, "`---` missing from output: {result:?}");
    assert!(end_marker, "`...` missing from output: {result:?}");
}
/// Two documents separated by `---`: the separator and the content of both
/// documents must appear in the output.
#[test]
fn multi_document_separator_always_emitted() {
    let input = "doc1: a\n---\ndoc2: b\n";
    let result = format_yaml(input, &default_opts());

    let separator = result.contains("---");
    let doc1 = result.contains("doc1: a");
    let doc2 = result.contains("doc2: b");

    assert!(separator, "`---` separator missing: {result:?}");
    assert!(doc1, "doc1 content missing: {result:?}");
    assert!(doc2, "doc2 content missing: {result:?}");
}
/// Only the first of two documents carries an explicit `...` end marker.
///
/// Expects the `---` separator to be present, the first `...` to occur before
/// the second document's content, and no `...` from that content onwards.
#[test]
fn explicit_end_only_on_first_document() {
    let result = format_yaml("doc1: a\n...\n---\ndoc2: b\n", &default_opts());
    assert!(
        result.contains("---"),
        "`---` separator missing: {result:?}"
    );

    // Resolve both positions up front so that a missing marker or missing
    // document content is reported as such, instead of surfacing through an
    // `Option` ordering comparison as a misleading "wrong order" failure.
    let end_marker_pos = result
        .find("...")
        .unwrap_or_else(|| panic!("`...` missing from output: {result:?}"));
    let doc2_pos = result
        .find("doc2: b")
        .unwrap_or_else(|| panic!("doc2 content missing: {result:?}"));

    assert!(
        end_marker_pos < doc2_pos,
        "`...` should appear before doc2, got: {result:?}"
    );

    // `doc2_pos` comes from `find`, so it is a valid char boundary to slice at.
    let after_doc2 = &result[doc2_pos..];
    assert!(
        !after_doc2.contains("..."),
        "unexpected `...` after doc2: {result:?}"
    );
}
/// Both documents end with an explicit `...`; the output must contain exactly
/// two non-overlapping `...` occurrences.
#[test]
fn explicit_end_on_all_documents_preserved() {
    let input = "doc1: a\n...\n---\ndoc2: b\n...\n";
    let opts = default_opts();
    let result = format_yaml(input, &opts);
    let count = result.matches("...").count();
    assert_eq!(
        count, 2,
        "expected 2 `...` markers, got {count}: {result:?}"
    );
}
}