use crate::Diagnostic;
use crate::Severity;
/// One entry recorded by the pre-scan, pushed in the order the scan meets it.
#[derive(Debug, Clone, PartialEq)]
pub enum PreItem {
/// A top-level `key:` line. `fill` is `true` when the value carried a `!fill` tag.
Field { key: String, fill: bool },
/// Text of a top-level own-line comment, or of a trailing comment on a
/// top-level field line. The leading `#` (every leading `#` for trailing
/// comments) and at most one following space are removed.
Comment(String),
}
/// Everything `prescan_fence_content` produces for one fence.
#[derive(Debug, Clone, Default)]
pub struct PreScan {
/// The fence content rebuilt line by line and joined with `\n`, with the
/// comment lines the pre-scan recognised and the trailing comments on
/// top-level fields removed, and `!fill` tags stripped from top-level values.
pub cleaned_yaml: String,
/// Top-level fields and comments in the order they were encountered.
pub items: Vec<PreItem>,
/// Non-fatal diagnostics raised while scanning (unsupported tags, dropped
/// nested comments).
pub warnings: Vec<Diagnostic>,
/// Human-readable messages for `!fill` tags applied to a flow mapping.
pub fill_target_errors: Vec<String>,
}
/// Pre-scans the text of a YAML fence, line by line, before it is parsed.
///
/// The scan
/// * records top-level own-line comments and trailing comments on top-level
///   fields as [`PreItem::Comment`] and removes them from the cleaned text;
/// * records every top-level `key:` line as [`PreItem::Field`], stripping a
///   `!fill` tag from its value and noting it in the `fill` flag;
/// * warns once when an indented comment line is dropped, and warns for every
///   top-level value that carries a tag other than `!fill`;
/// * collects an error message for each `!fill` applied to a flow mapping.
///
/// Lines that belong to a block scalar (`key: |`, `- >-`, ...) are copied
/// through untouched: a `#` at the start of such a line is literal content,
/// not a comment, so it must be neither dropped nor warned about.
///
/// Lines are split on `\n` only and re-joined with `\n`.
pub fn prescan_fence_content(content: &str) -> PreScan {
    let mut out = PreScan::default();
    let mut saw_nested_comment = false;
    // Indentation of the line whose block scalar header opened the region we
    // are currently inside, if any.
    let mut block_scalar_indent: Option<usize> = None;
    let mut cleaned_lines: Vec<String> = Vec::new();

    for line in content.split('\n') {
        let trimmed = line.trim_start_matches([' ', '\t']);
        let indent = line.len() - trimmed.len();
        let is_top_level = indent == 0;

        if let Some(parent_indent) = block_scalar_indent {
            // Deeper-indented and blank lines are block scalar content. The
            // test is deliberately generous: treating too much as content only
            // costs a missed warning, while treating too little loses data.
            if indent > parent_indent || line.trim().is_empty() {
                cleaned_lines.push(line.to_string());
                continue;
            }
            block_scalar_indent = None;
        }

        if trimmed.starts_with('#') {
            if is_top_level {
                let without_hash = &trimmed[1..];
                let text = without_hash.strip_prefix(' ').unwrap_or(without_hash);
                out.items.push(PreItem::Comment(text.to_string()));
            } else if !saw_nested_comment {
                // Only the first nested comment is reported.
                saw_nested_comment = true;
                out.warnings.push(
                    Diagnostic::new(
                        Severity::Warning,
                        "YAML comments inside nested values are dropped during parse; only top-level frontmatter comments round-trip".to_string(),
                    )
                    .with_code("parse::comments_in_nested_yaml_dropped".to_string()),
                );
            }
            // Comment lines never reach the cleaned YAML.
            continue;
        }

        if opens_block_scalar(line) {
            block_scalar_indent = Some(indent);
        }

        if is_top_level {
            if let Some((key, after_colon)) = split_key(line) {
                let (value_part, trailing_comment) = split_trailing_comment(&after_colon);
                let (fill, value_without_tag, had_non_fill_tag, fill_target_err) =
                    inspect_fill_and_tags(&value_part, &key);
                if had_non_fill_tag {
                    out.warnings.push(
                        Diagnostic::new(
                            Severity::Warning,
                            format!(
                                "YAML tag on key `{}` is not supported; the tag has been dropped and the value kept",
                                key
                            ),
                        )
                        .with_code("parse::unsupported_yaml_tag".to_string()),
                    );
                }
                if let Some(err) = fill_target_err {
                    out.fill_target_errors.push(err);
                }
                cleaned_lines.push(format!("{}:{}", key, value_without_tag));
                out.items.push(PreItem::Field { key, fill });
                if let Some(c) = trailing_comment {
                    // The trailing comment follows its field in `items`.
                    let stripped = c.trim_start_matches('#');
                    let text = stripped.strip_prefix(' ').unwrap_or(stripped);
                    out.items.push(PreItem::Comment(text.to_string()));
                }
                continue;
            }
        }
        cleaned_lines.push(line.to_string());
    }
    out.cleaned_yaml = cleaned_lines.join("\n");
    out
}

/// Returns `true` when `line` ends in a block scalar header, i.e. its last
/// token (ignoring a trailing comment) is `|` or `>` with optional indicators
/// and that token follows a mapping key, a sequence dash, or tags/anchors
/// attached to one of those.
fn opens_block_scalar(line: &str) -> bool {
    let (code, _comment) = split_trailing_comment(line);
    let mut tokens = code.split_whitespace().rev();
    match tokens.next() {
        Some(last) if is_block_scalar_header(last) => {}
        _ => return false,
    }
    // Skip node properties such as `!fill` or `&anchor` between the key and the header.
    tokens
        .find(|tok| !(tok.starts_with('!') || tok.starts_with('&')))
        .map_or(false, |tok| tok == "-" || tok.ends_with(':'))
}

/// Returns `true` for `|` or `>` optionally followed by one chomping indicator
/// (`+`/`-`) and one indentation indicator (`1`-`9`), in either order.
fn is_block_scalar_header(token: &str) -> bool {
    let indicators = match token
        .strip_prefix('|')
        .or_else(|| token.strip_prefix('>'))
    {
        Some(rest) => rest,
        None => return false,
    };
    let is_chomp = |b: u8| matches!(b, b'+' | b'-');
    let is_indent = |b: u8| matches!(b, b'1'..=b'9');
    match indicators.as_bytes() {
        [] => true,
        [a] => is_chomp(*a) || is_indent(*a),
        [a, b] => (is_chomp(*a) && is_indent(*b)) || (is_indent(*a) && is_chomp(*b)),
        _ => false,
    }
}
/// Splits a line of the form `identifier:rest` at its first colon.
///
/// The key must start with an ASCII letter or `_` and continue with ASCII
/// letters, digits or `_`, and must be followed immediately by `:`. Returns
/// the key and everything after the colon (leading whitespace included), or
/// `None` when the line does not have that shape.
fn split_key(line: &str) -> Option<(String, String)> {
    let is_key_start = |b: u8| b.is_ascii_alphabetic() || b == b'_';
    let is_key_continue = |b: &u8| b.is_ascii_alphanumeric() || *b == b'_';

    let first = line.bytes().next()?;
    if !is_key_start(first) {
        return None;
    }

    // Every byte counted here is ASCII, so `key_len` is always a char boundary.
    let key_len = 1 + line.bytes().skip(1).take_while(is_key_continue).count();
    let (key, tail) = line.split_at(key_len);
    let after_colon = tail.strip_prefix(':')?;
    Some((key.to_string(), after_colon.to_string()))
}
/// Splits `value` into the part before a trailing YAML comment and the comment.
///
/// A comment starts at a `#` that is at the start of `value` or preceded by a
/// space or tab, and that is not inside a quoted scalar. On a match the value
/// part is returned with trailing whitespace trimmed and the comment part
/// starts at the `#`. Without a comment, `value` is returned unchanged
/// together with `None`.
///
/// Quote handling:
/// * a `"` or `'` only opens a quoted scalar at a token start (start of the
///   value, after whitespace, or after `[`, `{`, `,`, `:`); a quote in the
///   middle of a plain scalar such as `it's` is ordinary text and must not
///   hide a comment that follows it;
/// * inside double quotes a backslash escapes the next byte;
/// * inside single quotes `''` is an escaped quote and does not end the scalar.
fn split_trailing_comment(value: &str) -> (String, Option<String>) {
    let bytes = value.as_bytes();
    let mut i = 0;
    let mut prev_was_ws = true;
    let mut at_token_start = true;
    let mut in_dq = false;
    let mut in_sq = false;
    while i < bytes.len() {
        let b = bytes[i];
        if in_dq {
            if b == b'\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if b == b'"' {
                in_dq = false;
            }
        } else if in_sq {
            if b == b'\'' {
                if bytes.get(i + 1) == Some(&b'\'') {
                    // `''` is an escaped quote; stay inside the scalar.
                    i += 2;
                    continue;
                }
                in_sq = false;
            }
        } else if b == b'#' && prev_was_ws {
            // `#` is ASCII, so `i` is a valid char boundary for slicing.
            let before = value[..i].trim_end().to_string();
            let comment = value[i..].to_string();
            return (before, Some(comment));
        } else if at_token_start {
            match b {
                b'"' => in_dq = true,
                b'\'' => in_sq = true,
                _ => {}
            }
        }
        prev_was_ws = matches!(b, b' ' | b'\t');
        at_token_start = prev_was_ws || matches!(b, b'[' | b'{' | b',' | b':');
        i += 1;
    }
    (value.to_string(), None)
}
/// Examines the value text of a top-level field for a leading YAML tag.
///
/// Returns `(fill, cleaned_value, had_non_fill_tag, fill_target_error)`:
/// * `fill` - the value starts with a `!fill` tag (the tag alone, or followed
///   by a space or tab);
/// * `cleaned_value` - for `!fill`, the value with the tag removed: the
///   original leading whitespace when nothing follows the tag, otherwise one
///   space plus the remaining text; in every other case `value` unchanged;
/// * `had_non_fill_tag` - the value starts with `!` but is not a `!fill` tag;
/// * `fill_target_error` - a message naming `key` when `!fill` is followed by
///   a flow mapping (`{`).
fn inspect_fill_and_tags(value: &str, key: &str) -> (bool, String, bool, Option<String>) {
    let body = value.trim_start();
    let leading_ws = &value[..value.len() - body.len()];

    if let Some(after_tag) = body.strip_prefix("!fill") {
        // `!filler` and friends are different tags; require a delimiter or the end.
        let tag_is_complete = after_tag.is_empty() || after_tag.starts_with([' ', '\t']);
        if tag_is_complete {
            let payload = after_tag.trim_start();
            let target_error = payload.starts_with('{').then(|| {
                format!(
                    "`!fill` on key `{}` targets a mapping; `!fill` is supported on scalars and sequences only",
                    key
                )
            });
            let cleaned = if payload.is_empty() {
                leading_ws.to_string()
            } else {
                format!(" {}", payload)
            };
            return (true, cleaned, false, target_error);
        }
    }

    // Any other leading `!` is an unsupported tag. The value is handed back
    // as-is, tag text included.
    // NOTE(review): the caller's warning says the tag "has been dropped";
    // confirm that a later stage actually removes it.
    let has_other_tag = body.starts_with('!');
    (false, value.to_string(), has_other_tag, None)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected `PreItem::Field` for `key`.
    fn field(key: &str, fill: bool) -> PreItem {
        PreItem::Field {
            key: key.to_string(),
            fill,
        }
    }

    /// Builds the expected `PreItem::Comment` for `text`.
    fn comment(text: &str) -> PreItem {
        PreItem::Comment(text.to_string())
    }

    /// Counts the warnings in `scan` that carry the diagnostic code `code`.
    fn warning_count(scan: &PreScan, code: &str) -> usize {
        scan.warnings
            .iter()
            .filter(|w| w.code.as_deref() == Some(code))
            .count()
    }

    // Own-line comments are interleaved with fields in source order.
    #[test]
    fn extracts_own_line_comments() {
        let scan = prescan_fence_content("# top\ntitle: foo\n# mid\nauthor: bar\n");
        let expected = vec![
            comment("top"),
            field("title", false),
            comment("mid"),
            field("author", false),
        ];
        assert_eq!(scan.items, expected);
    }

    // A trailing comment follows its field and is removed from the cleaned YAML.
    #[test]
    fn splits_trailing_comments() {
        let scan = prescan_fence_content("title: foo # inline\n");
        assert_eq!(scan.items, vec![field("title", false), comment("inline")]);
        assert!(scan.cleaned_yaml.contains("title: foo"));
        assert!(!scan.cleaned_yaml.contains("inline"));
    }

    // `!fill` before a scalar sets the flag and is stripped from the output.
    #[test]
    fn detects_fill_on_scalar() {
        let scan = prescan_fence_content("dept: !fill Department\n");
        assert_eq!(scan.items, vec![field("dept", true)]);
        assert!(scan.cleaned_yaml.contains("dept: Department"));
        assert!(!scan.cleaned_yaml.contains("!fill"));
    }

    // `!fill` with no value still counts as a fill field.
    #[test]
    fn detects_bare_fill() {
        let scan = prescan_fence_content("dept: !fill\n");
        assert_eq!(scan.items, vec![field("dept", true)]);
        assert!(!scan.cleaned_yaml.contains("!fill"));
    }

    // Any tag other than `!fill` produces a warning.
    #[test]
    fn unknown_tag_warns() {
        let scan = prescan_fence_content("x: !custom value\n");
        assert!(
            warning_count(&scan, "parse::unsupported_yaml_tag") > 0,
            "expected unsupported_yaml_tag warning"
        );
    }

    // The nested-comment warning is emitted exactly once for this input.
    #[test]
    fn nested_comment_warns_once() {
        let scan = prescan_fence_content("arr:\n - a # inline\n # own-line\n - b\n");
        let nested = warning_count(&scan, "parse::comments_in_nested_yaml_dropped");
        assert_eq!(nested, 1, "expected exactly one nested-comment warning");
    }

    // `!fill` in front of a flow sequence is accepted.
    #[test]
    fn fill_on_flow_sequence_allowed() {
        let scan = prescan_fence_content("x: !fill [1, 2]\n");
        assert!(
            scan.fill_target_errors.is_empty(),
            "expected no error; !fill on sequences is supported"
        );
        assert_eq!(scan.items, vec![field("x", true)]);
    }

    // `!fill` in front of a flow mapping is reported as an error.
    #[test]
    fn fill_on_flow_mapping_errors() {
        let scan = prescan_fence_content("x: !fill {a: 1}\n");
        assert!(
            !scan.fill_target_errors.is_empty(),
            "expected error; !fill on mappings is rejected"
        );
    }
}