use std::borrow::Cow;
use crate::diagnostics::{CoreError, LintResult, ValidationError};
use serde::de::DeserializeOwned;
/// Converts every `\r\n` pair and every lone `\r` in `s` into a single `\n`.
///
/// Input that contains no carriage return is handed back as
/// [`Cow::Borrowed`] without allocating; otherwise a new `String` is built
/// and returned as [`Cow::Owned`].
#[inline]
pub fn normalize_line_endings(s: &str) -> Cow<'_, str> {
    let first_cr = match s.find('\r') {
        Some(pos) => pos,
        None => return Cow::Borrowed(s),
    };

    let mut normalized = String::with_capacity(s.len());
    normalized.push_str(&s[..first_cr]);

    // Every piece produced by the split was preceded by a `\r` in the input.
    // That `\r` becomes `\n`; a `\n` immediately following it belonged to the
    // same CRLF pair and must not be emitted a second time.
    for piece in s[first_cr + 1..].split('\r') {
        normalized.push('\n');
        normalized.push_str(piece.strip_prefix('\n').unwrap_or(piece));
    }

    Cow::Owned(normalized)
}
/// Largest nesting depth `check_yaml_depth` tolerates; a larger observed
/// depth makes it return an error.
pub const MAX_YAML_DEPTH: usize = 32;
/// Rejects YAML whose estimated nesting depth exceeds [`MAX_YAML_DEPTH`].
///
/// This is a cheap textual pre-check meant to run before the text is handed
/// to a real YAML parser. The estimate is a heuristic (see
/// `estimate_yaml_depth`), not a parse.
///
/// # Errors
///
/// Returns `CoreError::Validation(ValidationError::Other(..))` when the
/// estimated depth is greater than [`MAX_YAML_DEPTH`].
pub(crate) fn check_yaml_depth(yaml: &str) -> LintResult<()> {
    let observed = estimate_yaml_depth(yaml);
    if observed > MAX_YAML_DEPTH {
        return Err(CoreError::Validation(ValidationError::Other(
            anyhow::anyhow!(
                "YAML frontmatter nesting depth {} exceeds maximum {} (possible YAML bomb)",
                observed,
                MAX_YAML_DEPTH
            ),
        )));
    }
    Ok(())
}

/// Estimates the nesting depth of `yaml` without parsing it.
///
/// The result is the maximum of three line-based measurements:
/// * the deepest run of unclosed `[` / `{` seen outside quoted scalars,
/// * the largest number of consecutive `- ` prefixes on one line,
/// * the largest leading indentation (spaces, then tabs) of any line.
///
/// Single- and double-quoted scalars are tracked across line breaks. Lines
/// that start inside such a scalar contribute neither indentation nor dash
/// counts, but once the scalar closes the remainder of that line is scanned
/// like any other text, so brackets placed after the closing quote are still
/// counted.
fn estimate_yaml_depth(yaml: &str) -> usize {
    let mut flow_depth: usize = 0;
    let mut max_flow: usize = 0;
    let mut max_dash: usize = 0;
    let mut max_indent_units: usize = 0;
    let mut in_single = false;
    let mut in_double = false;

    for line in yaml.lines() {
        let scanned = if in_single || in_double {
            // Continuation of a multi-line quoted scalar: its indentation and
            // any leading dashes are scalar content, not structure.
            line
        } else {
            let leading_ws = line
                .bytes()
                .take_while(|b| *b == b' ' || *b == b'\t')
                .count();
            let trimmed = &line[leading_ws..];
            if trimmed.is_empty() || trimmed.starts_with('#') {
                continue;
            }

            let spaces = line.bytes().take_while(|b| *b == b' ').count();
            let tabs = line[spaces..].bytes().take_while(|b| *b == b'\t').count();
            max_indent_units = max_indent_units.max(spaces + tabs);

            let mut rest = trimmed;
            let mut dashes: usize = 0;
            while let Some(after) = rest.strip_prefix("- ") {
                dashes += 1;
                rest = after;
            }
            max_dash = max_dash.max(dashes);
            rest
        };

        let bytes = scanned.as_bytes();
        let mut i = 0;
        while i < bytes.len() {
            let b = bytes[i];
            if in_single {
                if b == b'\'' {
                    // `''` is an escaped quote inside a single-quoted scalar.
                    if bytes.get(i + 1) == Some(&b'\'') {
                        i += 2;
                        continue;
                    }
                    in_single = false;
                }
            } else if in_double {
                if b == b'\\' {
                    // Skip the escaped byte so `\"` does not close the scalar.
                    i += 2;
                    continue;
                }
                if b == b'"' {
                    in_double = false;
                }
            } else {
                // `i == 0` is the start of the line's content, which is
                // always preceded by indentation or a `- ` prefix.
                let after_blank = i == 0 || matches!(bytes[i - 1], b' ' | b'\t');
                // A quote only opens a quoted scalar where a new token can
                // begin. A quote in the middle of a word (`don't`) is plain
                // text and must not put the scanner into quote mode.
                let at_token_start =
                    after_blank || matches!(bytes[i - 1], b'[' | b'{' | b',' | b':');
                match b {
                    b'\'' if at_token_start => in_single = true,
                    b'"' if at_token_start => in_double = true,
                    // `#` starts a comment only when separated from the
                    // preceding token by whitespace; otherwise it is part of
                    // a scalar and the rest of the line must still be scanned.
                    b'#' if after_blank => break,
                    b'[' | b'{' => {
                        flow_depth += 1;
                        max_flow = max_flow.max(flow_depth);
                    }
                    b']' | b'}' => {
                        flow_depth = flow_depth.saturating_sub(1);
                    }
                    _ => {}
                }
            }
            i += 1;
        }
    }

    max_flow.max(max_dash).max(max_indent_units)
}
#[allow(dead_code)] pub fn parse_frontmatter<T: DeserializeOwned>(content: &str) -> LintResult<(T, String)> {
let parts = split_frontmatter(content);
check_yaml_depth(&parts.frontmatter)?;
let parsed: T = serde_yaml::from_str(&parts.frontmatter)
.map_err(|e| CoreError::Validation(ValidationError::Other(e.into())))?;
Ok((parsed, parts.body.trim_start().to_string()))
}
/// Result of [`split_frontmatter`]: the frontmatter text, the body text and
/// where each one sits in the original input.
#[derive(Debug, Clone)]
pub struct FrontmatterParts {
/// `true` when the input, after leading whitespace, starts with `---`.
pub has_frontmatter: bool,
/// `true` when a `\n---` sequence was found after the opening `---`.
pub has_closing: bool,
/// Text between the opener's line break and the closing `\n---`; empty when
/// there is no frontmatter or no closing delimiter.
pub frontmatter: String,
/// Text right after the closing `---` (so it normally begins with a line
/// break). Without a closing delimiter it is everything after the opener's
/// line break; without frontmatter it is the input minus leading whitespace.
pub body: String,
/// Byte offset into the original input just past the opening `---` and its
/// line break, or the offset of the first non-whitespace byte when there is
/// no frontmatter.
pub frontmatter_start: usize,
/// Byte offset into the original input at which `body` begins.
pub body_start: usize,
}
/// Splits `content` into `---`-delimited frontmatter and the body after it.
///
/// Leading whitespace is skipped before looking for the opening `---`. The
/// frontmatter runs from just after the opener's line break (`\n` or `\r\n`,
/// if present) up to the first `\n---` sequence that follows the opening
/// dashes; the body starts immediately after those closing dashes.
///
/// If the input does not start with `---`, the whole trimmed input is the
/// body. If the opener is never closed, the frontmatter is reported as empty
/// and everything after the opener's line break is the body.
///
/// Both offsets in the result are byte offsets into the original `content`.
pub fn split_frontmatter(content: &str) -> FrontmatterParts {
    const FENCE: &str = "---";
    const CLOSING_FENCE: &str = "\n---";

    let unpadded = content.trim_start();
    let padding = content.len() - unpadded.len();

    let after_opener = match unpadded.strip_prefix(FENCE) {
        Some(tail) => tail,
        None => {
            return FrontmatterParts {
                has_frontmatter: false,
                has_closing: false,
                frontmatter: String::new(),
                body: unpadded.to_string(),
                frontmatter_start: padding,
                body_start: padding,
            };
        }
    };

    // Length of the line break that directly follows the opening fence.
    let eol_len = match after_opener.as_bytes() {
        [b'\r', b'\n', ..] => 2,
        [b'\n', ..] => 1,
        _ => 0,
    };
    let yaml_offset = padding + FENCE.len() + eol_len;

    match after_opener.find(CLOSING_FENCE) {
        Some(close_at) => {
            let body_at = close_at + CLOSING_FENCE.len();
            // When the closing fence sits right behind the opener, the range
            // below is inverted; `get` then yields `None` and the frontmatter
            // is empty.
            let yaml = after_opener.get(eol_len..close_at).unwrap_or("");
            FrontmatterParts {
                has_frontmatter: true,
                has_closing: true,
                frontmatter: yaml.to_string(),
                body: after_opener[body_at..].to_string(),
                frontmatter_start: yaml_offset,
                body_start: padding + FENCE.len() + body_at,
            }
        }
        None => FrontmatterParts {
            has_frontmatter: true,
            has_closing: false,
            frontmatter: String::new(),
            body: after_opener[eol_len..].to_string(),
            frontmatter_start: yaml_offset,
            body_start: yaml_offset,
        },
    }
}
// Unit tests for `parse_frontmatter`, `split_frontmatter`,
// `normalize_line_endings` and `check_yaml_depth`.
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
// Minimal frontmatter shape used as the deserialization target below.
#[derive(Debug, Deserialize, PartialEq)]
struct TestFrontmatter {
name: String,
description: String,
}
// Happy path: both fields are deserialized and the body is returned with its
// leading whitespace trimmed.
#[test]
fn test_parse_frontmatter() {
let content = r#"---
name: test-skill
description: A test skill
---
Body content here"#;
let (fm, body): (TestFrontmatter, String) = parse_frontmatter(content).unwrap();
assert_eq!(fm.name, "test-skill");
assert_eq!(fm.description, "A test skill");
assert_eq!(body, "Body content here");
}
// Input without a `---` opener must fail to parse into `TestFrontmatter`.
#[test]
fn test_no_frontmatter() {
let content = "Just body content";
let result: LintResult<(TestFrontmatter, String)> = parse_frontmatter(content);
assert!(result.is_err()); }
// Basic LF input: the body keeps the line break that follows the closing
// `---`, and `body_start` indexes that body in the original content.
#[test]
fn test_split_frontmatter_basic() {
let content = "---\nname: test\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert_eq!(parts.frontmatter, "name: test");
assert_eq!(parts.body, "\nbody");
assert_eq!(parts.frontmatter_start, 4);
assert_eq!(&content[parts.body_start..], parts.body);
}
// Unclosed opener: frontmatter is empty and everything after the opener's
// line break is the body.
#[test]
fn test_split_frontmatter_no_closing() {
let content = "---\nname: test";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(!parts.has_closing);
assert!(parts.frontmatter.is_empty());
assert_eq!(parts.body, "name: test");
assert_eq!(parts.body_start, 4); assert_eq!(&content[parts.body_start..], parts.body);
}
// Same as above with a CRLF after the opener: the offset skips both bytes.
#[test]
fn test_split_frontmatter_no_closing_crlf() {
let content = "---\r\nname: test";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(!parts.has_closing);
assert!(parts.frontmatter.is_empty());
assert_eq!(parts.body, "name: test");
assert_eq!(parts.body_start, 5); assert_eq!(&content[parts.body_start..], parts.body);
}
// Empty input has neither frontmatter nor a closing delimiter.
#[test]
fn test_split_frontmatter_empty() {
let content = "";
let parts = split_frontmatter(content);
assert!(!parts.has_frontmatter);
assert!(!parts.has_closing);
}
// Closing delimiter directly after the opener (LF): frontmatter is empty.
#[test]
fn test_split_frontmatter_empty_body_lf() {
let content = "---\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert_eq!(parts.frontmatter, "");
assert_eq!(parts.body, "\nbody");
assert_eq!(&content[parts.body_start..], parts.body);
}
// Closing delimiter directly after the opener (CRLF): frontmatter is empty
// and the body keeps its leading `\r\n`.
#[test]
fn test_split_frontmatter_empty_body_crlf() {
let content = "---\r\n---\r\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert_eq!(parts.frontmatter, "");
assert_eq!(parts.body, "\r\nbody");
assert_eq!(&content[parts.body_start..], parts.body);
}
// Whitespace before the opener does not prevent detection.
#[test]
fn test_split_frontmatter_whitespace_prefix() {
let content = " \n---\nkey: val\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
}
// Only the first closing delimiter ends the frontmatter; later `---` lines
// stay in the body.
#[test]
fn test_split_frontmatter_multiple_dashes() {
let content = "---\nfirst: 1\n---\nmiddle\n---\nlast";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.body.contains("middle"));
}
// CRLF line endings are still split into frontmatter and body.
#[test]
fn test_split_frontmatter_crlf() {
let content = "---\r\nname: test\r\n---\r\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.body.contains("body"));
}
// CRLF input: offsets stay in bounds and the frontmatter keeps the `\r` that
// precedes the closing delimiter's `\n`.
#[test]
fn test_split_frontmatter_crlf_byte_offsets() {
let content = "---\r\nname: test\r\n---\r\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter_start <= content.len());
assert!(parts.body_start <= content.len());
assert_eq!(parts.frontmatter_start, 5);
assert_eq!(parts.frontmatter, "name: test\r");
}
// Text directly after the opening dashes (no line break) is treated as the
// start of the frontmatter.
#[test]
fn test_split_frontmatter_no_newline_after_opener() {
let content = "---key: val\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert_eq!(parts.frontmatter_start, 3); assert_eq!(parts.frontmatter, "key: val");
assert_eq!(&content[parts.body_start..], parts.body);
}
// Multi-byte characters in values survive the split.
#[test]
fn test_split_frontmatter_unicode_values() {
let content = "---\nname: \u{4f60}\u{597d}\ndescription: caf\u{00e9}\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(
parts.frontmatter.contains("\u{4f60}\u{597d}"),
"Frontmatter should contain CJK characters"
);
assert!(
parts.frontmatter.contains("caf\u{00e9}"),
"Frontmatter should contain accented character"
);
}
// Backslash-escaped quotes are passed through unchanged.
#[test]
fn test_split_frontmatter_escaped_quotes() {
let content = "---\nname: \"test\\\"skill\"\ndescription: test\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(
parts.frontmatter.contains("test\\\"skill"),
"Frontmatter should preserve escaped quotes"
);
}
// A 5000-character value is kept intact.
#[test]
fn test_split_frontmatter_long_lines() {
let long_value = "x".repeat(5000);
let content = format!("---\nname: {}\n---\nbody", long_value);
let parts = split_frontmatter(&content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter.contains(&long_value));
}
// A key with no value is kept in the frontmatter text.
#[test]
fn test_split_frontmatter_empty_values() {
let content = "---\nname:\ndescription: test\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter.contains("name:"));
}
// Indented (nested) keys are kept in the frontmatter text.
#[test]
fn test_split_frontmatter_nested_yaml() {
let content = "---\nmetadata:\n key1: val1\n key2: val2\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter.contains("key1: val1"));
assert!(parts.frontmatter.contains("key2: val2"));
}
// A mix of LF and CRLF inside the frontmatter is still detected and closed.
#[test]
fn test_split_frontmatter_mixed_line_endings() {
let content = "---\nname: test\r\ndescription: val\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
}
// Emoji in keys: content is preserved and both offsets land on char
// boundaries of the original input.
#[test]
fn test_split_frontmatter_emoji_in_yaml_keys() {
let content = "---\n\u{1f525}fire: hot\n\u{1f680}rocket: fast\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter.contains("\u{1f525}fire"));
assert!(parts.frontmatter.contains("\u{1f680}rocket"));
assert!(content.is_char_boundary(parts.frontmatter_start));
assert!(content.is_char_boundary(parts.body_start));
}
// Emoji in values are preserved.
#[test]
fn test_split_frontmatter_emoji_in_yaml_values() {
let content = "---\nstatus: \u{2705} done\nmood: \u{1f60a}\n---\nbody";
let parts = split_frontmatter(content);
assert!(parts.has_frontmatter);
assert!(parts.has_closing);
assert!(parts.frontmatter.contains("\u{2705}"));
assert!(parts.frontmatter.contains("\u{1f60a}"));
}
// Input without `\r` is returned borrowed and unchanged.
#[test]
fn test_normalize_lf_only_returns_borrowed() {
let input = "hello\nworld\n";
let result = normalize_line_endings(input);
assert!(
matches!(result, Cow::Borrowed(_)),
"LF-only input should return Cow::Borrowed"
);
assert_eq!(&*result, input);
}
// CRLF input is rewritten to LF and returned owned.
#[test]
fn test_normalize_crlf_returns_owned() {
let input = "hello\r\nworld\r\n";
let result = normalize_line_endings(input);
assert!(
matches!(result, Cow::Owned(_)),
"CRLF input should return Cow::Owned"
);
assert_eq!(&*result, "hello\nworld\n");
}
// A lone `\r` is converted to `\n`.
#[test]
fn test_normalize_lone_cr() {
let input = "hello\rworld\r";
let result = normalize_line_endings(input);
assert_eq!(&*result, "hello\nworld\n");
}
// CRLF, lone CR and LF in one input all end up as LF.
#[test]
fn test_normalize_mixed_line_endings() {
let input = "line1\r\nline2\rline3\nline4";
let result = normalize_line_endings(input);
assert_eq!(&*result, "line1\nline2\nline3\nline4");
assert!(!result.contains('\r'));
}
// The empty string is returned borrowed.
#[test]
fn test_normalize_empty_string() {
let input = "";
let result = normalize_line_endings(input);
assert!(
matches!(result, Cow::Borrowed(_)),
"Empty string should return Cow::Borrowed"
);
assert_eq!(&*result, "");
}
// Flat keys plus a short flow list are accepted.
#[test]
fn test_check_yaml_depth_accepts_typical_frontmatter() {
let yaml = "name: foo\ndescription: bar\ntags: [a, b, c]\n";
assert!(check_yaml_depth(yaml).is_ok());
}
// A handful of indentation levels is accepted.
#[test]
fn test_check_yaml_depth_accepts_realistic_nesting() {
let yaml = "a:\n b:\n c:\n d:\n e: value\n";
assert!(check_yaml_depth(yaml).is_ok());
}
// 100 nested flow brackets are rejected with the depth error message.
#[test]
fn test_check_yaml_depth_rejects_deep_flow_brackets() {
let depth = 100;
let open = "[".repeat(depth);
let close = "]".repeat(depth);
let yaml = format!("data: {}v{}\n", open, close);
let err = check_yaml_depth(&yaml).expect_err("deep nesting must be rejected");
let msg = format!("{}", err);
assert!(
msg.contains("nesting depth") && msg.contains("exceeds maximum"),
"unexpected error message: {msg}"
);
}
// 100 progressively deeper indentation levels are rejected.
#[test]
fn test_check_yaml_depth_rejects_deep_indent() {
let mut yaml = String::new();
for i in 0..100 {
for _ in 0..i {
yaml.push_str(" ");
}
yaml.push_str(&format!("k{}:\n", i));
}
assert!(check_yaml_depth(&yaml).is_err());
}
// 60 levels indented by a single space each are rejected as well.
#[test]
fn test_check_yaml_depth_rejects_deep_one_space_indent() {
let mut yaml = String::new();
for i in 0..60 {
for _ in 0..i {
yaml.push(' ');
}
yaml.push_str(&format!("k{}:\n", i));
}
assert!(check_yaml_depth(&yaml).is_err());
}
// Brackets inside a double-quoted scalar do not count towards the depth.
#[test]
fn test_check_yaml_depth_ignores_brackets_in_quotes() {
let yaml = "note: \"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[not real]]]]\"\n";
assert!(check_yaml_depth(yaml).is_ok());
}
// Brackets on continuation lines of a multi-line double-quoted scalar are
// ignored.
#[test]
fn test_check_yaml_depth_multiline_double_quoted_scalar_with_brackets() {
let yaml = "description: \"line one with [[[[ brackets\n\
still quoted [[[[ on line two\n\
and [[[[ line three ending here\"\n\
name: ok\n";
assert!(
check_yaml_depth(yaml).is_ok(),
"multi-line double-quoted scalar containing '[' must not be rejected"
);
}
// Same for a multi-line single-quoted scalar, including an escaped `''`.
#[test]
fn test_check_yaml_depth_multiline_single_quoted_scalar_with_brackets() {
let yaml = "description: 'line one with [[[[ brackets and it''s fine\n\
still quoted [[[[ on line two\n\
and [[[[ line three'\n\
name: ok\n";
assert!(
check_yaml_depth(yaml).is_ok(),
"multi-line single-quoted scalar containing '[' must not be rejected"
);
}
// A run of `- ` at the start of a continuation line inside a quoted scalar
// is not counted as list nesting.
#[test]
fn test_check_yaml_depth_multiline_scalar_ignores_dash_list_prefix() {
let yaml = "note: \"start\n\
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - end\"\n\
name: ok\n";
assert!(
check_yaml_depth(yaml).is_ok(),
"dashes inside a multi-line quoted scalar must not count as list depth"
);
}
// End to end: `parse_frontmatter` returns an error for 100 nested brackets.
#[test]
fn test_parse_frontmatter_rejects_yaml_bomb() {
#[derive(Debug, serde::Deserialize)]
struct Any {
#[allow(dead_code)]
data: serde_yaml::Value,
}
let open = "[".repeat(100);
let close = "]".repeat(100);
let content = format!("---\ndata: {}v{}\n---\nbody\n", open, close);
let result: LintResult<(Any, String)> = parse_frontmatter(&content);
assert!(
result.is_err(),
"pathologically nested YAML must be rejected before serde_yaml"
);
}
}
// Property-based tests for `split_frontmatter` and `normalize_line_endings`.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(100))]

        // Arbitrary input must never make the splitter panic.
        #[test]
        fn split_frontmatter_never_panics(content in ".*") {
            let _ = split_frontmatter(&content);
        }

        // Both reported offsets stay within the input.
        #[test]
        fn split_frontmatter_valid_offsets(content in ".*") {
            let parts = split_frontmatter(&content);
            prop_assert!(parts.frontmatter_start <= content.len());
            prop_assert!(parts.body_start <= content.len());
        }

        // A well-formed `---` ... `---` block is detected and closed.
        #[test]
        fn frontmatter_with_dashes_detected(
            yaml in "[a-z]+: [a-z]+",
        ) {
            let content = format!("---\n{}\n---\nbody", yaml);
            let parts = split_frontmatter(&content);
            prop_assert!(parts.has_frontmatter);
            prop_assert!(parts.has_closing);
        }

        // Input whose first character is neither `-` nor whitespace has no
        // frontmatter. Whitespace must be excluded from the first character
        // because `split_frontmatter` skips leading whitespace, so an input
        // such as " ---" legitimately reports frontmatter.
        #[test]
        fn no_frontmatter_without_leading_dashes(
            content in "[^-\\s].*"
        ) {
            let parts = split_frontmatter(&content);
            prop_assert!(!parts.has_frontmatter);
        }

        // An opener without a closing delimiter yields empty frontmatter.
        #[test]
        fn unclosed_frontmatter_has_empty_frontmatter(
            yaml in "[a-z]+: [a-z]+"
        ) {
            let content = format!("---\n{}", yaml);
            let parts = split_frontmatter(&content);
            prop_assert!(parts.has_frontmatter);
            prop_assert!(!parts.has_closing);
            prop_assert!(parts.frontmatter.is_empty());
        }

        // Normalized output never contains a carriage return.
        #[test]
        fn normalize_line_endings_never_contains_cr(content in ".*") {
            let normalized = normalize_line_endings(&content);
            prop_assert!(
                !normalized.contains('\r'),
                "Normalized output must not contain \\r"
            );
        }
    }
}