use crate::fm::{self, FmDiagnostic, FmNode, FmSeverity, FmValue, FrontmatterBlock, ScalarSpan};
use crate::span::Span;
/// Returns the byte length of the opening `---` delimiter line, including its
/// line terminator, when `source` starts with one.
///
/// `\r\n` yields 5; a lone `\n` or `\r` yields 4. Anything else (including a
/// bare `---` with no terminator) is not an opening delimiter and yields `None`.
fn find_opening(source: &str) -> Option<usize> {
    let after_dashes = source.strip_prefix("---")?;
    match after_dashes.as_bytes() {
        [b'\r', b'\n', ..] => Some(5),
        [b'\n' | b'\r', ..] => Some(4),
        _ => None,
    }
}
/// Finds the byte offset (within `rest`) of the closing `---` delimiter.
///
/// A candidate counts only when it sits at the start of a line (offset 0 or
/// right after `\n`/`\r`) and is followed by a line break or the end of input.
/// Returns `None` when no such line exists.
fn find_closing(rest: &str) -> Option<usize> {
    let bytes = rest.as_bytes();
    let is_break = |byte: Option<&u8>| matches!(byte, Some(b'\n' | b'\r'));
    let mut cursor = 0;
    while let Some(offset) = rest[cursor..].find("---") {
        let start = cursor + offset;
        let end = start + 3;
        let begins_line = start == 0 || is_break(bytes.get(start - 1));
        let ends_line = end >= bytes.len() || is_break(bytes.get(end));
        if begins_line && ends_line {
            return Some(start);
        }
        // Either way the next search resumes just past these three dashes.
        cursor = end;
    }
    None
}
/// Byte-level cursor over a block of YAML-like frontmatter text that collects
/// diagnostics while parsing.
struct Parser<'a> {
// Raw bytes of the content being parsed.
src: &'a [u8],
// Current read offset into `src`.
pos: usize,
// Offset added to `pos` when producing absolute span positions (see `abs`).
base: usize,
// Diagnostics emitted so far, in emission order.
diagnostics: Vec<FmDiagnostic>,
// Current block nesting depth; incremented around nested block values.
depth: usize,
// Set once the nesting-limit warning has been emitted so it is reported only once.
depth_limit_hit: bool,
}
impl<'a> Parser<'a> {
/// Creates a parser positioned at the start of `content`; `base` is the
/// offset added to every reported span position.
fn new(content: &'a str, base: usize) -> Self {
    let src = content.as_bytes();
    Self {
        src,
        base,
        pos: 0,
        depth: 0,
        depth_limit_hit: false,
        diagnostics: Vec::new(),
    }
}
/// Reports whether the cursor has consumed every byte of the input.
fn at_end(&self) -> bool {
    self.src.get(self.pos).is_none()
}
/// Returns the byte under the cursor without consuming it, or `None` at the end.
fn peek(&self) -> Option<u8> {
    let byte = *self.src.get(self.pos)?;
    Some(byte)
}
/// Returns the byte `offset` positions past the cursor without consuming
/// anything, or `None` when that position is outside the input.
fn peek_at(&self, offset: usize) -> Option<u8> {
    let index = self.pos + offset;
    self.src.get(index).copied()
}
/// Consumes and returns the byte under the cursor; returns `None` (without
/// moving) when the input is exhausted.
fn advance(&mut self) -> Option<u8> {
    let byte = self.peek()?;
    self.pos += 1;
    Some(byte)
}
/// Absolute position of the cursor: the local offset shifted by `base`.
fn abs(&self) -> usize {
    self.pos + self.base
}
/// Appends a diagnostic with the given span, severity and message.
fn emit(&mut self, span: Span, severity: FmSeverity, message: String) {
    let diagnostic = FmDiagnostic {
        span,
        severity,
        message,
    };
    self.diagnostics.push(diagnostic);
}
/// Span of (at most) one byte at the cursor, clamped so it never extends
/// past the absolute end of the input.
fn here_span(&self) -> Span {
    let start = self.abs();
    let input_end = self.base + self.src.len();
    Span::new(start, input_end.min(start + 1))
}
/// Consumes a run of spaces and tabs and returns how many bytes were skipped.
///
/// Every tab encountered is reported as an error diagnostic covering that
/// single byte; the tab is still consumed.
fn skip_inline_whitespace(&mut self) -> usize {
    let origin = self.pos;
    loop {
        match self.peek() {
            Some(b' ') => {}
            Some(b'\t') => {
                let at = self.abs();
                self.emit(
                    Span::new(at, at + 1),
                    FmSeverity::Error,
                    "tab character in indentation is not allowed in YAML".into(),
                );
            }
            _ => break,
        }
        self.pos += 1;
    }
    self.pos - origin
}
/// Consumes one line terminator (`\n`, `\r\n`, or a bare `\r`) at the cursor.
///
/// Returns `true` when a terminator was consumed, `false` otherwise.
fn skip_newline(&mut self) -> bool {
    let width = match self.peek() {
        Some(b'\n') => 1,
        Some(b'\r') if self.peek_at(1) == Some(b'\n') => 2,
        Some(b'\r') => 1,
        _ => return false,
    };
    self.pos += width;
    true
}
/// Hands the byte just consumed (at `pos - 1`) to `fm::push_utf8_char` and
/// moves the cursor to the position that helper returns.
///
/// NOTE(review): presumably the helper decodes the whole UTF-8 character that
/// starts at that byte, appends it to `text`, and returns the index just past
/// it — confirm against `fm::push_utf8_char`.
fn push_char(&mut self, text: &mut String) {
    let char_start = self.pos - 1;
    self.pos = fm::push_utf8_char(text, self.src, char_start);
}
/// Advances the cursor up to (but not past) the next `\n`/`\r`, or to the end
/// of the input when there is none.
fn skip_to_eol(&mut self) {
    while !matches!(self.peek(), None | Some(b'\n' | b'\r')) {
        self.pos += 1;
    }
}
fn skip_comment(&mut self) {
self.skip_to_eol();
}
/// Counts the spaces and tabs starting at the cursor, without moving it.
///
/// Each space or tab counts as one column.
fn line_indent(&self) -> usize {
    self.src.get(self.pos..).map_or(0, |tail| {
        tail.iter()
            .take_while(|&&byte| byte == b' ' || byte == b'\t')
            .count()
    })
}
/// Skips blank lines and comment-only lines, leaving the cursor at the start
/// of the next line that has real content (or at the end of the input).
///
/// The leading whitespace of each line is scanned to decide what the line
/// holds. When the line turns out to carry content, the cursor is rewound to
/// the start of that line so the caller can measure its indentation.
fn skip_blanks_and_comments(&mut self) {
    loop {
        let line_start = self.pos;
        let diagnostics_before = self.diagnostics.len();
        self.skip_inline_whitespace();
        match self.peek() {
            None => return,
            Some(b'\n' | b'\r') => {
                self.skip_newline();
            }
            Some(b'#') => {
                self.skip_comment();
                self.skip_newline();
            }
            _ => {
                self.pos = line_start;
                // The caller re-scans this same whitespace once it consumes the
                // line, which reports any tabs again. Drop the diagnostics from
                // this look-ahead so each tab is reported only once.
                self.diagnostics.truncate(diagnostics_before);
                return;
            }
        }
    }
}
fn check_unsupported(&mut self) -> bool {
match self.peek() {
Some(b'&') => {
let start = self.abs();
self.skip_to_eol();
self.emit(
Span::new(start, self.abs()),
FmSeverity::Error,
"YAML anchors are not supported in frontmatter".into(),
);
true
}
Some(b'*') => {
let start = self.abs();
self.skip_to_eol();
self.emit(
Span::new(start, self.abs()),
FmSeverity::Error,
"YAML aliases are not supported in frontmatter".into(),
);
true
}
Some(b'!') => {
let start = self.abs();
self.skip_to_eol();
self.emit(
Span::new(start, self.abs()),
FmSeverity::Error,
"YAML tags are not supported in frontmatter".into(),
);
true
}
Some(b'%') => {
let start = self.abs();
self.skip_to_eol();
self.emit(
Span::new(start, self.abs()),
FmSeverity::Error,
"YAML directives are not supported in frontmatter".into(),
);
true
}
_ => false,
}
}
/// Parses an unquoted scalar starting at the cursor.
///
/// The scalar ends at a line break or end of input, or at a `#` that is
/// preceded by a space (start of a comment). With `in_flow` set it also ends
/// at `,`, `]`, `}`, and at a `:` followed by a space, `,`, `}` or end of
/// input. Trailing whitespace is trimmed from the text, and the span covers
/// only the trimmed text.
fn parse_plain_scalar(&mut self, in_flow: bool) -> ScalarSpan {
    let begin = self.pos;
    let abs_begin = self.abs();
    while let Some(byte) = self.peek() {
        let stop = match byte {
            b'\n' | b'\r' => true,
            b'#' => self.pos > begin && self.src[self.pos - 1] == b' ',
            b',' | b']' | b'}' => in_flow,
            b':' => in_flow && matches!(self.peek_at(1), Some(b' ' | b',' | b'}') | None),
            _ => false,
        };
        if stop {
            break;
        }
        self.pos += 1;
    }
    let text = String::from_utf8_lossy(&self.src[begin..self.pos])
        .trim_end()
        .to_string();
    let span = Span::new(abs_begin, abs_begin + text.len());
    ScalarSpan { span, text }
}
/// Parses a single-quoted scalar; the cursor must be on the opening `'`.
///
/// `''` decodes to one apostrophe and `\r\n` is normalised to `\n`. If the
/// input ends before the closing quote, an error diagnostic is emitted and
/// the text collected so far is returned. The span runs from the opening
/// quote to the cursor position after parsing.
fn parse_single_quoted(&mut self) -> ScalarSpan {
    let abs_start = self.abs();
    // Step over the opening quote.
    self.pos += 1;
    let mut text = String::new();
    loop {
        let Some(byte) = self.advance() else {
            self.emit(
                Span::new(abs_start, self.abs()),
                FmSeverity::Error,
                "unclosed single-quoted scalar".into(),
            );
            break;
        };
        match byte {
            // A doubled quote is an escaped apostrophe, not the terminator.
            b'\'' if self.peek() == Some(b'\'') => {
                text.push('\'');
                self.pos += 1;
            }
            b'\'' => break,
            b'\r' if self.peek() == Some(b'\n') => {
                self.pos += 1;
                text.push('\n');
            }
            _ => self.push_char(&mut text),
        }
    }
    let span = Span::new(abs_start, self.abs());
    ScalarSpan { span, text }
}
/// Parses a double-quoted scalar; the cursor must be on the opening `"`.
///
/// Recognised escapes: `\n`, `\t`, `\r`, `\\`, `\"`, `\/`, `\0`, `\ ` and a
/// backslash before a line break (`\n` or `\r\n`), which joins the lines and
/// drops the next line's leading spaces/tabs. Any other escape is kept
/// verbatim (backslash included). A raw `\r\n` is normalised to `\n`.
///
/// If the input ends before the closing quote — including right after a
/// backslash — an error diagnostic is emitted and the text collected so far
/// is returned. The span runs from the opening quote to the cursor position
/// after parsing.
fn parse_double_quoted(&mut self) -> ScalarSpan {
    let abs_start = self.abs();
    // Step over the opening quote.
    self.pos += 1;
    let mut text = String::new();
    loop {
        match self.advance() {
            None => {
                self.emit(
                    Span::new(abs_start, self.abs()),
                    FmSeverity::Error,
                    "unclosed double-quoted scalar".into(),
                );
                break;
            }
            Some(b'"') => break,
            Some(b'\\') => match self.advance() {
                None => {
                    // Input ended in the middle of an escape: the scalar is
                    // unclosed just like in the plain end-of-input case.
                    self.emit(
                        Span::new(abs_start, self.abs()),
                        FmSeverity::Error,
                        "unclosed double-quoted scalar".into(),
                    );
                    break;
                }
                Some(b'n') => text.push('\n'),
                Some(b't') => text.push('\t'),
                Some(b'r') => text.push('\r'),
                Some(b'\\') => text.push('\\'),
                Some(b'"') => text.push('"'),
                Some(b'/') => text.push('/'),
                Some(b'0') => text.push('\0'),
                Some(b' ') => text.push(' '),
                Some(b'\n') => {
                    // Escaped line break: drop the continuation's indentation.
                    while self.peek() == Some(b' ') || self.peek() == Some(b'\t') {
                        self.pos += 1;
                    }
                }
                Some(b'\r') if self.peek() == Some(b'\n') => {
                    self.pos += 1;
                    while self.peek() == Some(b' ') || self.peek() == Some(b'\t') {
                        self.pos += 1;
                    }
                }
                Some(_) => {
                    // Unknown escape: keep the backslash and the character.
                    text.push('\\');
                    self.push_char(&mut text);
                }
            },
            Some(b'\r') if self.peek() == Some(b'\n') => {
                self.pos += 1;
                text.push('\n');
            }
            Some(_) => self.push_char(&mut text),
        }
    }
    ScalarSpan {
        span: Span::new(abs_start, self.abs()),
        text,
    }
}
/// Parses a block scalar (`|` or `>`); the cursor must be on the indicator.
///
/// Header characters (`+`, `-`, digits) are consumed but not interpreted, and
/// an optional trailing comment on the header line is skipped. The body is
/// every following line that is blank or indented at least as far as the
/// first non-blank body line. Only the span is recorded; the text is not
/// decoded here.
///
/// Body lines must also be indented deeper than the line that owns the
/// indicator. Without that floor an empty block scalar followed by a sibling
/// key (`desc: |` then `next: x`) would detect a content indent of 0 and
/// swallow the rest of the input.
fn parse_block_scalar(&mut self) -> FmValue {
    let abs_start = self.abs();

    // Indentation of the line that carries the indicator.
    let line_start = self.src[..self.pos]
        .iter()
        .rposition(|&byte| byte == b'\n' || byte == b'\r')
        .map_or(0, |newline| newline + 1);
    let owner_indent = self.src[line_start..self.pos]
        .iter()
        .take_while(|&&byte| byte == b' ' || byte == b'\t')
        .count();

    // Indicator plus any chomping/indentation header characters.
    self.pos += 1;
    while matches!(self.peek(), Some(b'+' | b'-' | b'0'..=b'9')) {
        self.pos += 1;
    }
    self.skip_inline_whitespace();
    if self.peek() == Some(b'#') {
        self.skip_comment();
    }
    self.skip_newline();

    let content_indent = self.detect_block_scalar_indent().max(owner_indent + 1);
    while !self.at_end() {
        let indent = self.line_indent();
        let is_blank = matches!(
            self.src.get(self.pos + indent),
            Some(b'\n' | b'\r') | None
        );
        // Blank lines always belong to the scalar; a less-indented content
        // line ends it and is left for the caller.
        if !is_blank && indent < content_indent {
            break;
        }
        self.skip_to_eol();
        self.skip_newline();
    }
    FmValue::BlockScalar {
        span: Span::new(abs_start, self.abs()),
    }
}
/// Looks ahead (without moving the cursor) for the indentation of the first
/// non-blank line at or after the cursor.
///
/// Lines holding only spaces/tabs are skipped. All three line terminators
/// (`\n`, `\r\n`, bare `\r`) end a blank line, matching `skip_newline`. If
/// the input ends first, the indentation of the final (blank) line is
/// returned.
fn detect_block_scalar_indent(&self) -> usize {
    let mut p = self.pos;
    loop {
        let indent = self.src.get(p..).map_or(0, |tail| {
            tail.iter()
                .take_while(|&&byte| byte == b' ' || byte == b'\t')
                .count()
        });
        p += indent;
        match self.src.get(p) {
            Some(b'\n') => p += 1,
            // A bare `\r` is a line break too; treating it as content would
            // report the indent of a blank line.
            Some(b'\r') => p += if self.src.get(p + 1) == Some(&b'\n') { 2 } else { 1 },
            _ => return indent,
        }
    }
}
/// Parses a flow sequence (`[a, b, ...]`); the cursor must be on the `[`.
///
/// Items are single-quoted, double-quoted or plain scalars; empty plain
/// scalars are dropped. Reaching the end of input before `]` emits an error
/// diagnostic. The returned span runs from `[` to the cursor after parsing.
fn parse_flow_sequence(&mut self) -> FmValue {
    let abs_start = self.abs();
    // Step over the opening bracket.
    self.pos += 1;
    let mut items = Vec::new();
    loop {
        self.skip_flow_whitespace();
        let Some(byte) = self.peek() else {
            self.emit(
                Span::new(abs_start, self.abs()),
                FmSeverity::Error,
                "unclosed flow sequence".into(),
            );
            break;
        };
        match byte {
            b']' => {
                self.pos += 1;
                break;
            }
            b',' => self.pos += 1,
            b'\'' => items.push(self.parse_single_quoted()),
            b'"' => items.push(self.parse_double_quoted()),
            _ => {
                let before = self.pos;
                let scalar = self.parse_plain_scalar(true);
                if self.pos == before {
                    // Nothing was consumed: step over the offending byte so the
                    // loop always makes progress.
                    self.pos += 1;
                } else if !scalar.text.is_empty() {
                    items.push(scalar);
                }
            }
        }
    }
    let span = Span::new(abs_start, self.abs());
    FmValue::FlowSequence { span, items }
}
/// Parses a flow mapping (`{k: v, ...}`); the cursor must be on the `{`.
///
/// Each entry is a key (see `parse_flow_key`), an optional `:`, and a scalar
/// value; a missing value is recorded as an empty scalar at the cursor.
/// Reaching the end of input before `}` emits an error diagnostic. The
/// returned span runs from `{` to the cursor after parsing.
fn parse_flow_mapping(&mut self) -> FmValue {
    let abs_start = self.abs();
    // Step over the opening brace.
    self.pos += 1;
    let mut entries = Vec::new();
    loop {
        self.skip_flow_whitespace();
        let Some(byte) = self.peek() else {
            self.emit(
                Span::new(abs_start, self.abs()),
                FmSeverity::Error,
                "unclosed flow mapping".into(),
            );
            break;
        };
        if byte == b'}' {
            self.pos += 1;
            break;
        }
        if byte == b',' {
            self.pos += 1;
            continue;
        }

        let before = self.pos;
        let key = self.parse_flow_key();
        self.skip_flow_whitespace();
        if self.peek() == Some(b':') {
            self.pos += 1;
            self.skip_flow_whitespace();
        }
        let value = match self.peek() {
            Some(b'\'') => self.parse_single_quoted(),
            Some(b'"') => self.parse_double_quoted(),
            Some(b',' | b'}') | None => {
                let at = self.abs();
                ScalarSpan {
                    span: Span::new(at, at),
                    text: String::new(),
                }
            }
            _ => self.parse_plain_scalar(true),
        };
        if self.pos == before {
            // Nothing was consumed: step over the offending byte so the loop
            // always makes progress.
            self.pos += 1;
        } else {
            entries.push((key, value));
        }
    }
    let span = Span::new(abs_start, self.abs());
    FmValue::FlowMapping { span, entries }
}
/// Parses a flow-mapping key: quoted when the cursor is on a quote character,
/// otherwise a plain scalar in flow mode.
fn parse_flow_key(&mut self) -> ScalarSpan {
    let opener = self.peek();
    if opener == Some(b'\'') {
        return self.parse_single_quoted();
    }
    if opener == Some(b'"') {
        return self.parse_double_quoted();
    }
    self.parse_plain_scalar(true)
}
/// Skips whitespace inside a flow collection: spaces, tabs, line breaks and
/// `#` comments (each running to the end of its line).
///
/// All three line terminators (`\n`, `\r\n`, bare `\r`) are skipped, in line
/// with `skip_newline`. A bare `\r` must not stop the skip, otherwise a flow
/// mapping spanning several lines in a bare-CR document would yield empty
/// values and split entries.
fn skip_flow_whitespace(&mut self) {
    while let Some(byte) = self.peek() {
        match byte {
            b' ' | b'\t' | b'\n' | b'\r' => self.pos += 1,
            b'#' => self.skip_comment(),
            _ => break,
        }
    }
}
/// Parses the value that follows a `key:` or `- ` on the same line.
///
/// * Unsupported constructs are diagnosed and yield an empty scalar.
/// * An empty rest-of-line (or only a comment) defers to `parse_block_value`
///   for a nested value on the following lines.
/// * `[`/`{` start flow collections, `|`/`>` a block scalar, quotes a quoted
///   scalar; anything else is a plain scalar.
///
/// After flow collections and scalars, trailing whitespace, an optional
/// comment and the line break are consumed.
fn parse_inline_value(&mut self, parent_indent: usize) -> FmValue {
    if self.check_unsupported() {
        self.skip_newline();
        let at = self.abs();
        return FmValue::Scalar(ScalarSpan {
            span: Span::new(at, at),
            text: String::new(),
        });
    }
    let value = match self.peek() {
        None | Some(b'\n' | b'\r') => {
            self.skip_newline();
            return self.parse_block_value(parent_indent);
        }
        Some(b'#') => {
            self.skip_comment();
            self.skip_newline();
            return self.parse_block_value(parent_indent);
        }
        // Block scalars consume their own lines; no trailing cleanup needed.
        Some(b'|' | b'>') => return self.parse_block_scalar(),
        Some(b'[') => self.parse_flow_sequence(),
        Some(b'{') => self.parse_flow_mapping(),
        Some(b'\'') => FmValue::Scalar(self.parse_single_quoted()),
        Some(b'"') => FmValue::Scalar(self.parse_double_quoted()),
        _ => FmValue::Scalar(self.parse_plain_scalar(false)),
    };
    self.skip_trailing();
    value
}
/// Parses a nested block value on the lines following a key or a dash.
///
/// Returns an empty scalar when the input is exhausted or the next content
/// line is not indented deeper than `parent_indent`. When the nesting limit
/// has been reached, a warning is emitted (once) and the whole indented block
/// is skipped, yielding an empty-text scalar spanning the skipped region.
/// Otherwise the block is parsed as a sequence (first content is a `-` item
/// marker) or as a mapping.
fn parse_block_value(&mut self, parent_indent: usize) -> FmValue {
    self.skip_blanks_and_comments();
    let child_indent = self.line_indent();
    if self.at_end() || child_indent <= parent_indent {
        let at = self.abs();
        return FmValue::Scalar(ScalarSpan {
            span: Span::new(at, at),
            text: String::new(),
        });
    }
    if self.depth >= crate::limits::MAX_FRONTMATTER_NESTING {
        self.note_depth_limit();
        let start = self.abs();
        self.skip_indented_block(parent_indent);
        return FmValue::Scalar(ScalarSpan {
            span: Span::new(start, self.abs()),
            text: String::new(),
        });
    }

    // A dash followed by a space, a line break or end of input opens a sequence.
    let content = self.pos + child_indent;
    let starts_sequence = self.src.get(content) == Some(&b'-')
        && matches!(
            self.src.get(content + 1),
            Some(b' ' | b'\n' | b'\r') | None
        );

    self.depth += 1;
    let value = if starts_sequence {
        FmValue::Sequence(self.parse_block_sequence(child_indent))
    } else {
        FmValue::Mapping(self.parse_entries(child_indent))
    };
    self.depth -= 1;
    value
}
/// Emits the nesting-limit warning at the cursor the first time it is called;
/// later calls do nothing.
fn note_depth_limit(&mut self) {
    if self.depth_limit_hit {
        return;
    }
    self.depth_limit_hit = true;
    let at = self.abs();
    let message = format!(
        "YAML nesting exceeds the limit of {}; deeper structure is flattened",
        crate::limits::MAX_FRONTMATTER_NESTING
    );
    self.emit(Span::new(at, at), FmSeverity::Warning, message);
}
/// Skips every following line that is indented deeper than `parent_indent`
/// (blank and comment-only lines in between are skipped as well).
///
/// Stops at the first content line at or below `parent_indent`, or at the end
/// of the input.
fn skip_indented_block(&mut self, parent_indent: usize) {
    loop {
        self.skip_blanks_and_comments();
        let still_inside = !self.at_end() && self.line_indent() > parent_indent;
        if !still_inside {
            break;
        }
        self.skip_inline_whitespace();
        self.skip_to_eol();
        if !self.skip_newline() {
            break;
        }
    }
}
/// Consumes what may follow a value on its line: inline whitespace, an
/// optional `#` comment, and the line terminator.
fn skip_trailing(&mut self) {
    self.skip_inline_whitespace();
    let comment_follows = self.peek() == Some(b'#');
    if comment_follows {
        self.skip_comment();
    }
    self.skip_newline();
}
/// Parses the entries of a block mapping whose keys sit at column `indent`.
///
/// Parsing stops at the end of the input or at the first content line that is
/// indented less than `indent`. Lines indented deeper than `indent` are
/// reported as "unexpected indentation" and skipped. Unsupported constructs,
/// missing keys and missing `:` are diagnosed and the offending line skipped.
///
/// A `- ` item found at this indentation is handed to `parse_block_sequence`
/// and its items are appended to the returned entries.
fn parse_entries(&mut self, indent: usize) -> Vec<FmNode> {
    let mut entries = Vec::new();
    loop {
        self.skip_blanks_and_comments();
        if self.at_end() {
            break;
        }
        let current_indent = self.line_indent();
        if current_indent < indent {
            break;
        }
        if current_indent > indent {
            let abs_start = self.abs();
            self.skip_inline_whitespace();
            self.skip_to_eol();
            self.emit(
                Span::new(abs_start, self.abs()),
                FmSeverity::Error,
                "unexpected indentation".into(),
            );
            self.skip_newline();
            continue;
        }
        let line_start = self.pos;
        let diagnostics_before = self.diagnostics.len();
        let entry_start = self.abs();
        self.skip_inline_whitespace();
        if self.check_unsupported() {
            self.skip_newline();
            continue;
        }
        if self.peek() == Some(b'-')
            && matches!(self.peek_at(1), Some(b' ' | b'\n' | b'\r') | None)
        {
            // `parse_block_sequence` measures the indentation itself, from the
            // start of the line. Rewind first: with the cursor already on the
            // dash it would see an indent of 0 and bail out for any nested
            // mapping (`indent > 0`). Also drop the tab diagnostics from the
            // scan above, because the sequence parser re-scans that whitespace.
            self.pos = line_start;
            self.diagnostics.truncate(diagnostics_before);
            entries.extend(self.parse_block_sequence(indent));
            continue;
        }
        let Some(key) = self.parse_mapping_key() else {
            let err_start = self.abs();
            self.skip_to_eol();
            self.emit(
                Span::new(err_start, self.abs()),
                FmSeverity::Error,
                "expected mapping key".into(),
            );
            self.skip_newline();
            continue;
        };
        if self.peek() != Some(b':') {
            self.emit(self.here_span(), FmSeverity::Error, "expected ':'".into());
            self.skip_to_eol();
            self.skip_newline();
            continue;
        }
        // Step over the colon and any spacing before the value.
        self.pos += 1;
        self.skip_inline_whitespace();
        let value = self.parse_inline_value(indent);
        let entry_end = self.abs();
        entries.push(FmNode::Mapping {
            key,
            value,
            span: Span::new(entry_start, entry_end),
        });
    }
    entries
}
/// Parses a block-mapping key at the cursor.
///
/// Quoted keys are parsed as quoted scalars. A key starting with `[` or `{`
/// is reported as an unsupported complex key and yields `None` (nothing is
/// consumed). A plain key runs up to a `:` that is followed by whitespace, a
/// line break or end of input — or to the end of the line — with trailing
/// whitespace trimmed; an empty plain key yields `None`.
fn parse_mapping_key(&mut self) -> Option<ScalarSpan> {
    match self.peek() {
        Some(b'\'') => return Some(self.parse_single_quoted()),
        Some(b'"') => return Some(self.parse_double_quoted()),
        Some(b'[' | b'{') => {
            let at = self.abs();
            self.emit(
                Span::new(at, at + 1),
                FmSeverity::Error,
                "complex keys are not supported in frontmatter".into(),
            );
            return None;
        }
        _ => {}
    }

    let abs_start = self.abs();
    let start = self.pos;
    while let Some(byte) = self.peek() {
        let ends_key = match byte {
            b':' => matches!(
                self.peek_at(1),
                Some(b' ' | b'\t' | b'\n' | b'\r') | None
            ),
            b'\n' | b'\r' => true,
            _ => false,
        };
        if ends_key {
            break;
        }
        self.pos += 1;
    }
    let text = String::from_utf8_lossy(&self.src[start..self.pos])
        .trim_end()
        .to_string();
    if text.is_empty() {
        return None;
    }
    let span = Span::new(abs_start, abs_start + text.len());
    Some(ScalarSpan { span, text })
}
/// Parses consecutive `- ` items whose dash sits exactly at column `indent`.
///
/// The cursor must be at the start of a line. Parsing stops (leaving the
/// cursor at the start of the offending line) at the end of the input, at a
/// line with a different indentation, or at a line that does not begin with
/// an item marker (a dash followed by a space, a line break or end of input).
///
/// An item with nothing after the dash takes its value from the following,
/// deeper-indented lines; an item followed only by a comment is an empty
/// scalar; otherwise the rest of the line is parsed as an inline value.
fn parse_block_sequence(&mut self, indent: usize) -> Vec<FmNode> {
    let mut items = Vec::new();
    loop {
        self.skip_blanks_and_comments();
        if self.at_end() {
            break;
        }
        let current_indent = self.line_indent();
        if current_indent != indent {
            break;
        }
        let saved = self.pos;
        self.skip_inline_whitespace();
        if self.peek() != Some(b'-')
            || !matches!(self.peek_at(1), Some(b' ' | b'\n' | b'\r') | None)
        {
            self.pos = saved;
            break;
        }
        let item_start = self.abs();
        // Step over the dash and every separating space. Skipping only one
        // space would leave the rest in front of the value, so `-   item`
        // would produce text with leading spaces and `-   [a]` would not be
        // recognised as a flow sequence.
        self.pos += 1;
        while self.peek() == Some(b' ') {
            self.pos += 1;
        }
        let value = match self.peek() {
            None | Some(b'\n' | b'\r') => {
                self.skip_newline();
                self.parse_block_value(indent)
            }
            Some(b'#') => {
                self.skip_comment();
                self.skip_newline();
                let at = self.abs();
                FmValue::Scalar(ScalarSpan {
                    span: Span::new(at, at),
                    text: String::new(),
                })
            }
            Some(_) => {
                // Content nested under the item is measured against the column
                // just past the "- " marker.
                let item_indent = indent + 2;
                self.parse_inline_value(item_indent)
            }
        };
        let item_end = self.abs();
        items.push(FmNode::SequenceItem {
            value,
            span: Span::new(item_start, item_end),
        });
    }
    items
}
}
/// Parses a `---`-delimited frontmatter block at the very start of `source`
/// (after whatever `fm::strip_bom` removes).
///
/// Returns `None` when `source` does not begin with an opening delimiter line
/// or no closing delimiter line is found. Otherwise the returned block's
/// `span` covers both delimiter lines, `content_span` covers the text between
/// them, and all positions are offsets into `source`.
///
/// Content larger than `crate::limits::MAX_FRONTMATTER_BYTES` is not parsed:
/// the block is returned with no entries and a single warning diagnostic.
#[must_use]
pub fn parse_frontmatter_block(source: &str) -> Option<FrontmatterBlock> {
    let (stripped, bom_offset) = fm::strip_bom(source);
    let opener_len = find_opening(stripped)?;
    let rest = &stripped[opener_len..];
    let closing_pos = find_closing(rest)?;
    let (yaml_content, closing) = rest.split_at(closing_pos);

    // The closing line is the three dashes plus its terminator, if any.
    let terminator_len = match &closing.as_bytes()[3..] {
        [b'\r', b'\n', ..] => 2,
        [b'\n' | b'\r', ..] => 1,
        _ => 0,
    };
    let content_start = bom_offset + opener_len;
    let content_end = content_start + closing_pos;
    let block_end = content_end + 3 + terminator_len;

    let (entries, diagnostics) = if yaml_content.len() > crate::limits::MAX_FRONTMATTER_BYTES {
        let oversize = FmDiagnostic {
            span: Span::new(content_start, content_start),
            severity: FmSeverity::Warning,
            message: format!(
                "frontmatter exceeds the {}-byte limit; skipped",
                crate::limits::MAX_FRONTMATTER_BYTES
            ),
        };
        (Vec::new(), vec![oversize])
    } else {
        let mut parser = Parser::new(yaml_content, content_start);
        let entries = parser.parse_entries(0);
        (entries, parser.diagnostics)
    };

    Some(FrontmatterBlock {
        span: Span::new(bom_offset, block_end),
        content_span: Span::new(content_start, content_end),
        entries,
        diagnostics,
    })
}
/// Parses `body` as a delimiter-less YAML block whose first byte sits at
/// offset `base` of the enclosing document.
///
/// Both `span` and `content_span` of the result cover `base..base + body.len()`,
/// and every reported position is shifted by `base`.
///
/// A body larger than `crate::limits::MAX_FRONTMATTER_BYTES` is not parsed:
/// the block is returned with no entries and a single warning diagnostic.
#[must_use]
pub fn parse_yaml_body(body: &str, base: usize) -> FrontmatterBlock {
    let block_span = Span::new(base, base + body.len());
    let (entries, diagnostics) = if body.len() > crate::limits::MAX_FRONTMATTER_BYTES {
        let oversize = FmDiagnostic {
            span: Span::new(base, base),
            severity: FmSeverity::Warning,
            message: format!(
                "metadata block exceeds the {}-byte limit; skipped",
                crate::limits::MAX_FRONTMATTER_BYTES
            ),
        };
        (Vec::new(), vec![oversize])
    } else {
        let mut parser = Parser::new(body, base);
        let entries = parser.parse_entries(0);
        (entries, parser.diagnostics)
    };
    FrontmatterBlock {
        span: block_span,
        content_span: block_span,
        entries,
        diagnostics,
    }
}
// Unit tests for the frontmatter parser: delimiter detection, scalar decoding,
// block/flow collections, span bookkeeping, diagnostics and size/nesting limits.
#[cfg(test)]
#[allow(clippy::expect_used, reason = "tests use expect for clarity")]
#[allow(clippy::panic, reason = "tests use panic for unreachable match arms")]
mod tests {
use super::*;
use crate::fm::{extract_backlinks, find_predicate_line};
// `''` decodes to an apostrophe inside single quotes but stays verbatim in a
// plain scalar; both cases are also run through the crate's invariant checks.
#[test]
fn scalar_fidelity_allows_single_quote_escape() {
let source = "---\nkey: 'it''s a test'\n---\n";
let block = parse_frontmatter_block(source).expect("frontmatter should parse");
crate::invariants::assert_block_wellformed(&block, source);
crate::invariants::assert_frontmatter_scalar_fidelity(&block, source);
let FmNode::Mapping { value, .. } = &block.entries[0] else {
panic!("expected a mapping entry");
};
let FmValue::Scalar(scalar) = value else {
panic!("expected a scalar value");
};
assert_eq!(
scalar.text, "it's a test",
"`''` should decode to a single apostrophe"
);
let plain = "---\nkey: tx''\n---\n";
let block = parse_frontmatter_block(plain).expect("frontmatter should parse");
crate::invariants::assert_frontmatter_scalar_fidelity(&block, plain);
let FmNode::Mapping { value, .. } = &block.entries[0] else {
panic!("expected a mapping entry");
};
let FmValue::Scalar(scalar) = value else {
panic!("expected a scalar value");
};
assert_eq!(scalar.text, "tx''", "a plain scalar keeps `''` verbatim");
}
// A leading byte-order mark must not prevent frontmatter detection.
#[test]
fn strip_bom_present() {
let source = "\u{FEFF}---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse frontmatter with BOM");
assert!(
block.diagnostics.is_empty(),
"BOM frontmatter should have no diagnostics"
);
assert_eq!(block.entries.len(), 1, "should have one top-level entry");
}
#[test]
fn strip_bom_absent() {
let source = "---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse frontmatter without BOM");
assert_eq!(block.span.start, 0, "span should start at 0 without BOM");
}
#[test]
fn no_frontmatter() {
let source = "# Just a heading\n";
assert!(
parse_frontmatter_block(source).is_none(),
"no frontmatter should return None"
);
}
// Two adjacent 4-byte delimiter lines: the block span is 0..8 with no entries.
#[test]
fn empty_frontmatter() {
let source = "---\n---\n# Heading\n";
let block = parse_frontmatter_block(source).expect("should parse empty frontmatter");
assert!(
block.entries.is_empty(),
"empty frontmatter should have no entries"
);
assert_eq!(
block.span,
Span::new(0, 8),
"span should cover both delimiters"
);
}
// The closing delimiter may end the input without a trailing line break.
#[test]
fn frontmatter_at_eof_no_trailing_newline() {
let source = "---\ntitle: test\n---";
let block = parse_frontmatter_block(source).expect("should parse frontmatter at EOF");
assert_eq!(block.entries.len(), 1, "should have one entry");
}
// Delimiters only count when the opening one is at the very start of the input.
#[test]
fn dashes_not_at_start() {
let source = "Some text\n---\ntitle: test\n---\n";
assert!(
parse_frontmatter_block(source).is_none(),
"dashes not at file start should not be frontmatter"
);
}
#[test]
fn simple_key_value() {
let source = "---\ntitle: My Document\nauthor: Test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.entries.len(), 2, "should have two entries");
if let FmNode::Mapping { key, value, .. } = &block.entries[0] {
assert_eq!(key.text, "title", "first key should be title");
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "My Document", "title value should match");
} else {
panic!("title value should be a scalar");
}
} else {
panic!("entry should be a mapping");
}
}
// A key with nothing after the colon yields an empty scalar value.
#[test]
fn null_values() {
let source = "---\nempty:\nnull_tilde: ~\nnull_word: null\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.entries.len(), 3, "should have three entries");
if let FmNode::Mapping { key, value, .. } = &block.entries[0] {
assert_eq!(key.text, "empty", "first key");
if let FmValue::Scalar(s) = value {
assert!(s.text.is_empty(), "empty key should have empty value");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn nested_mapping() {
let source = "---\nbacklinks:\n superseded_by:\n - decisions/38.md\n amended_by:\n - decisions/38.md\n - tickets/14h.md\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.entries.len(), 1, "one top-level entry");
if let FmNode::Mapping { key, value, .. } = &block.entries[0] {
assert_eq!(key.text, "backlinks", "top key");
if let FmValue::Mapping(preds) = value {
assert_eq!(preds.len(), 2, "two predicates");
} else {
panic!("backlinks value should be a mapping");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn block_sequence() {
let source = "---\ntags:\n - rust\n - lsp\n - markdown\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Sequence(items) = value {
assert_eq!(items.len(), 3, "should have three items");
if let FmNode::SequenceItem {
value: FmValue::Scalar(s),
..
} = &items[0]
{
assert_eq!(s.text, "rust", "first item");
} else {
panic!("item should be scalar");
}
} else {
panic!("value should be sequence");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn flow_sequence() {
let source = "---\ntags: [rust, lsp, markdown]\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::FlowSequence { items, .. } = value {
assert_eq!(items.len(), 3, "should have three items");
assert_eq!(items[0].text, "rust", "first item");
assert_eq!(items[1].text, "lsp", "second item");
assert_eq!(items[2].text, "markdown", "third item");
} else {
panic!("value should be flow sequence");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn empty_flow_sequence() {
let source = "---\ntags: []\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::FlowSequence { items, .. } = value {
assert!(items.is_empty(), "should be empty");
} else {
panic!("value should be flow sequence");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn flow_mapping() {
let source = "---\nmeta: {a: b, c: d}\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::FlowMapping { entries, .. } = value {
assert_eq!(entries.len(), 2, "should have two entries");
assert_eq!(entries[0].0.text, "a", "first key");
assert_eq!(entries[0].1.text, "b", "first value");
} else {
panic!("value should be flow mapping");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn single_quoted_scalar() {
let source = "---\ntitle: 'Hello World'\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "Hello World", "single-quoted value");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn single_quoted_escaped_quote() {
let source = "---\ntitle: 'it''s a test'\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "it's a test", "escaped single quote");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn double_quoted_scalar() {
let source = "---\ntitle: \"Hello World\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "Hello World", "double-quoted value");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
// `\n` and `\t` escapes inside double quotes decode to the real characters.
#[test]
fn double_quoted_escapes() {
let source = "---\npath: \"line1\\nline2\\ttab\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "line1\nline2\ttab", "escape sequences");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn block_scalar_literal() {
let source = "---\ndesc: |\n line one\n line two\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
assert!(
matches!(value, FmValue::BlockScalar { .. }),
"should be block scalar"
);
} else {
panic!("should be mapping");
}
}
#[test]
fn block_scalar_folded() {
let source = "---\ndesc: >\n line one\n line two\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
assert!(
matches!(value, FmValue::BlockScalar { .. }),
"should be block scalar"
);
} else {
panic!("should be mapping");
}
}
// A ` #` after a plain scalar starts a comment that is not part of the value.
#[test]
fn inline_comments() {
let source = "---\ntitle: test # this is a comment\nauthor: me\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.entries.len(), 2, "should have two entries");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "test", "comment should be stripped");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
#[test]
fn comment_only_lines() {
let source = "---\n# a comment\ntitle: test\n# another comment\nauthor: me\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.entries.len(), 2, "comments should be skipped");
}
// The same nested document must parse with `\r\n` line endings.
#[test]
fn crlf_line_endings() {
let source = "---\r\nbacklinks:\r\n superseded_by:\r\n - a.md\r\n---\r\n";
let block = parse_frontmatter_block(source).expect("should parse CRLF");
assert_eq!(block.entries.len(), 1, "should parse with CRLF");
let backlinks = extract_backlinks(&block, source);
assert_eq!(
backlinks.get("superseded_by"),
Some(&vec!["a.md".to_string()]),
"should extract backlinks with CRLF"
);
}
// ... and with bare `\r` line endings, including the closing delimiter's `\r`.
#[test]
fn bare_cr_line_endings() {
let source = "---\rbacklinks:\r superseded_by:\r - a.md\r---\r";
let block = parse_frontmatter_block(source).expect("bare CR frontmatter should parse");
assert_eq!(block.entries.len(), 1, "should parse with bare CR");
assert_eq!(
block.span.end,
source.len(),
"block span includes the bare-CR closing delimiter and its `\\r`"
);
let backlinks = extract_backlinks(&block, source);
assert_eq!(
backlinks.get("superseded_by"),
Some(&vec!["a.md".to_string()]),
"should extract backlinks with bare CR"
);
}
#[test]
fn extract_backlinks_full() {
let source = "---\nbacklinks:\n superseded_by:\n - decisions/38.md\n amended_by:\n - decisions/38.md\n - tickets/14h.md\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let bl = extract_backlinks(&block, source);
assert_eq!(bl.len(), 2, "should have two predicates");
assert_eq!(
bl.get("superseded_by"),
Some(&vec!["decisions/38.md".to_string()]),
"superseded_by"
);
assert_eq!(
bl.get("amended_by"),
Some(&vec![
"decisions/38.md".to_string(),
"tickets/14h.md".to_string()
]),
"amended_by"
);
}
#[test]
fn extract_backlinks_flow_sequence() {
let source = "---\nbacklinks:\n superseded_by: [decisions/38.md]\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let bl = extract_backlinks(&block, source);
assert_eq!(
bl.get("superseded_by"),
Some(&vec!["decisions/38.md".to_string()]),
"flow sequence backlinks"
);
}
#[test]
fn extract_backlinks_empty_list() {
let source = "---\nbacklinks:\n superseded_by: []\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let bl = extract_backlinks(&block, source);
assert_eq!(
bl.get("superseded_by"),
Some(&vec![]),
"empty flow sequence"
);
}
#[test]
fn no_backlinks_key() {
let source = "---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
let bl = extract_backlinks(&block, source);
assert!(bl.is_empty(), "no backlinks key should produce empty map");
}
// Pins the exact byte offsets of the block, content and key spans.
#[test]
fn spans_are_correct() {
let source = "---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.span, Span::new(0, 20), "block span covers delimiters");
assert_eq!(
block.content_span,
Span::new(4, 16),
"content span is between delimiters"
);
if let FmNode::Mapping { key, .. } = &block.entries[0] {
assert_eq!(key.span, Span::new(4, 9), "key span");
assert_eq!(
&source[key.span.start..key.span.end],
"title",
"key text matches span"
);
} else {
panic!("should be mapping");
}
}
// With a 3-byte BOM in front, spans must still index into the original source.
#[test]
fn bom_spans_offset_correctly() {
let source = "\u{FEFF}---\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(block.span.start, 3, "block span starts after BOM");
if let FmNode::Mapping { key, .. } = &block.entries[0] {
assert_eq!(
&source[key.span.start..key.span.end],
"title",
"key text matches span with BOM"
);
} else {
panic!("should be mapping");
}
}
#[test]
fn find_predicate_line_correct() {
let source =
"---\nbacklinks:\n superseded_by:\n - a.md\n amended_by:\n - b.md\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert_eq!(
find_predicate_line(&block, "superseded_by", source),
3,
"superseded_by on line 3"
);
assert_eq!(
find_predicate_line(&block, "amended_by", source),
5,
"amended_by on line 5"
);
assert_eq!(
find_predicate_line(&block, "nonexistent", source),
1,
"missing predicate falls back to line 1"
);
}
// Diagnostics: each of the following inputs must produce a matching message.
#[test]
fn tab_in_indentation() {
let source = "---\ntitle: test\n\tindented: bad\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block.diagnostics.iter().any(|d| d.message.contains("tab")),
"should flag tab in indentation"
);
}
#[test]
fn unsupported_anchor() {
let source = "---\ntitle: &anchor value\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("anchor")),
"should flag anchor"
);
}
#[test]
fn unsupported_alias() {
let source = "---\nref: *alias\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("alias")),
"should flag alias"
);
}
#[test]
fn unsupported_tag() {
let source = "---\ncount: !!int 42\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block.diagnostics.iter().any(|d| d.message.contains("tag")),
"should flag tag"
);
}
#[test]
fn unsupported_directive() {
let source = "---\n%YAML 1.2\ntitle: test\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("directive")),
"should flag directive"
);
}
#[test]
fn unclosed_single_quote() {
let source = "---\ntitle: 'unclosed\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("unclosed single-quoted")),
"should flag unclosed single quote"
);
}
#[test]
fn unclosed_double_quote() {
let source = "---\ntitle: \"unclosed\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("unclosed double-quoted")),
"should flag unclosed double quote"
);
}
#[test]
fn unclosed_flow_sequence_error() {
let source = "---\ntags: [a, b\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("unclosed flow sequence")),
"should flag unclosed flow sequence"
);
}
#[test]
fn unclosed_flow_mapping_error() {
let source = "---\nmeta: {a: b\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("unclosed flow mapping")),
"should flag unclosed flow mapping"
);
}
// A backslash before the line break joins the lines and drops the next line's
// leading whitespace.
#[test]
fn double_quoted_multiline() {
let source = "---\ndesc: \"line one \\\n continued\"\n---\n";
let block = parse_frontmatter_block(source).expect("should parse");
if let FmNode::Mapping { value, .. } = &block.entries[0] {
if let FmValue::Scalar(s) = value {
assert_eq!(s.text, "line one continued", "line continuation");
} else {
panic!("should be scalar");
}
} else {
panic!("should be mapping");
}
}
// Builds 300 levels of nested keys (one extra space per level) and expects the
// nesting-limit diagnostic.
#[test]
fn deeply_nested_mapping_hits_limit() {
let mut content = String::from("---\n");
for depth in 0..300 {
content.push_str(&" ".repeat(depth));
content.push_str("k:\n");
}
content.push_str(&" ".repeat(300));
content.push_str("leaf: value\n---\n");
let block = parse_frontmatter_block(&content).expect("frontmatter should parse");
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("YAML nesting exceeds")),
"expected a YAML nesting diagnostic: {:?}",
block.diagnostics
);
}
// Content just over MAX_FRONTMATTER_BYTES is returned unparsed with a diagnostic.
#[test]
fn oversize_frontmatter_is_skipped() {
let big = "a: 1\n".repeat(crate::limits::MAX_FRONTMATTER_BYTES / 5 + 10);
let source = format!("---\n{big}---\n");
let block = parse_frontmatter_block(&source).expect("frontmatter block returned");
assert!(
block.entries.is_empty(),
"oversize frontmatter is not parsed"
);
assert!(
block
.diagnostics
.iter()
.any(|d| d.message.contains("exceeds the")),
"expected an oversize diagnostic: {:?}",
block.diagnostics
);
}
// `parse_yaml_body` shifts every span by `base`, so spans index into the
// enclosing document rather than the body slice.
#[test]
fn parse_yaml_body_offsets_spans_by_base() {
let doc = "prefix\nbacklinks:\n referenced_by:\n - a.md\n";
let base = 7; let body = &doc[base..];
let block = parse_yaml_body(body, base);
assert_eq!(
block.span.start, base,
"block span starts at the base offset: {:?}",
block.span
);
let backlinks = extract_backlinks(&block, doc);
assert_eq!(
backlinks.get("referenced_by").map(Vec::as_slice),
Some(["a.md".to_string()].as_slice()),
"the body parses backlinks: {backlinks:?}"
);
let crate::fm::FmNode::Mapping { key, .. } = &block.entries[0] else {
panic!("expected a mapping entry");
};
assert_eq!(
&doc[key.span.start..key.span.end],
"backlinks",
"the key span resolves against the document"
);
}
#[test]
fn parse_yaml_body_malformed_yaml_diagnoses() {
let body = "backlinks:\n bad_indent\n";
let block = parse_yaml_body(body, 0);
assert!(
block
.diagnostics
.iter()
.any(|d| d.severity == FmSeverity::Error),
"malformed body yields an error diagnostic: {:?}",
block.diagnostics
);
}
// An empty body yields no entries, no diagnostics and a zero-length span at `base`.
#[test]
fn parse_yaml_body_empty_is_inert() {
let block = parse_yaml_body("", 42);
assert!(block.entries.is_empty(), "empty body has no entries");
assert!(
block.diagnostics.is_empty(),
"empty body has no diagnostics: {:?}",
block.diagnostics
);
assert_eq!(
(block.span.start, block.span.end),
(42, 42),
"empty body span is zero-length at base"
);
}
}