#![allow(
clippy::panic,
clippy::missing_panics_doc,
clippy::too_many_lines,
clippy::too_long_first_doc_paragraph,
reason = "these are assertion helpers: panicking with a descriptive message on violation is their entire contract, the tree-wellformedness check is necessarily long, and each helper intentionally leads with a full explanatory paragraph describing the invariant it enforces"
)]
use std::collections::HashMap;
use std::path::Path;
use crate::block::{self, ElementKind, Syntax, Tree};
use crate::config::Config;
use crate::fm::{self, Exceptions, FmNode, FmValue, FrontmatterBlock, ScalarSpan};
use crate::html::HtmlTag;
use crate::line_index::LineIndex;
use crate::validation::Diagnostic;
use crate::workspace::parse_content;
use crate::{json, lsp, metadata, structural, toml, yaml};
/// Probes `source` for a leading frontmatter block, trying each supported syntax in turn.
///
/// The YAML parser is consulted first, then TOML, then JSON. The first parser that yields a
/// block wins and its block is returned together with the matching [`Syntax`]. When none of
/// them yields a block the result is `(None, Syntax::Yaml)`.
#[must_use]
pub fn detect_frontmatter(source: &str) -> (Option<FrontmatterBlock>, Syntax) {
    if let Some(found) = yaml::parse_frontmatter_block(source) {
        return (Some(found), Syntax::Yaml);
    }
    if let Some(found) = toml::parse_frontmatter_block(source) {
        return (Some(found), Syntax::Toml);
    }
    if let Some(found) = json::parse_frontmatter_block(source) {
        return (Some(found), Syntax::Json);
    }
    // Nothing matched: report "no block" with the YAML syntax as the fallback tag.
    (None, Syntax::Yaml)
}
/// Panics with a descriptive message unless `tree` is structurally sound.
///
/// The checks run in this order: the node list is non-empty, holds exactly one `Document`, and
/// that `Document` sits at index 0 without a parent; every node span is ordered, inside the
/// source, and aligned to UTF-8 char boundaries; every non-root node has an in-range parent
/// whose span encloses its own; every parent chain ends at index 0 without looping; and every
/// tree diagnostic span is ordered and inside the source.
pub fn assert_tree_wellformed(tree: &Tree) {
    let nodes = tree.nodes();
    let source = tree.source();
    let len = source.len();
    let total = nodes.len();

    // Root shape: one Document, at index 0, parentless.
    assert!(!nodes.is_empty(), "tree must contain the Document root");
    let doc_count = nodes
        .iter()
        .filter(|candidate| matches!(candidate.kind, ElementKind::Document))
        .count();
    assert_eq!(
        doc_count, 1,
        "tree must have exactly one Document node, found {doc_count}"
    );
    let root = &nodes[0];
    assert!(
        matches!(root.kind, ElementKind::Document),
        "root node (index 0) must be the Document, found {:?}",
        root.kind
    );
    assert!(root.parent.is_none(), "Document root must have no parent");

    // Per-node span sanity, then containment in the parent for every non-root node.
    for (id, node) in nodes.iter().enumerate() {
        assert!(
            node.span.start <= node.span.end,
            "node {id} ({:?}) has start {} after end {}",
            node.kind,
            node.span.start,
            node.span.end
        );
        assert!(
            node.span.end <= len,
            "node {id} ({:?}) span end {} exceeds source length {len}",
            node.kind,
            node.span.end
        );
        assert!(
            source.is_char_boundary(node.span.start),
            "node {id} ({:?}) span start {} is not a UTF-8 char boundary",
            node.kind,
            node.span.start
        );
        assert!(
            source.is_char_boundary(node.span.end),
            "node {id} ({:?}) span end {} is not a UTF-8 char boundary",
            node.kind,
            node.span.end
        );

        // The root has no parent to be contained in.
        if id == 0 {
            continue;
        }
        let Some(parent_id) = node.parent else {
            panic!("non-root node {id} ({:?}) must have a parent", node.kind);
        };
        assert!(
            parent_id < total,
            "node {id} parent index {parent_id} is out of range ({} nodes)",
            total
        );
        let parent = &nodes[parent_id];
        let enclosed = parent.span.start <= node.span.start && node.span.end <= parent.span.end;
        assert!(
            enclosed,
            "node {id} ({:?}) span {:?} is not contained in parent {parent_id} ({:?}) span {:?}",
            node.kind,
            node.span,
            parent.kind,
            parent.span
        );
    }

    // Walk every parent chain; more steps than there are nodes can only mean a cycle.
    for id in 0..total {
        let mut current = id;
        let mut steps = 0usize;
        while let Some(parent) = nodes[current].parent {
            assert!(
                parent < total,
                "ancestor of node {id} has out-of-range parent index {parent}"
            );
            current = parent;
            steps += 1;
            assert!(
                steps <= total,
                "ancestor chain from node {id} exceeds node count — cycle detected"
            );
        }
        assert_eq!(
            current, 0,
            "ancestor chain from node {id} must terminate at the Document root"
        );
    }

    // Diagnostics attached to the tree must also point inside the source.
    for diag in tree.diagnostics() {
        let ordered = diag.span.start <= diag.span.end;
        assert!(
            ordered && diag.span.end <= len,
            "diagnostic span {:?} out of bounds for source length {len}",
            diag.span
        );
    }
}
/// Panics with a descriptive message unless every span carried by `block` is usable against
/// `source`.
///
/// Verified, in order:
/// - `block.span` is ordered, within `source`, and on UTF-8 char boundaries;
/// - `block.content_span` is ordered, within `source`, and on UTF-8 char boundaries;
/// - every diagnostic span on the block is ordered and within `source`.
///
/// The content span gets the same char-boundary check as the block span because this file
/// slices `source` with it directly. Without the check, a span ending mid-codepoint would show
/// up as an opaque string-slicing panic rather than as a violation reported here.
pub fn assert_block_wellformed(block: &FrontmatterBlock, source: &str) {
    let len = source.len();

    assert!(
        block.span.start <= block.span.end && block.span.end <= len,
        "frontmatter block span {:?} out of bounds for source length {len}",
        block.span
    );
    assert!(
        source.is_char_boundary(block.span.start) && source.is_char_boundary(block.span.end),
        "frontmatter block span {:?} not on UTF-8 char boundaries",
        block.span
    );

    assert!(
        block.content_span.start <= block.content_span.end && block.content_span.end <= len,
        "frontmatter content span {:?} out of bounds for source length {len}",
        block.content_span
    );
    assert!(
        source.is_char_boundary(block.content_span.start)
            && source.is_char_boundary(block.content_span.end),
        "frontmatter content span {:?} not on UTF-8 char boundaries",
        block.content_span
    );

    for diag in &block.diagnostics {
        assert!(
            diag.span.start <= diag.span.end && diag.span.end <= len,
            "frontmatter diagnostic span {:?} out of bounds for source length {len}",
            diag.span
        );
    }
}
/// Panics unless each scalar collected from `block` still agrees with the bytes of `source`
/// that its span points at.
///
/// For every scalar returned by [`collect_scalars`]:
/// 1. the span must be ordered, inside `source`, and on UTF-8 char boundaries;
/// 2. slices containing a backslash, `\n`, or `\r` are skipped;
/// 3. otherwise the scalar's `text` must occur inside the slice, either verbatim or after
///    collapsing each doubled `''` in the slice to a single `'`.
///
/// The ordering check is performed explicitly before slicing. An inverted span would otherwise
/// pass the bounds and boundary test and then panic inside the slice expression with a generic
/// message instead of one naming the offending span.
pub fn assert_frontmatter_scalar_fidelity(block: &FrontmatterBlock, source: &str) {
    for sc in collect_scalars(block) {
        assert!(
            sc.span.start <= sc.span.end,
            "scalar span {:?} has start after end",
            sc.span
        );
        assert!(
            sc.span.end <= source.len()
                && source.is_char_boundary(sc.span.start)
                && source.is_char_boundary(sc.span.end),
            "scalar span {:?} out of bounds / off a char boundary (len {})",
            sc.span,
            source.len()
        );

        let sliced = &source[sc.span.start..sc.span.end];
        // These slices are not compared textually at all.
        if sliced.contains('\\') || sliced.contains('\n') || sliced.contains('\r') {
            continue;
        }

        let text = sc.text.as_str();
        // Second alternative: compare against the slice with `''` collapsed to `'`.
        let occurs = sliced.contains(text)
            || (sliced.contains("''") && sliced.replace("''", "'").contains(text));
        assert!(
            occurs,
            "resolved scalar text {:?} does not occur in its source slice {:?} \
             — encoding corruption",
            sc.text, sliced
        );
    }
}
/// Gathers references to the scalars reachable from `block.entries`, in traversal order.
///
/// Each top-level entry is handed to `collect_node_scalars`, which appends to the shared
/// output vector.
#[must_use]
pub fn collect_scalars(block: &FrontmatterBlock) -> Vec<&ScalarSpan> {
    let mut scalars = Vec::new();
    for top_level in &block.entries {
        collect_node_scalars(top_level, &mut scalars);
    }
    scalars
}
/// Appends the scalars of one frontmatter node to `out`.
///
/// A mapping node contributes its key first and then whatever its value holds; a sequence item
/// contributes only what its value holds.
fn collect_node_scalars<'a>(node: &'a FmNode, out: &mut Vec<&'a ScalarSpan>) {
    let inner = match node {
        FmNode::Mapping { key, value, .. } => {
            out.push(key);
            value
        }
        FmNode::SequenceItem { value, .. } => value,
    };
    collect_value_scalars(inner, out);
}
/// Appends the scalars held by a frontmatter value to `out`.
///
/// Plain scalars are pushed directly. Block-style sequences and mappings recurse through
/// `collect_node_scalars`. Flow sequences contribute each item, flow mappings contribute each
/// key followed by its value, and block scalars contribute nothing.
fn collect_value_scalars<'a>(value: &'a FmValue, out: &mut Vec<&'a ScalarSpan>) {
    match value {
        FmValue::Scalar(scalar) => out.push(scalar),
        FmValue::Sequence(children) | FmValue::Mapping(children) => {
            for child in children {
                collect_node_scalars(child, out);
            }
        }
        FmValue::FlowSequence { items, .. } => out.extend(items.iter()),
        FmValue::FlowMapping { entries, .. } => {
            // Key first, then value, for each pair in order.
            out.extend(entries.iter().flat_map(|(key, val)| [key, val]));
        }
        FmValue::BlockScalar { .. } => {}
    }
}
/// Panics unless the `url` and `title` of every link, image, video, and audio node can be
/// found somewhere in the tree's source text.
///
/// Fields that are empty, or that contain a backslash, `\n`, or `\r`, are not checked. A
/// leading `mailto:` is removed from the field before searching for it in the source.
pub fn assert_inline_resource_fidelity(tree: &Tree) {
    let source = tree.source();
    for node in tree.nodes() {
        let (ElementKind::Link { url, title }
        | ElementKind::Image { url, title }
        | ElementKind::Video { url, title }
        | ElementKind::Audio { url, title }) = &node.kind
        else {
            continue;
        };

        let checkable = [url, title]
            .into_iter()
            .filter(|candidate| !candidate.is_empty() && !candidate.contains(['\\', '\n', '\r']));
        for field in checkable {
            let needle = match field.strip_prefix("mailto:") {
                Some(rest) => rest,
                None => field,
            };
            assert!(
                source.contains(needle),
                "resolved inline field {field:?} (as {needle:?}) does not occur in the source \
                 — encoding corruption"
            );
        }
    }
}
/// Panics unless every `Strong`, `Emphasis`, and `Strikethrough` node covers source text that
/// is wrapped in the delimiters its kind requires.
///
/// For each such node the covered text must:
/// - start with `~` for strikethrough, or with `*` / `_` otherwise;
/// - begin and end with at least the minimum run of that delimiter (2 for `Strong`, 1 for the
///   other two kinds);
/// - for strikethrough, have equal leading and trailing runs of length 1 or 2;
/// - be longer than twice the minimum delimiter length.
///
/// The node span is resolved with a checked slice, so a span that is inverted, out of range,
/// or off a UTF-8 char boundary is reported with the node's kind and span. This keeps the
/// helper usable on its own, without a prior tree well-formedness check.
pub fn assert_emphasis_span_fidelity(tree: &Tree) {
    let source = tree.source();
    for node in tree.nodes() {
        let open_len = match node.kind {
            ElementKind::Strong => 2,
            ElementKind::Emphasis | ElementKind::Strikethrough => 1,
            _ => continue,
        };

        let Some(slice) = source.get(node.span.start..node.span.end) else {
            panic!(
                "emphasis node {:?} span {:?} is inverted, out of bounds, or off a UTF-8 char \
                 boundary (source length {})",
                node.kind,
                node.span,
                source.len()
            );
        };

        // An empty slice yields a space, which is never a valid delimiter and fails below.
        let delim = slice.chars().next().unwrap_or(' ');
        let is_strikethrough = matches!(node.kind, ElementKind::Strikethrough);
        let is_strike_delim = delim == '~';
        let is_emphasis_delim = delim == '*' || delim == '_';
        let delim_matches_kind = if is_strikethrough {
            is_strike_delim
        } else {
            is_emphasis_delim
        };
        assert!(
            delim_matches_kind,
            "emphasis run {slice:?} starts with {delim:?}, not a delimiter for {:?}",
            node.kind
        );

        let lead = slice.chars().take_while(|&c| c == delim).count();
        let trail = slice.chars().rev().take_while(|&c| c == delim).count();
        assert!(
            lead >= open_len && trail >= open_len,
            "emphasis run {slice:?} is not delimited by at least {open_len} {delim:?} at each \
             edge for {:?}",
            node.kind
        );
        if is_strike_delim {
            assert!(
                lead == trail && (1..=2).contains(&lead),
                "strikethrough run {slice:?} edges {lead}/{trail} are not a symmetric 1- or \
                 2-tilde pair"
            );
        }
        assert!(
            slice.len() > 2 * open_len,
            "emphasis run {slice:?} has no content between its delimiters for {:?}",
            node.kind
        );
    }
}
/// Panics unless the lengths and spans recorded on `tag` fit inside `text`.
///
/// For an opening tag the consumed length is checked first, then each attribute's name span
/// and, when present, its value span. Closing tags and comments only carry a consumed length.
pub fn assert_html_tag_in_bounds(tag: &HtmlTag, text: &str) {
    let len = text.len();
    // A span fits when it is ordered and ends no later than the text does.
    let fits = |start: usize, end: usize| start <= end && end <= len;

    match tag {
        HtmlTag::Close { len: consumed, .. } | HtmlTag::Comment { len: consumed } => {
            assert!(
                *consumed <= len,
                "tag len {consumed} exceeds text length {len}"
            );
        }
        HtmlTag::Open {
            attrs,
            len: consumed,
            ..
        } => {
            assert!(
                *consumed <= len,
                "open tag len {consumed} exceeds text {len}"
            );
            for attr in attrs {
                assert!(
                    fits(attr.name_span.start, attr.name_span.end),
                    "attribute name span {:?} out of bounds for text length {len}",
                    attr.name_span
                );
                let Some(value_span) = attr.value_span else {
                    continue;
                };
                assert!(
                    fits(value_span.start, value_span.end),
                    "attribute value span {value_span:?} out of bounds for text length {len}"
                );
            }
        }
    }
}
/// Panics unless converting a byte offset to an LSP position and back returns the same offset.
///
/// Every offset from `0` through `source.len()` that lies on a UTF-8 char boundary is tested,
/// except an offset that falls between the `\r` and the `\n` of a `\r\n` pair.
pub fn assert_position_round_trip(source: &str) {
    let boundaries = (0..=source.len()).filter(|&candidate| source.is_char_boundary(candidate));
    for off in boundaries {
        // `off` is a char boundary here, so splitting the text at it is always valid.
        let (before, after) = source.split_at(off);
        let inside_crlf = before.ends_with('\r') && after.starts_with('\n');
        if inside_crlf {
            continue;
        }

        let pos = crate::server::byte_offset_to_lsp_position(source, off);
        let back = crate::server::lsp_position_to_byte_offset(source, pos);
        assert_eq!(
            back, off,
            "byte → LSP position → byte must round-trip at offset {off} \
             (position {pos:?} mapped back to {back})"
        );
    }
}
/// Panics unless `index` produces the same positions as the direct byte-offset conversion, and
/// maps those positions back to the offsets they came from.
///
/// At every char-boundary offset of `source`, `index.position` must equal
/// `byte_offset_to_lsp_position`. The reverse mapping through `index.offset` is then checked
/// too, except at an offset that sits between the `\r` and `\n` of a `\r\n` pair.
pub fn assert_line_index_agrees(source: &str, index: &LineIndex) {
    let boundaries = (0..=source.len()).filter(|&candidate| source.is_char_boundary(candidate));
    for off in boundaries {
        let scalar = crate::server::byte_offset_to_lsp_position(source, off);
        let indexed = index.position(source, off);
        assert_eq!(
            indexed, scalar,
            "LineIndex position {indexed:?} disagrees with the scalar conversion \
             {scalar:?} at offset {off}"
        );

        // Forward agreement is required everywhere; only the reverse check skips CRLF interiors.
        let (before, after) = source.split_at(off);
        if before.ends_with('\r') && after.starts_with('\n') {
            continue;
        }

        let back = index.offset(source, indexed);
        assert_eq!(
            back, off,
            "LineIndex offset → position → offset must round-trip at {off} \
             (position {indexed:?} mapped back to {back})"
        );
    }
}
/// A text replacement addressed by a start and an end line/character position.
///
/// `apply_lsp_edit` turns each line/character pair into an `lsp::Position`, resolves both
/// through a `LineIndex`, and splices `text` over the range between the two resulting offsets.
#[derive(Debug, Clone)]
pub struct Edit {
/// Line component of the first position.
pub start_line: u32,
/// Character component of the first position.
pub start_char: u32,
/// Line component of the second position.
pub end_line: u32,
/// Character component of the second position.
pub end_char: u32,
/// Text inserted in place of the addressed range.
pub text: String,
}
/// Returns a copy of `source` with the range addressed by `edit` replaced by `edit.text`.
///
/// Both positions of the edit are resolved to byte offsets through `index`. The two offsets
/// are then ordered, so an edit whose end precedes its start is applied to the same range as
/// its mirrored counterpart.
#[must_use]
pub fn apply_lsp_edit(source: &str, index: &LineIndex, edit: &Edit) -> String {
    let resolve =
        |line: u32, character: u32| index.offset(source, lsp::Position { line, character });

    let first = resolve(edit.start_line, edit.start_char);
    let second = resolve(edit.end_line, edit.end_char);
    let (lo, hi) = if first <= second {
        (first, second)
    } else {
        (second, first)
    };

    // prefix + replacement + suffix
    [&source[..lo], edit.text.as_str(), &source[hi..]].concat()
}
/// Parses `source` as `oracle.md` under the default configuration and runs the document-level
/// assertion helpers against the result.
///
/// The tree checks run first (well-formedness, inline resource fidelity, emphasis fidelity),
/// then the position round-trip and line-index agreement checks. If [`detect_frontmatter`]
/// finds a block, it is checked for well-formedness and scalar fidelity as well.
pub fn assert_document_invariants(source: &str) {
    let config = Config::default();
    let file = parse_content(source, Path::new("oracle.md"), &config);
    let tree = &file.tree;

    assert_tree_wellformed(tree);
    assert_inline_resource_fidelity(tree);
    assert_emphasis_span_fidelity(tree);

    assert_position_round_trip(source);
    assert_line_index_agrees(source, &file.line_index);

    let (frontmatter, _syntax) = detect_frontmatter(source);
    if let Some(block) = frontmatter {
        assert_block_wellformed(&block, source);
        assert_frontmatter_scalar_fidelity(&block, source);
    }
}
/// Checks the document invariants on `base` and again after each edit in `edits` is applied.
///
/// Edits are applied cumulatively, in order. A fresh `LineIndex` is built from the current
/// text before each edit is applied.
pub fn assert_edit_sequence_stable(base: &str, edits: &[Edit]) {
    assert_document_invariants(base);

    let mut current = base.to_owned();
    for edit in edits {
        let next = {
            let index = LineIndex::new(&current);
            apply_lsp_edit(&current, &index, edit)
        };
        assert_document_invariants(&next);
        current = next;
    }
}
/// Reports whether the YAML, TOML, or JSON frontmatter parser yields a block for `source`.
///
/// The parsers are tried in that order and evaluation stops at the first one that succeeds.
fn has_leading_frontmatter_block(source: &str) -> bool {
    if yaml::parse_frontmatter_block(source).is_some() {
        return true;
    }
    if toml::parse_frontmatter_block(source).is_some() {
        return true;
    }
    json::parse_frontmatter_block(source).is_some()
}
/// Builds the tree used for carrier lookups by calling `block::parse_tree_with_entries` on
/// `source` with `Syntax::Yaml` and `None` for both of its optional arguments.
fn carrier_tree(source: &str) -> Tree {
    let syntax = Syntax::Yaml;
    block::parse_tree_with_entries(source, None, syntax, None)
}
/// Panics if metadata read from a carrier block differs from the same content parsed as a
/// leading `---` block.
///
/// Nothing is checked when `source` already has a leading frontmatter block, or when no
/// carrier block is found in its tree. Otherwise the carrier block is validated for
/// well-formedness and scalar fidelity, and its backlinks and exceptions are extracted. The
/// carrier's content is then re-wrapped by `equivalent_leading_block`. If that succeeds and
/// the result parses as YAML frontmatter, the backlinks and exceptions from both routes must
/// agree.
pub fn assert_carrier_fidelity(source: &str) {
    if has_leading_frontmatter_block(source) {
        return;
    }

    let tree = carrier_tree(source);
    let Some(carrier) = metadata::parse_carrier_block(&tree) else {
        return;
    };
    assert_block_wellformed(&carrier, source);
    assert_frontmatter_scalar_fidelity(&carrier, source);

    // Carrier-side results are computed before any of the remaining early exits.
    let links_via_carrier = fm::extract_backlinks(&carrier, source);
    let exceptions_via_carrier = fm::extract_exceptions(&carrier, source);

    let content = &carrier.content_span;
    let body = &source[content.start..content.end];
    let Some(rewrapped) = equivalent_leading_block(body) else {
        return;
    };
    let Some(leading) = yaml::parse_frontmatter_block(&rewrapped) else {
        return;
    };

    let links_via_leading = fm::extract_backlinks(&leading, &rewrapped);
    let exceptions_via_leading = fm::extract_exceptions(&leading, &rewrapped);

    assert_eq!(
        links_via_carrier, links_via_leading,
        "backlinks from a `yaml lattice` carrier must equal those from the same YAML as a \
         leading `---` block — carrier-specific parse drift\n carrier body: {body:?}"
    );
    assert_exceptions_equivalent(&exceptions_via_carrier, &exceptions_via_leading, body);
}
/// Wraps `body` between `---` fence lines, or returns `None` when that is not attempted.
///
/// `None` is returned when any line of `body` equals `---` once trailing `\r` characters are
/// removed, when `body` starts with U+FEFF, or when `body` does not end in `\n` or `\r`.
fn equivalent_leading_block(body: &str) -> Option<String> {
    let contains_fence_line = body
        .lines()
        .map(|line| line.trim_end_matches('\r'))
        .any(|line| line == "---");
    let starts_with_bom = body.starts_with('\u{feff}');
    let line_terminated = body.ends_with(['\n', '\r']);

    let wrappable = !contains_fence_line && !starts_with_bom && line_terminated;
    wrappable.then(|| format!("---\n{body}---\n"))
}
/// Panics unless the exceptions extracted from the carrier and from the leading block agree.
///
/// Entry lists are compared as `(reference, reason)` pairs and count keys as
/// `(expected, reason, raw)` triples. `stale_references` and `bare_paths` are compared
/// separately, entries before counts. `body` is only used in the failure messages.
fn assert_exceptions_equivalent(carrier: &Exceptions, leading: &Exceptions, body: &str) {
    /// Projects each entry onto the two fields that take part in the comparison.
    fn pairs(entries: &[fm::ExceptionEntry]) -> Vec<(&str, &str)> {
        let mut projected = Vec::with_capacity(entries.len());
        for entry in entries {
            projected.push((entry.reference.as_str(), entry.reason.as_str()));
        }
        projected
    }

    /// Projects an optional count key onto an owned comparison triple.
    fn count(key: Option<&fm::CountKey>) -> Option<(usize, String, String)> {
        let found = key?;
        Some((found.expected, found.reason.clone(), found.raw.clone()))
    }

    assert_eq!(
        pairs(&carrier.stale_references),
        pairs(&leading.stale_references),
        "carrier and leading-block `stale_references` exceptions must match — carrier parse \
         drift\n carrier body: {body:?}"
    );
    assert_eq!(
        pairs(&carrier.bare_paths),
        pairs(&leading.bare_paths),
        "carrier and leading-block `bare_paths` exceptions must match — carrier parse drift\n \
         carrier body: {body:?}"
    );
    assert_eq!(
        count(carrier.stale_references_count.as_ref()),
        count(leading.stale_references_count.as_ref()),
        "carrier and leading-block `stale_references` count-keys must match — carrier parse \
         drift\n carrier body: {body:?}"
    );
    assert_eq!(
        count(carrier.bare_paths_count.as_ref()),
        count(leading.bare_paths_count.as_ref()),
        "carrier and leading-block `bare_paths` count-keys must match — carrier parse drift\n \
         carrier body: {body:?}"
    );
}
/// Returns the backlinks extracted from the carrier block of `source`.
///
/// The map is empty when `source` has a leading frontmatter block, or when no carrier block is
/// found in its tree.
#[must_use]
pub fn carrier_backlinks(source: &str) -> HashMap<String, Vec<String>> {
    if has_leading_frontmatter_block(source) {
        return HashMap::new();
    }

    let tree = carrier_tree(source);
    let Some(carrier) = metadata::parse_carrier_block(&tree) else {
        return HashMap::new();
    };
    fm::extract_backlinks(&carrier, source)
}
/// Parses `source` as `fuzz.md` under the default configuration and returns the diagnostics
/// produced by `structural::collect`.
///
/// Both existence callbacks delegate to `path_exists_oracle`. The exceptions come from the
/// parsed file's frontmatter when it has one, and are the default (empty) set otherwise.
#[must_use]
pub fn collect_structural(source: &str) -> Vec<Diagnostic> {
    let config = Config::default();
    let rel_path = Path::new("fuzz.md");
    let file = parse_content(source, rel_path, &config);

    let no_exceptions = Exceptions::default();
    let exceptions = match &file.frontmatter {
        Some(frontmatter) => &frontmatter.exceptions,
        None => &no_exceptions,
    };

    let local_lookup = |candidate: &Path| path_exists_oracle(candidate);
    let external_lookup = |candidate: &Path| path_exists_oracle(candidate);

    structural::collect(
        &file.tree,
        rel_path,
        &config,
        &local_lookup,
        &external_lookup,
        exceptions,
    )
}
/// Decides whether `path` "exists" purely from its bytes, without touching the filesystem.
///
/// The answer is `true` exactly when the sum of the path's encoded bytes is even, so the same
/// path always gets the same answer.
#[must_use]
fn path_exists_oracle(path: &Path) -> bool {
    let mut total = 0u32;
    for &byte in path.as_os_str().as_encoded_bytes() {
        total += u32::from(byte);
    }
    // Even sum <=> lowest bit clear.
    total & 1 == 0
}
/// Panics unless every diagnostic in `diagnostics` points at a valid location in `source`.
///
/// A diagnostic without a span must carry a line number of at least 1. A diagnostic with a
/// span must have that span ordered, inside `source`, and on UTF-8 char boundaries. Each
/// endpoint must also survive the byte → LSP position → byte round trip, unless it sits
/// between the `\r` and `\n` of a `\r\n` pair.
pub fn assert_structural_diagnostics_valid(source: &str, diagnostics: &[Diagnostic]) {
    let len = source.len();
    for diag in diagnostics {
        let Some(span) = diag.span else {
            // Line-only diagnostics have nothing but the line number to validate.
            assert!(
                diag.line >= 1,
                "line-only structural diagnostic must carry a 1-based line, found {} ({:?})",
                diag.line,
                diag.message
            );
            continue;
        };

        let ordered = span.start <= span.end;
        assert!(
            ordered && span.end <= len,
            "structural diagnostic span {span:?} out of bounds for source length {len} ({:?})",
            diag.message
        );
        assert!(
            source.is_char_boundary(span.start),
            "structural diagnostic span start {} is not a UTF-8 char boundary ({:?})",
            span.start,
            diag.message
        );
        assert!(
            source.is_char_boundary(span.end),
            "structural diagnostic span end {} is not a UTF-8 char boundary ({:?})",
            span.end,
            diag.message
        );

        for off in [span.start, span.end] {
            // Both endpoints were just verified as char boundaries, so the split is valid.
            let (before, after) = source.split_at(off);
            if before.ends_with('\r') && after.starts_with('\n') {
                continue;
            }
            let pos = crate::server::byte_offset_to_lsp_position(source, off);
            let back = crate::server::lsp_position_to_byte_offset(source, pos);
            assert_eq!(
                back, off,
                "structural diagnostic span endpoint {off} must round-trip \
                 byte → LSP position → byte (position {pos:?} mapped back to {back}) ({:?})",
                diag.message
            );
        }
    }
}
/// Collects the structural diagnostics for `source` and validates each of them against it.
pub fn assert_structural_invariants(source: &str) {
    let found = collect_structural(source);
    assert_structural_diagnostics_valid(source, found.as_slice());
}