#![allow(
clippy::panic,
clippy::missing_panics_doc,
clippy::too_many_lines,
clippy::too_long_first_doc_paragraph,
reason = "these are assertion helpers: panicking with a descriptive message on violation is their entire contract, the tree-wellformedness check is necessarily long, and each helper intentionally leads with a full explanatory paragraph describing the invariant it enforces"
)]
use std::path::Path;
use crate::block::{ElementKind, Syntax, Tree};
use crate::config::Config;
use crate::fm::{Exceptions, FmNode, FmValue, FrontmatterBlock, ScalarSpan};
use crate::html::HtmlTag;
use crate::line_index::LineIndex;
use crate::validation::Diagnostic;
use crate::workspace::parse_content;
use crate::{json, lsp, structural, toml, yaml};
/// Probes `source` for a frontmatter block, trying the YAML parser first, then TOML, then JSON.
///
/// The first parser that yields a block wins, and its block is returned alongside the matching
/// [`Syntax`] variant. Later parsers are not consulted once an earlier one succeeds. When none of
/// them yields a block the result is `(None, Syntax::Yaml)`: YAML doubles as the placeholder
/// syntax for "no frontmatter found".
#[must_use]
pub fn detect_frontmatter(source: &str) -> (Option<FrontmatterBlock>, Syntax) {
    if let Some(block) = yaml::parse_frontmatter_block(source) {
        return (Some(block), Syntax::Yaml);
    }
    if let Some(block) = toml::parse_frontmatter_block(source) {
        return (Some(block), Syntax::Toml);
    }
    // Last candidate: a miss here is what produces the `(None, Syntax::Yaml)` default.
    json::parse_frontmatter_block(source)
        .map_or((None, Syntax::Yaml), |block| (Some(block), Syntax::Json))
}
/// Panics with a descriptive message unless `tree` is structurally well-formed.
///
/// The checks run in this order:
///
/// 1. the node list is non-empty, contains exactly one `Document` node, and that node sits at
///    index 0 with no parent;
/// 2. for every node, the span is ordered, ends within the source, and both ends fall on UTF-8
///    char boundaries; every node other than index 0 additionally has an in-range parent whose
///    span contains the node's span;
/// 3. following parent links from any node reaches index 0 within `nodes.len()` hops, which rules
///    out cycles;
/// 4. every tree diagnostic span is ordered and ends within the source.
///
/// # Panics
///
/// Panics on the first violated invariant.
pub fn assert_tree_wellformed(tree: &Tree) {
    let nodes = tree.nodes();
    let source = tree.source();
    let len = source.len();

    // --- Root invariants -------------------------------------------------------------------
    assert!(!nodes.is_empty(), "tree must contain the Document root");
    let doc_count = nodes
        .iter()
        .map(|n| usize::from(matches!(n.kind, ElementKind::Document)))
        .sum::<usize>();
    assert_eq!(
        doc_count, 1,
        "tree must have exactly one Document node, found {doc_count}"
    );
    let root = &nodes[0];
    assert!(
        matches!(root.kind, ElementKind::Document),
        "root node (index 0) must be the Document, found {:?}",
        root.kind
    );
    assert!(root.parent.is_none(), "Document root must have no parent");

    // --- Per-node span and parent invariants -----------------------------------------------
    for (id, node) in nodes.iter().enumerate() {
        let span = &node.span;
        assert!(
            span.start <= span.end,
            "node {id} ({:?}) has start {} after end {}",
            node.kind,
            span.start,
            span.end
        );
        assert!(
            span.end <= len,
            "node {id} ({:?}) span end {} exceeds source length {len}",
            node.kind,
            span.end
        );
        assert!(
            source.is_char_boundary(span.start),
            "node {id} ({:?}) span start {} is not a UTF-8 char boundary",
            node.kind,
            span.start
        );
        assert!(
            source.is_char_boundary(span.end),
            "node {id} ({:?}) span end {} is not a UTF-8 char boundary",
            node.kind,
            span.end
        );

        // The root was already shown to be parentless; only the remaining nodes need a parent.
        if id == 0 {
            continue;
        }
        let Some(parent_id) = node.parent else {
            panic!("non-root node {id} ({:?}) must have a parent", node.kind)
        };
        assert!(
            parent_id < nodes.len(),
            "node {id} parent index {parent_id} is out of range ({} nodes)",
            nodes.len()
        );
        let parent = &nodes[parent_id];
        let escapes_parent =
            node.span.start < parent.span.start || parent.span.end < node.span.end;
        assert!(
            !escapes_parent,
            "node {id} ({:?}) span {:?} is not contained in parent {parent_id} ({:?}) span {:?}",
            node.kind,
            node.span,
            parent.kind,
            parent.span
        );
    }

    // --- Ancestor chains -------------------------------------------------------------------
    for id in 0..nodes.len() {
        let mut cursor = id;
        // Hop budget: an acyclic chain can never need more than `nodes.len()` hops.
        let mut remaining = nodes.len();
        while let Some(parent) = nodes[cursor].parent {
            assert!(
                parent < nodes.len(),
                "ancestor of node {id} has out-of-range parent index {parent}"
            );
            cursor = parent;
            assert!(
                remaining > 0,
                "ancestor chain from node {id} exceeds node count — cycle detected"
            );
            remaining -= 1;
        }
        assert_eq!(
            cursor, 0,
            "ancestor chain from node {id} must terminate at the Document root"
        );
    }

    // --- Diagnostics -----------------------------------------------------------------------
    for diag in tree.diagnostics() {
        assert!(
            diag.span.start <= diag.span.end && diag.span.end <= len,
            "diagnostic span {:?} out of bounds for source length {len}",
            diag.span
        );
    }
}
/// Panics unless the spans recorded on a frontmatter `block` are sane with respect to `source`.
///
/// The block span must be ordered, end within `source`, and start and end on UTF-8 char
/// boundaries. The content span and every diagnostic span attached to the block must be ordered
/// and end within `source`.
///
/// # Panics
///
/// Panics on the first span that violates these bounds.
pub fn assert_block_wellformed(block: &FrontmatterBlock, source: &str) {
    let len = source.len();
    let (outer, inner) = (&block.span, &block.content_span);

    assert!(
        outer.start <= outer.end && outer.end <= len,
        "frontmatter block span {:?} out of bounds for source length {len}",
        block.span
    );
    let on_boundaries = [outer.start, outer.end]
        .iter()
        .all(|&off| source.is_char_boundary(off));
    assert!(
        on_boundaries,
        "frontmatter block span {:?} not on UTF-8 char boundaries",
        block.span
    );
    assert!(
        inner.start <= inner.end && inner.end <= len,
        "frontmatter content span {:?} out of bounds for source length {len}",
        block.content_span
    );

    for diag in &block.diagnostics {
        let span = &diag.span;
        assert!(
            span.start <= span.end && span.end <= len,
            "frontmatter diagnostic span {:?} out of bounds for source length {len}",
            diag.span
        );
    }
}
/// Panics unless every scalar collected from `block` still matches the bytes it was parsed from.
///
/// For each scalar returned by [`collect_scalars`], the span must be ordered, end within
/// `source`, and sit on UTF-8 char boundaries. The scalar's resolved `text` must then occur
/// inside the slice of `source` that its span covers, either verbatim or after collapsing each
/// doubled single quote (`''`) in the slice to one `'`.
///
/// Scalars whose source slice contains a backslash, a line feed, or a carriage return are not
/// compared. NOTE(review): presumably because escape processing or line folding means the
/// resolved text need not appear verbatim there — confirm against the parsers.
///
/// # Panics
///
/// Panics with a descriptive message on an inverted, out-of-bounds, or off-boundary span, and
/// when the resolved text cannot be found in its source slice.
pub fn assert_frontmatter_scalar_fidelity(block: &FrontmatterBlock, source: &str) {
    for sc in collect_scalars(block) {
        // Without this check an inverted span would pass the bounds assertion below and only
        // fail inside the slice expression, with a generic slice-index panic message.
        assert!(
            sc.span.start <= sc.span.end,
            "scalar span {:?} has start after end",
            sc.span
        );
        assert!(
            sc.span.end <= source.len()
                && source.is_char_boundary(sc.span.start)
                && source.is_char_boundary(sc.span.end),
            "scalar span {:?} out of bounds / off a char boundary (len {})",
            sc.span,
            source.len()
        );
        let sliced = &source[sc.span.start..sc.span.end];
        if sliced.contains('\\') || sliced.contains('\n') || sliced.contains('\r') {
            continue;
        }
        // Accept the text verbatim, or after undoing `''` -> `'` in the source slice.
        let occurs = sliced.contains(sc.text.as_str())
            || (sliced.contains("''") && sliced.replace("''", "'").contains(sc.text.as_str()));
        assert!(
            occurs,
            "resolved scalar text {:?} does not occur in its source slice {:?} \
— encoding corruption",
            sc.text, sliced
        );
    }
}
/// Gathers references to every scalar reachable from the top-level entries of `block`.
///
/// Entries are visited in order and each one is flattened by `collect_node_scalars`, so the
/// returned vector preserves that traversal order.
#[must_use]
pub fn collect_scalars(block: &FrontmatterBlock) -> Vec<&ScalarSpan> {
    let mut scalars: Vec<&ScalarSpan> = Vec::new();
    for top_level in &block.entries {
        collect_node_scalars(top_level, &mut scalars);
    }
    scalars
}
/// Appends the scalars of a single frontmatter `node` to `out`.
///
/// A mapping node contributes its key first and then whatever its value holds; a sequence item
/// contributes only what its value holds.
fn collect_node_scalars<'a>(node: &'a FmNode, out: &mut Vec<&'a ScalarSpan>) {
    match node {
        FmNode::SequenceItem { value, .. } => collect_value_scalars(value, out),
        FmNode::Mapping { key, value, .. } => {
            // Key before value, so `out` follows document order within the entry.
            out.push(key);
            collect_value_scalars(value, out);
        }
    }
}
/// Appends the scalars held by a frontmatter `value` to `out`.
///
/// - a plain scalar is pushed as-is;
/// - block sequences and block mappings recurse into each child node;
/// - a flow sequence contributes each of its items;
/// - a flow mapping contributes each key followed by its value;
/// - a block scalar contributes nothing.
fn collect_value_scalars<'a>(value: &'a FmValue, out: &mut Vec<&'a ScalarSpan>) {
    match value {
        FmValue::Scalar(scalar) => out.push(scalar),
        FmValue::Sequence(children) | FmValue::Mapping(children) => {
            for child in children {
                collect_node_scalars(child, out);
            }
        }
        FmValue::FlowSequence { items, .. } => out.extend(items.iter()),
        FmValue::FlowMapping { entries, .. } => {
            // Flatten each pair as key-then-value.
            out.extend(entries.iter().flat_map(|(key, val)| [key, val]));
        }
        FmValue::BlockScalar { .. } => {}
    }
}
/// Panics unless the resolved `url` and `title` of every link-like node can be found in the
/// tree's source text.
///
/// Only `Link`, `Image`, `Video`, and `Audio` nodes are inspected. A field is skipped when it is
/// empty or contains a backslash, a line feed, or a carriage return. For the remaining fields a
/// leading `mailto:` is removed, and what is left must occur somewhere in the source.
/// NOTE(review): the `mailto:` stripping presumably accounts for a prefix the parser adds itself
/// — confirm.
///
/// # Panics
///
/// Panics when a checked field does not occur in the source.
pub fn assert_inline_resource_fidelity(tree: &Tree) {
    let source = tree.source();
    for node in tree.nodes() {
        let (ElementKind::Link { url, title }
        | ElementKind::Image { url, title }
        | ElementKind::Video { url, title }
        | ElementKind::Audio { url, title }) = &node.kind
        else {
            continue;
        };
        for field in [url, title] {
            let comparable = !field.is_empty() && !field.contains(['\\', '\n', '\r']);
            if !comparable {
                continue;
            }
            let needle = field.strip_prefix("mailto:").unwrap_or(field);
            let found = source.contains(needle);
            assert!(
                found,
                "resolved inline field {field:?} (as {needle:?}) does not occur in the source \
— encoding corruption"
            );
        }
    }
}
/// Panics unless every length and span recorded on `tag` fits inside `text`.
///
/// For all tag kinds the consumed length must not exceed `text.len()`. For an opening tag, each
/// attribute's name span, and its value span when present, must also be ordered and end within
/// `text`.
///
/// # Panics
///
/// Panics on the first length or span that falls outside `text`.
pub fn assert_html_tag_in_bounds(tag: &HtmlTag, text: &str) {
    let len = text.len();
    // A span fits when it is ordered and stops at or before the end of `text`.
    let fits = |start: usize, end: usize| start <= end && end <= len;

    match tag {
        HtmlTag::Close { len: consumed, .. } | HtmlTag::Comment { len: consumed } => {
            assert!(
                *consumed <= len,
                "tag len {consumed} exceeds text length {len}"
            );
        }
        HtmlTag::Open {
            attrs,
            len: consumed,
            ..
        } => {
            assert!(
                *consumed <= len,
                "open tag len {consumed} exceeds text {len}"
            );
            for attr in attrs {
                assert!(
                    fits(attr.name_span.start, attr.name_span.end),
                    "attribute name span {:?} out of bounds for text length {len}",
                    attr.name_span
                );
                // Attributes without a value have nothing further to check.
                let Some(value_span) = attr.value_span else {
                    continue;
                };
                assert!(
                    fits(value_span.start, value_span.end),
                    "attribute value span {value_span:?} out of bounds for text length {len}"
                );
            }
        }
    }
}
/// Panics unless byte offset → LSP position → byte offset is the identity on `source`.
///
/// Every offset from `0` through `source.len()` is tried, except offsets that are not UTF-8 char
/// boundaries and the offset that falls between the `\r` and `\n` of a CRLF pair.
///
/// # Panics
///
/// Panics at the first offset whose round trip lands somewhere else.
pub fn assert_position_round_trip(source: &str) {
    let bytes = source.as_bytes();
    // True when `off` sits between the two bytes of a `\r\n` pair.
    let splits_crlf = |off: usize| off > 0 && bytes.get(off - 1..=off) == Some(&b"\r\n"[..]);

    let candidates =
        (0..=source.len()).filter(|&off| source.is_char_boundary(off) && !splits_crlf(off));
    for off in candidates {
        let pos = crate::server::byte_offset_to_lsp_position(source, off);
        let back = crate::server::lsp_position_to_byte_offset(source, pos);
        assert_eq!(
            back, off,
            "byte → LSP position → byte must round-trip at offset {off} \
(position {pos:?} mapped back to {back})"
        );
    }
}
/// Panics unless `index` agrees with the server's direct offset/position conversion on `source`.
///
/// At every UTF-8 char boundary from `0` through `source.len()`:
///
/// - `index.position` must equal `crate::server::byte_offset_to_lsp_position`;
/// - unless the offset falls between the `\r` and `\n` of a CRLF pair, `index.offset` applied to
///   that position must return the original offset.
///
/// # Panics
///
/// Panics at the first offset where either comparison fails.
pub fn assert_line_index_agrees(source: &str, index: &LineIndex) {
    let bytes = source.as_bytes();
    // True when `off` sits between the two bytes of a `\r\n` pair.
    let splits_crlf = |off: usize| off > 0 && bytes.get(off - 1..=off) == Some(&b"\r\n"[..]);

    for off in (0..=source.len()).filter(|&off| source.is_char_boundary(off)) {
        let scalar = crate::server::byte_offset_to_lsp_position(source, off);
        let indexed = index.position(source, off);
        assert_eq!(
            indexed, scalar,
            "LineIndex position {indexed:?} disagrees with the scalar conversion \
{scalar:?} at offset {off}"
        );

        // Forward agreement is checked everywhere; the reverse mapping is not checked inside a
        // CRLF pair.
        if splits_crlf(off) {
            continue;
        }
        let back = index.offset(source, indexed);
        assert_eq!(
            back, off,
            "LineIndex offset → position → offset must round-trip at {off} \
(position {indexed:?} mapped back to {back})"
        );
    }
}
/// A single text replacement: a start and an end (line, character) pair plus the replacement
/// text.
///
/// [`apply_lsp_edit`] turns each pair into an `lsp::Position` and resolves it to a byte offset
/// through a `LineIndex`; it tolerates the two ends being given in either order.
#[derive(Debug, Clone)]
pub struct Edit {
/// Line component of the first range endpoint.
pub start_line: u32,
/// Character component of the first range endpoint.
pub start_char: u32,
/// Line component of the second range endpoint.
pub end_line: u32,
/// Character component of the second range endpoint.
pub end_char: u32,
/// Text inserted in place of the addressed range.
pub text: String,
}
/// Applies `edit` to `source` and returns the edited text.
///
/// Both endpoints of the edit are resolved to byte offsets through `index`. The smaller offset is
/// used as the start of the replaced range and the larger as its end, so a reversed range is
/// handled the same as a forward one. The result is the text before the range, then `edit.text`,
/// then the text after the range.
///
/// # Panics
///
/// Panics if either resolved offset is not a valid slice index into `source`.
#[must_use]
pub fn apply_lsp_edit(source: &str, index: &LineIndex, edit: &Edit) -> String {
    let first = lsp::Position {
        line: edit.start_line,
        character: edit.start_char,
    };
    let second = lsp::Position {
        line: edit.end_line,
        character: edit.end_char,
    };
    let a = index.offset(source, first);
    let b = index.offset(source, second);
    // Normalize so the replaced range always runs low-to-high.
    let (lo, hi) = if a <= b { (a, b) } else { (b, a) };

    [&source[..lo], edit.text.as_str(), &source[hi..]].concat()
}
/// Parses `source` with the default configuration and runs the document-level assertions on it.
///
/// In order: tree well-formedness, inline resource fidelity, byte/LSP position round-tripping,
/// and agreement between the parsed file's line index and the direct conversion. When
/// [`detect_frontmatter`] finds a block, the block's span sanity and scalar fidelity are checked
/// as well.
///
/// # Panics
///
/// Panics as soon as any of the delegated assertions fails.
pub fn assert_document_invariants(source: &str) {
    let config = Config::default();
    let file = parse_content(source, Path::new("oracle.md"), &config);

    let tree = &file.tree;
    assert_tree_wellformed(tree);
    assert_inline_resource_fidelity(tree);
    assert_position_round_trip(source);
    assert_line_index_agrees(source, &file.line_index);

    // The detected syntax is irrelevant here; only the block itself is validated.
    let (frontmatter, _syntax) = detect_frontmatter(source);
    if let Some(block) = frontmatter.as_ref() {
        assert_block_wellformed(block, source);
        assert_frontmatter_scalar_fidelity(block, source);
    }
}
/// Checks the document invariants on `base` and again after each of `edits` is applied in turn.
///
/// Every edit is applied to the text produced by the previous one, using a `LineIndex` freshly
/// built from that text.
///
/// # Panics
///
/// Panics if [`assert_document_invariants`] fails on the base text or on any intermediate text.
pub fn assert_edit_sequence_stable(base: &str, edits: &[Edit]) {
    assert_document_invariants(base);

    let mut current = String::from(base);
    for edit in edits {
        // The index must describe the text as it is right now, not the original base.
        let index = LineIndex::new(&current);
        let next = apply_lsp_edit(&current, &index, edit);
        assert_document_invariants(&next);
        current = next;
    }
}
/// Parses `source` as `fuzz.md` with the default configuration and returns the diagnostics
/// produced by `structural::collect`.
///
/// Exceptions are taken from the parsed frontmatter when there is one and default to an empty
/// `Exceptions` otherwise. Both existence callbacks handed to the collector delegate to
/// `path_exists_oracle`, so their answers depend only on the bytes of the queried path.
#[must_use]
pub fn collect_structural(source: &str) -> Vec<Diagnostic> {
    let config = Config::default();
    let rel_path = Path::new("fuzz.md");
    let file = parse_content(source, rel_path, &config);

    let no_exceptions = Exceptions::default();
    let exceptions = file
        .frontmatter
        .as_ref()
        .map_or(&no_exceptions, |fm| &fm.exceptions);

    // Two separate closures with the same body, one per callback slot.
    let local_probe = |target: &Path| path_exists_oracle(target);
    let external_probe = |target: &Path| path_exists_oracle(target);

    structural::collect(
        &file.tree,
        rel_path,
        &config,
        &local_probe,
        &external_probe,
        exceptions,
    )
}
/// Deterministic stand-in for a filesystem existence check.
///
/// Adds up the encoded bytes of `path` and reports `true` exactly when that total is even. The
/// empty path therefore yields `true`.
#[must_use]
fn path_exists_oracle(path: &Path) -> bool {
    let encoded = path.as_os_str().as_encoded_bytes();
    let byte_total = encoded.iter().copied().map(u32::from).sum::<u32>();
    byte_total.is_multiple_of(2)
}
/// Panics unless every entry of `diagnostics` points at a sane location in `source`.
///
/// A diagnostic without a span must carry a line number of at least 1. A diagnostic with a span
/// must have an ordered span that ends within `source` and starts and ends on UTF-8 char
/// boundaries. Each span endpoint must also survive byte → LSP position → byte conversion
/// unchanged, except for an endpoint that falls between the `\r` and `\n` of a CRLF pair.
///
/// # Panics
///
/// Panics on the first diagnostic that violates one of these rules; the message includes the
/// diagnostic's own message text.
pub fn assert_structural_diagnostics_valid(source: &str, diagnostics: &[Diagnostic]) {
    let len = source.len();
    let bytes = source.as_bytes();
    // True when `off` sits between the two bytes of a `\r\n` pair.
    let splits_crlf = |off: usize| off > 0 && bytes.get(off - 1..=off) == Some(&b"\r\n"[..]);

    for diag in diagnostics {
        let Some(span) = diag.span else {
            // Line-only diagnostic: the line number is all there is to validate.
            assert!(
                diag.line >= 1,
                "line-only structural diagnostic must carry a 1-based line, found {} ({:?})",
                diag.line,
                diag.message
            );
            continue;
        };

        let within_source = span.start <= span.end && span.end <= len;
        assert!(
            within_source,
            "structural diagnostic span {span:?} out of bounds for source length {len} ({:?})",
            diag.message
        );
        assert!(
            source.is_char_boundary(span.start),
            "structural diagnostic span start {} is not a UTF-8 char boundary ({:?})",
            span.start,
            diag.message
        );
        assert!(
            source.is_char_boundary(span.end),
            "structural diagnostic span end {} is not a UTF-8 char boundary ({:?})",
            span.end,
            diag.message
        );

        for off in [span.start, span.end] {
            if splits_crlf(off) {
                continue;
            }
            let pos = crate::server::byte_offset_to_lsp_position(source, off);
            let back = crate::server::lsp_position_to_byte_offset(source, pos);
            assert_eq!(
                back, off,
                "structural diagnostic span endpoint {off} must round-trip \
byte → LSP position → byte (position {pos:?} mapped back to {back}) ({:?})",
                diag.message
            );
        }
    }
}
/// Collects the structural diagnostics for `source` and validates them against that same text.
///
/// # Panics
///
/// Panics if [`assert_structural_diagnostics_valid`] rejects any collected diagnostic.
pub fn assert_structural_invariants(source: &str) {
    assert_structural_diagnostics_valid(source, &collect_structural(source));
}