use std::collections::{BTreeSet, HashMap};
use std::fmt::Write as _;
use thiserror::Error;
use crate::escape::is_xml_whitespace_only;
use crate::schema::{CompiledAttrKind, CompiledTagSchema, Schema};
use crate::types::{ElementData, TextSegments};
use crate::Markdown;
/// A single schema violation found while validating a document.
///
/// Every variant carries the `tag` of the element being checked and a source
/// `line`. The `#[error]` strings below define the `Display` output.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum ValidationError {
/// The schema marks an attribute as required but the element does not carry it.
#[error("line {line}: <{tag}> missing required attribute {attr}")]
MissingAttr {
/// Tag of the element that lacks the attribute.
tag: String,
/// Name of the required attribute.
attr: String,
/// Start line of the element's span.
line: u32,
},
/// An attribute is present but its value fails the schema's kind check.
#[error("line {line}: <{tag}> attribute {attr} has invalid value {value:?} ({reason})")]
InvalidAttr {
/// Tag of the element carrying the attribute.
tag: String,
/// Name of the offending attribute.
attr: String,
/// The attribute value as found on the element.
value: String,
/// Explanation produced by the kind check (enum listing or regex mismatch).
reason: String,
/// Start line of the element's span.
line: u32,
},
/// The schema requires a child tag that does not appear among the element's children.
#[error("line {line}: <{tag}> missing required child <{child}>")]
MissingChild {
/// Tag of the parent element.
tag: String,
/// Tag of the required child that is absent.
child: String,
/// Start line of the parent element's span.
line: u32,
},
/// The schema restricts children to an allow-list and a child's tag is not on it.
#[error("line {line}: <{tag}> has unexpected child <{child}>")]
UnexpectedChild {
/// Tag of the parent element.
tag: String,
/// Tag of the child that is not allowed.
child: String,
/// Start line of the *child* element's span (not the parent's).
line: u32,
},
/// The schema requires content but the element has no text segment that is
/// anything other than XML whitespace.
#[error("line {line}: <{tag}> requires non-empty content")]
EmptyContent {
/// Tag of the element with missing content.
tag: String,
/// Start line of the element's span.
line: u32,
},
}
/// The outcome of [`validate`]: the violations that were found, possibly none.
///
/// The `Default` value is an empty report.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
#[non_exhaustive]
pub struct ValidationReport {
// Violations in the order they were pushed during validation.
errors: Vec<ValidationError>,
}
impl ValidationReport {
    /// Returns `true` when no violation was recorded.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.is_empty()
    }

    /// Borrows all recorded violations as a slice.
    #[must_use]
    pub fn errors(&self) -> &[ValidationError] {
        self.errors.as_slice()
    }

    /// Number of recorded violations.
    #[must_use]
    pub fn len(&self) -> usize {
        self.errors().len()
    }

    /// Returns `true` when the report holds zero violations.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterates over the recorded violations by reference.
    pub fn iter(&self) -> std::slice::Iter<'_, ValidationError> {
        self.errors().iter()
    }
}
/// Lets `for err in &report` walk the violations by reference.
impl<'a> IntoIterator for &'a ValidationReport {
    type Item = &'a ValidationError;
    type IntoIter = std::slice::Iter<'a, ValidationError>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}
/// Lets `for err in report` consume the report and yield owned violations.
impl IntoIterator for ValidationReport {
    type Item = ValidationError;
    type IntoIter = std::vec::IntoIter<ValidationError>;

    fn into_iter(self) -> Self::IntoIter {
        let Self { errors } = self;
        errors.into_iter()
    }
}
/// Attribute count at or above which attribute checking indexes an element's
/// attributes in a `HashMap` instead of scanning them linearly per lookup.
/// It is compared against both the element's attribute count and the number
/// of attribute constraints in the tag's schema.
const ATTR_MAP_THRESHOLD: usize = 16;
#[must_use]
pub fn validate(doc: &Markdown, schema: &Schema) -> ValidationReport {
let mut errors = Vec::new();
for root in doc.roots_internal() {
walk(root, doc.raw(), doc.trivia(), schema, &mut errors);
}
ValidationReport { errors }
}
/// Visits `node` and all of its descendants in pre-order, running
/// `check_element` on every element whose tag has an entry in `schema.tags`.
///
/// Violations are appended to `errors`; elements with unknown tags are skipped
/// but their children are still visited.
fn walk(
    node: &ElementData,
    raw: &str,
    trivia: &[core::ops::Range<usize>],
    schema: &Schema,
    errors: &mut Vec<ValidationError>,
) {
    // Explicit work stack: the element on top is the next one in pre-order.
    let mut pending: Vec<&ElementData> = vec![node];
    while let Some(current) = pending.pop() {
        if let Some(ts) = schema.tags.get(&current.tag) {
            check_element(current, raw, trivia, ts, errors);
        }
        // Push children last-to-first so the first child is popped next,
        // which keeps the visiting order the same as a depth-first recursion.
        for child in current.children.iter().rev() {
            pending.push(child);
        }
    }
}
/// Checks one element against the schema entry `ts` for its tag and appends
/// every violation to `errors`.
///
/// Checks run in a fixed order, which is also the order of the emitted errors:
/// attributes, required children, unexpected children, then content.
///
/// * `raw` / `trivia` are forwarded to `TextSegments::new_with_trivia` and are
///   only consulted when the schema requires content.
fn check_element(
    node: &ElementData,
    raw: &str,
    trivia: &[core::ops::Range<usize>],
    ts: &CompiledTagSchema,
    errors: &mut Vec<ValidationError>,
) {
    let line = node.span.start.line;

    check_attrs(node, ts, line, errors);
    check_children(node, ts, line, errors);

    if ts.content_required {
        // Content counts as present as soon as one text segment is not
        // whitespace-only.
        let has_text =
            TextSegments::new_with_trivia(raw, node, trivia).any(|s| !is_xml_whitespace_only(s));
        if !has_text {
            errors.push(ValidationError::EmptyContent {
                tag: node.tag.clone(),
                line,
            });
        }
    }
}

/// Validates the element's attributes against every attribute constraint in `ts`.
fn check_attrs(
    node: &ElementData,
    ts: &CompiledTagSchema,
    line: u32,
    errors: &mut Vec<ValidationError>,
) {
    // For large attribute lists (or large schemas) build a one-off index so
    // each constraint lookup is a hash probe rather than a linear scan.
    let use_index =
        node.attrs.len() >= ATTR_MAP_THRESHOLD || ts.attrs.len() >= ATTR_MAP_THRESHOLD;
    let index: Option<HashMap<&str, &str>> = use_index.then(|| {
        let mut map: HashMap<&str, &str> = HashMap::with_capacity(node.attrs.len());
        for (k, v) in node.attrs.iter() {
            // Keep the FIRST occurrence of a repeated attribute name so the
            // indexed path resolves to the same value as the linear scan
            // below; collecting straight into a map would keep the last one.
            map.entry(k.as_str()).or_insert(v.as_str());
        }
        map
    });

    for (attr_name, constraint) in &ts.attrs {
        let value = match &index {
            Some(map) => map.get(attr_name.as_str()).copied(),
            None => node
                .attrs
                .iter()
                .find(|(k, _)| k == attr_name)
                .map(|(_, v)| v.as_str()),
        };

        match value {
            // Absent attributes are only an error when the schema requires them.
            None => {
                if constraint.required {
                    errors.push(ValidationError::MissingAttr {
                        tag: node.tag.clone(),
                        attr: attr_name.clone(),
                        line,
                    });
                }
            }
            Some(v) => {
                if let Some(reason) = check_kind(&constraint.kind, v) {
                    errors.push(ValidationError::InvalidAttr {
                        tag: node.tag.clone(),
                        attr: attr_name.clone(),
                        value: v.to_string(),
                        reason,
                        line,
                    });
                }
            }
        }
    }
}

/// Reports required children that are missing and, for exclusive schemas,
/// children whose tag is not on the allow-list.
fn check_children(
    node: &ElementData,
    ts: &CompiledTagSchema,
    line: u32,
    errors: &mut Vec<ValidationError>,
) {
    // The set of child tags is only needed when the schema lists required
    // children, so it is built lazily on the first required entry.
    let mut child_tags: Option<BTreeSet<&str>> = None;
    for required in &ts.children_required {
        let tags = child_tags
            .get_or_insert_with(|| node.children.iter().map(|c| c.tag.as_str()).collect());
        if !tags.contains(required.as_str()) {
            errors.push(ValidationError::MissingChild {
                tag: node.tag.clone(),
                child: required.clone(),
                line,
            });
        }
    }

    if ts.children_exclusive {
        for child in &node.children {
            if !ts.children_allowed.contains(&child.tag) {
                errors.push(ValidationError::UnexpectedChild {
                    tag: node.tag.clone(),
                    child: child.tag.clone(),
                    // Point at the offending child, not at the parent.
                    line: child.span.start.line,
                });
            }
        }
    }
}
/// Tests `value` against an attribute kind.
///
/// Returns `None` when the value is acceptable, otherwise a human-readable
/// reason: the list of allowed values for an enum kind, or the pattern for a
/// regex kind. The plain string kind accepts every value.
fn check_kind(kind: &CompiledAttrKind, value: &str) -> Option<String> {
    match kind {
        CompiledAttrKind::String => None,
        CompiledAttrKind::Enum(allowed) if allowed.contains(value) => None,
        CompiledAttrKind::Enum(allowed) => {
            let mut reason = String::from("expected one of [");
            // Empty before the first entry, ", " before every later one.
            let mut separator = "";
            for candidate in allowed {
                reason.push_str(separator);
                separator = ", ";
                // Writing into a `String` cannot fail, so the result is discarded.
                let _ = write!(reason, "{candidate:?}");
            }
            reason.push(']');
            Some(reason)
        }
        CompiledAttrKind::Regex(re) if re.is_match(value) => None,
        CompiledAttrKind::Regex(re) => Some(format!("did not match regex /{}/", re.as_str())),
    }
}