use std::collections::{BTreeMap, BTreeSet};
use regex::Regex;
use thiserror::Error;
use crate::escape::is_valid_name;
/// A compiled schema: the per-tag rules, keyed by tag name.
///
/// Values of this type are produced by [`SchemaBuilder::try_build`] (or
/// [`SchemaBuilder::build`]); [`Schema::builder`] starts a new builder. The
/// derived `Default` is a schema with no tags.
#[derive(Debug, Clone, Default)]
pub struct Schema {
/// Compiled rules for each declared tag, keyed by tag name.
pub(crate) tags: BTreeMap<String, CompiledTagSchema>,
}
impl Schema {
#[must_use]
pub fn builder() -> SchemaBuilder {
SchemaBuilder {
tags: BTreeMap::new(),
duplicates: Vec::new(),
}
}
}
/// The rules declared for a single tag, as collected by `TagBuilder` and
/// before validation/compilation by `compile_tag`.
#[derive(Debug, Clone, Default)]
pub(crate) struct TagSchema {
/// Declared attribute constraints keyed by attribute name. When a name is
/// declared twice the later constraint replaces the earlier one.
pub(crate) attrs: BTreeMap<String, AttrConstraint>,
/// Required child names in declaration order; may contain repeats
/// (they are removed when the tag is compiled).
pub(crate) children_required: Vec<String>,
/// Optional child names in declaration order.
pub(crate) children_optional: Vec<String>,
/// Set by `TagBuilder::exclusive_children`.
// NOTE(review): presumably means only the listed children may appear;
// the enforcement is not in this file -- confirm against the validator.
pub(crate) children_exclusive: bool,
/// Set by `TagBuilder::content_required`.
pub(crate) content_required: bool,
/// Attribute names that were declared more than once, in the order the
/// repeats occurred. A non-empty list makes compilation fail.
pub(crate) duplicate_attrs: Vec<String>,
}
/// The kind of value an attribute is declared to hold.
///
/// Turn a kind into a full constraint with [`AttrKind::required`] or
/// [`AttrKind::optional`]; a bare kind converts into an optional constraint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttrKind {
/// A plain string; no value restriction is attached at schema-compile time.
String,
/// One of a fixed list of values (see [`AttrKind::one_of`]).
Enum(Vec<String>),
/// A regular-expression pattern. When the schema is built the pattern is
/// compiled wrapped as `\A(?:pattern)\z`, i.e. anchored to the whole value.
Regex(String),
}
impl AttrKind {
    /// Builds an [`AttrKind::Enum`] from any iterable of string-like values.
    ///
    /// Values are stored in iteration order exactly as given (repeats included).
    #[must_use]
    pub fn one_of<I, S>(values: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: Into<String>,
    {
        let allowed = values
            .into_iter()
            .map(|value| value.into())
            .collect::<Vec<String>>();
        Self::Enum(allowed)
    }

    /// Wraps this kind in a constraint whose `required` flag is `true`.
    #[must_use]
    pub fn required(self) -> AttrConstraint {
        self.with_required(true)
    }

    /// Wraps this kind in a constraint whose `required` flag is `false`.
    #[must_use]
    pub fn optional(self) -> AttrConstraint {
        self.with_required(false)
    }

    /// Shared constructor behind [`AttrKind::required`] and [`AttrKind::optional`].
    fn with_required(self, required: bool) -> AttrConstraint {
        AttrConstraint {
            kind: self,
            required,
        }
    }
}
/// A declared attribute rule: the kind of value plus whether the attribute
/// is required.
///
/// Built with [`AttrKind::required`] / [`AttrKind::optional`], or by
/// converting a bare [`AttrKind`] (which yields `required: false`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttrConstraint {
/// The kind of value the attribute is declared to hold.
pub(crate) kind: AttrKind,
/// `true` when built via `AttrKind::required`, `false` otherwise.
pub(crate) required: bool,
}
/// Lets a bare [`AttrKind`] be passed wherever an [`AttrConstraint`] is
/// expected; the resulting constraint is not required.
impl From<AttrKind> for AttrConstraint {
    fn from(kind: AttrKind) -> Self {
        // A bare kind means "optional attribute of this kind".
        kind.optional()
    }
}
/// The validated, compiled rules for a single tag, as produced by
/// `compile_tag`.
#[derive(Debug, Clone)]
pub(crate) struct CompiledTagSchema {
/// Compiled attribute constraints keyed by attribute name.
pub(crate) attrs: BTreeMap<String, CompiledAttrConstraint>,
/// Required child names with repeats removed, in first-declaration order.
pub(crate) children_required: Vec<String>,
/// Every declared child name: the union of the required and optional lists.
pub(crate) children_allowed: BTreeSet<String>,
/// Copied unchanged from `TagSchema::children_exclusive`.
pub(crate) children_exclusive: bool,
/// Copied unchanged from `TagSchema::content_required`.
pub(crate) content_required: bool,
}
/// The compiled form of an `AttrConstraint`.
#[derive(Debug, Clone)]
pub(crate) struct CompiledAttrConstraint {
/// Compiled value kind (enum set built, regex compiled).
pub(crate) kind: CompiledAttrKind,
/// Carried over unchanged from the declared constraint.
pub(crate) required: bool,
}
/// The compiled form of an `AttrKind`.
#[derive(Debug, Clone)]
pub(crate) enum CompiledAttrKind {
/// Compiled from `AttrKind::String`; carries no data.
String,
/// The declared enum values collected into a set (repeats collapse).
Enum(BTreeSet<String>),
/// The declared pattern compiled as `\A(?:pattern)\z`, so it is anchored
/// to the whole input.
Regex(Regex),
}
/// Errors reported when a declared schema fails validation or compilation
/// in [`SchemaBuilder::try_build`].
#[derive(Debug, Clone, Error, PartialEq, Eq)]
#[non_exhaustive]
pub enum SchemaError {
/// A regex attribute pattern could not be compiled.
#[error("invalid regex for {tag}.{attr}: {reason}")]
InvalidRegex {
/// Tag on which the attribute was declared.
tag: String,
/// Attribute whose pattern failed to compile.
attr: String,
/// The regex engine's error message, rendered as text.
reason: String,
},
/// A tag, attribute, or child name was rejected by the name check.
#[error("invalid XML name in schema: {scope} {name:?}")]
InvalidName {
/// Which kind of name was rejected: `"tag"`, `"attr"`, or `"child"`.
scope: &'static str,
/// The rejected name.
name: String,
},
/// The same tag name was declared more than once on the builder.
#[error("duplicate tag {tag:?} in schema")]
DuplicateTag {
/// The repeated tag name.
tag: String,
},
/// The same attribute name was declared more than once on one tag.
#[error("duplicate attribute {attr:?} on tag {tag:?}")]
DuplicateAttr {
/// Tag on which the repeat occurred.
tag: String,
/// The repeated attribute name.
attr: String,
},
}
/// Accumulates tag declarations before they are validated and compiled
/// into a [`Schema`].
#[derive(Debug, Clone, Default)]
pub struct SchemaBuilder {
/// Declared tags keyed by name; a repeated name overwrites the earlier
/// declaration.
tags: BTreeMap<String, TagSchema>,
/// Tag names that were declared more than once, in the order the repeats
/// occurred. A non-empty list makes the build fail.
duplicates: Vec<String>,
}
impl SchemaBuilder {
#[must_use]
pub fn tag<F>(mut self, name: impl Into<String>, f: F) -> Self
where
F: FnOnce(TagBuilder) -> TagBuilder,
{
let name = name.into();
let builder = TagBuilder {
schema: TagSchema::default(),
};
let tag_schema = f(builder).schema;
if self.tags.insert(name.clone(), tag_schema).is_some() {
self.duplicates.push(name);
}
self
}
#[must_use]
pub fn build(self) -> Schema {
self.try_build()
.unwrap_or_else(|e| panic!("schema build failed: {e}"))
}
pub fn try_build(self) -> Result<Schema, SchemaError> {
if let Some(dup) = self.duplicates.into_iter().next() {
return Err(SchemaError::DuplicateTag { tag: dup });
}
let mut tags = BTreeMap::new();
for (tag, ts) in self.tags {
tags.insert(tag.clone(), compile_tag(&tag, ts)?);
}
Ok(Schema { tags })
}
}
fn compile_tag(tag: &str, ts: TagSchema) -> Result<CompiledTagSchema, SchemaError> {
if !is_valid_name(tag) {
return Err(SchemaError::InvalidName {
scope: "tag",
name: tag.to_string(),
});
}
if let Some(dup) = ts.duplicate_attrs.into_iter().next() {
return Err(SchemaError::DuplicateAttr {
tag: tag.to_string(),
attr: dup,
});
}
let mut attrs = BTreeMap::new();
for (name, c) in ts.attrs {
if !is_valid_name(&name) {
return Err(SchemaError::InvalidName {
scope: "attr",
name,
});
}
let kind = match c.kind {
AttrKind::String => CompiledAttrKind::String,
AttrKind::Enum(values) => CompiledAttrKind::Enum(values.into_iter().collect()),
AttrKind::Regex(pat) => {
let anchored = format!("\\A(?:{pat})\\z");
let re = Regex::new(&anchored).map_err(|e| SchemaError::InvalidRegex {
tag: tag.to_string(),
attr: name.clone(),
reason: e.to_string(),
})?;
CompiledAttrKind::Regex(re)
}
};
attrs.insert(
name,
CompiledAttrConstraint {
kind,
required: c.required,
},
);
}
let mut children_allowed: BTreeSet<String> = BTreeSet::new();
for name in &ts.children_required {
if !is_valid_name(name) {
return Err(SchemaError::InvalidName {
scope: "child",
name: name.clone(),
});
}
children_allowed.insert(name.clone());
}
for name in &ts.children_optional {
if !is_valid_name(name) {
return Err(SchemaError::InvalidName {
scope: "child",
name: name.clone(),
});
}
children_allowed.insert(name.clone());
}
let mut seen_required: BTreeSet<String> = BTreeSet::new();
let children_required: Vec<String> = ts
.children_required
.into_iter()
.filter(|n| seen_required.insert(n.clone()))
.collect();
Ok(CompiledTagSchema {
attrs,
children_required,
children_allowed,
children_exclusive: ts.children_exclusive,
content_required: ts.content_required,
})
}
/// Collects the rules for a single tag inside the closure passed to
/// [`SchemaBuilder::tag`].
#[derive(Debug, Clone, Default)]
pub struct TagBuilder {
/// The rules declared so far.
schema: TagSchema,
}
impl TagBuilder {
#[must_use]
pub fn attr(mut self, name: impl Into<String>, constraint: impl Into<AttrConstraint>) -> Self {
let name = name.into();
if self
.schema
.attrs
.insert(name.clone(), constraint.into())
.is_some()
{
self.schema.duplicate_attrs.push(name);
}
self
}
#[must_use]
pub fn child_required(mut self, name: impl Into<String>) -> Self {
self.schema.children_required.push(name.into());
self
}
#[must_use]
pub fn child_optional(mut self, name: impl Into<String>) -> Self {
self.schema.children_optional.push(name.into());
self
}
#[must_use]
pub fn exclusive_children(mut self) -> Self {
self.schema.children_exclusive = true;
self
}
#[must_use]
pub fn content_required(mut self) -> Self {
self.schema.content_required = true;
self
}
}