Skip to main content

muffy_validation_macro/
lib.rs

1//! Macros for document validation.
2
3extern crate alloc;
4
5mod error;
6
7use self::error::MacroError;
8use alloc::collections::{BTreeMap, BTreeSet};
9use core::mem::replace;
10use muffy_rnc::{
11    Combine, Grammar, GrammarContent, Identifier, NameClass, Pattern, SchemaBody, parse_schema,
12};
13use proc_macro::TokenStream;
14use proc_macro2::Span;
15use quote::quote;
16use std::{fs::read_to_string, path::Path};
17
18/// Generates HTML validation functions.
19#[proc_macro]
20pub fn html(_input: TokenStream) -> TokenStream {
21    generate_html().unwrap_or_else(|error| {
22        syn::Error::new(Span::call_site(), error)
23            .to_compile_error()
24            .into()
25    })
26}
27
28fn generate_html() -> Result<TokenStream, MacroError> {
29    let mut definitions = Default::default();
30
31    for file in ["html5.rnc", "rdfa.rnc"] {
32        load_schema(
33            &Path::new(env!("CARGO_MANIFEST_DIR"))
34                .join("src")
35                .join("schema")
36                .join("html5")
37                .join(file),
38            &mut definitions,
39        )?;
40    }
41
42    // element -> (attributes, children)
43    let mut element_rules = BTreeMap::<String, (Vec<String>, Vec<String>)>::new();
44
45    for pattern in definitions.values() {
46        let Pattern::Element { name_class, .. } = pattern else {
47            continue;
48        };
49        let Some(element) = get_name(name_class) else {
50            continue;
51        };
52
53        if let Pattern::Element { pattern, .. } = pattern {
54            let (attributes, children) = element_rules
55                .entry(element)
56                .or_insert_with(|| (vec![], vec![]));
57
58            attributes.extend(collect_attributes(pattern, &definitions)?);
59            children.extend(collect_children(pattern, &definitions)?);
60        }
61    }
62
63    let mut element_matches = vec![];
64
65    for (element, (mut attributes, mut children)) in element_rules {
66        attributes.sort();
67        attributes.dedup();
68        children.sort();
69        children.dedup();
70
71        let attributes = attributes.iter().map(|attribute| quote!(#attribute));
72        let children = children.iter().map(|child| quote!(#child));
73
74        element_matches.push(quote! {
75            #element => {
76                let mut attributes = ::alloc::collections::BTreeMap::<
77                    String,
78                    ::alloc::collections::BTreeSet<AttributeError>,
79                >::new();
80
81                for (attribute, _) in element.attributes() {
82                    if ignored_attributes.iter().any(|pattern| pattern.is_match(attribute)) {
83                        continue;
84                    }
85
86                    match attribute {
87                        #(#attributes |)* "_DUMMY_" => {}
88                        _ => {
89                            attributes
90                                .entry(attribute.into())
91                                .or_insert_with(Default::default)
92                                .insert(AttributeError::NotAllowed);
93                        }
94                    }
95                }
96
97                let mut children = ::alloc::collections::BTreeMap::<
98                    String,
99                    ::alloc::collections::BTreeSet<ChildError>,
100                >::new();
101
102                for child in element.children() {
103                    if let muffy_document::html::Node::Element(element) = child {
104                        let name = element.name();
105
106                        if ignored_elements.iter().any(|pattern| pattern.is_match(name)) {
107                            continue;
108                        }
109
110                        match name {
111                            #(#children |)* "_DUMMY_" => {}
112                            _ => {
113                                children
114                                    .entry(name.into())
115                                    .or_insert_with(Default::default)
116                                    .insert(ChildError::NotAllowed);
117                            }
118                        }
119                    }
120                }
121
122                if attributes.is_empty() && children.is_empty() {
123                    Ok(())
124                } else {
125                    Err(MarkupError::InvalidElement {
126                        attributes,
127                        children,
128                    })
129                }
130            }
131        });
132    }
133
134    Ok(quote! {
135        /// Validates an HTML element.
136        pub fn validate_html_element(
137            element: &Element,
138            ignored_attributes: &[::regex::Regex],
139            ignored_elements: &[::regex::Regex],
140        ) -> Result<(), MarkupError> {
141            match element.name() {
142                #(#element_matches)*
143                name if ignored_elements.iter().any(|pattern| pattern.is_match(name)) => Ok(()),
144                _ => Err(MarkupError::UnknownTag(element.name().to_string())),
145            }
146        }
147    }
148    .into())
149}
150
151fn load_schema(
152    path: &Path,
153    definitions: &mut BTreeMap<Identifier, Pattern>,
154) -> Result<(), MacroError> {
155    let schema = parse_schema(&read_to_string(path)?)?;
156
157    // We do not use the declarations.
158
159    match schema.body {
160        SchemaBody::Grammar(grammar) => {
161            load_grammar(
162                &grammar,
163                definitions,
164                path.parent().ok_or(MacroError::NoParentDirectory)?,
165            )?;
166        }
167        SchemaBody::Pattern(_) => return Err(MacroError::RncSyntax("top-level pattern")),
168    }
169
170    Ok(())
171}
172
173fn load_grammar(
174    grammar: &Grammar,
175    definitions: &mut BTreeMap<Identifier, Pattern>,
176    directory: &Path,
177) -> Result<(), MacroError> {
178    for content in &grammar.contents {
179        match content {
180            GrammarContent::Definition(definition) => {
181                let name = definition.name.clone();
182                let pattern = definition.pattern.clone();
183
184                if let Some(combine) = definition.combine {
185                    combine_patterns(
186                        definitions.entry(name).or_insert(Pattern::NotAllowed),
187                        pattern,
188                        combine,
189                    );
190                } else {
191                    definitions.insert(name, pattern);
192                }
193            }
194            GrammarContent::Div(grammar) => load_grammar(grammar, definitions, directory)?,
195            GrammarContent::Include(include) => {
196                let include_path = directory.join(&include.uri);
197
198                load_schema(&include_path, definitions)?;
199
200                if let Some(grammar) = &include.grammar {
201                    load_grammar(grammar, definitions, directory)?;
202                }
203            }
204            GrammarContent::Annotation(_) | GrammarContent::Start { .. } => {}
205        }
206    }
207
208    Ok(())
209}
210
211fn combine_patterns(existing: &mut Pattern, new: Pattern, combine: Combine) {
212    match combine {
213        Combine::Choice => match existing {
214            Pattern::Choice(choices) => choices.push(new),
215            Pattern::NotAllowed => *existing = new,
216            Pattern::Attribute { .. }
217            | Pattern::Data { .. }
218            | Pattern::Element { .. }
219            | Pattern::Empty
220            | Pattern::External(_)
221            | Pattern::Grammar(_)
222            | Pattern::Group(_)
223            | Pattern::Interleave(_)
224            | Pattern::List(_)
225            | Pattern::Many0(_)
226            | Pattern::Many1(_)
227            | Pattern::Name(_)
228            | Pattern::Optional(_)
229            | Pattern::Text
230            | Pattern::Value { .. } => {
231                let old = replace(existing, Pattern::Choice(vec![]));
232
233                if let Pattern::Choice(choices) = existing {
234                    choices.push(old);
235                    choices.push(new);
236                }
237            }
238        },
239        Combine::Interleave => match existing {
240            Pattern::Interleave(patterns) => patterns.push(new),
241            Pattern::NotAllowed => *existing = new,
242            Pattern::Attribute { .. }
243            | Pattern::Choice(_)
244            | Pattern::Data { .. }
245            | Pattern::Element { .. }
246            | Pattern::Empty
247            | Pattern::External(_)
248            | Pattern::Grammar(_)
249            | Pattern::Group(_)
250            | Pattern::List(_)
251            | Pattern::Many0(_)
252            | Pattern::Many1(_)
253            | Pattern::Name(_)
254            | Pattern::Optional(_)
255            | Pattern::Text
256            | Pattern::Value { .. } => {
257                let old = replace(existing, Pattern::Interleave(vec![]));
258
259                if let Pattern::Interleave(patterns) = existing {
260                    patterns.push(old);
261                    patterns.push(new);
262                }
263            }
264        },
265    }
266}
267
268fn get_name(name_class: &NameClass) -> Option<String> {
269    match name_class {
270        NameClass::Name(name) => Some(name.local.component.clone()),
271        NameClass::Choice(choices) => choices.iter().find_map(get_name),
272        NameClass::AnyName | NameClass::Except { .. } | NameClass::NamespaceName(_) => None,
273    }
274}
275
276fn collect_attributes(
277    pattern: &Pattern,
278    definitions: &BTreeMap<Identifier, Pattern>,
279) -> Result<BTreeSet<String>, MacroError> {
280    let mut attributes = Default::default();
281
282    collect_nested_attributes(
283        pattern,
284        definitions,
285        &mut attributes,
286        &mut Default::default(),
287    )?;
288
289    Ok(attributes)
290}
291
292fn collect_nested_attributes<'a>(
293    pattern: &'a Pattern,
294    definitions: &'a BTreeMap<Identifier, Pattern>,
295    attributes: &mut BTreeSet<String>,
296    visited: &mut BTreeSet<&'a Identifier>,
297) -> Result<(), MacroError> {
298    match pattern {
299        Pattern::Attribute { name_class, .. } => {
300            if let Some(name) = get_name(name_class) {
301                attributes.insert(name);
302            }
303        }
304        Pattern::Name(name) => {
305            if !visited.contains(&name.local) {
306                visited.insert(&name.local);
307
308                if let Some(pattern) = definitions.get(&name.local) {
309                    collect_nested_attributes(pattern, definitions, attributes, visited)?;
310                }
311            }
312        }
313        Pattern::Choice(patterns) | Pattern::Group(patterns) | Pattern::Interleave(patterns) => {
314            for pattern in patterns {
315                collect_nested_attributes(pattern, definitions, attributes, visited)?;
316            }
317        }
318        Pattern::Many0(pattern) | Pattern::Many1(pattern) | Pattern::Optional(pattern) => {
319            collect_nested_attributes(pattern, definitions, attributes, visited)?;
320        }
321        Pattern::Data { .. } => return Err(MacroError::RncPattern("data")),
322        Pattern::External(_) => return Err(MacroError::RncPattern("external")),
323        Pattern::Grammar(_) => return Err(MacroError::RncPattern("grammar")),
324        Pattern::List { .. } => return Err(MacroError::RncPattern("list")),
325        Pattern::Value { .. } => return Err(MacroError::RncPattern("value")),
326        Pattern::Empty | Pattern::Element { .. } | Pattern::NotAllowed | Pattern::Text => {}
327    }
328
329    Ok(())
330}
331
332fn collect_children(
333    pattern: &Pattern,
334    definitions: &BTreeMap<Identifier, Pattern>,
335) -> Result<BTreeSet<String>, MacroError> {
336    let mut children = Default::default();
337
338    collect_nested_children(pattern, definitions, &mut children, &mut Default::default())?;
339
340    Ok(children)
341}
342
343fn collect_nested_children<'a>(
344    pattern: &'a Pattern,
345    definitions: &'a BTreeMap<Identifier, Pattern>,
346    children: &mut BTreeSet<String>,
347    visited: &mut BTreeSet<&'a Identifier>,
348) -> Result<(), MacroError> {
349    match pattern {
350        Pattern::Element { name_class, .. } => {
351            if let Some(name) = get_name(name_class) {
352                children.insert(name);
353            }
354        }
355        Pattern::Name(name) => {
356            if !visited.contains(&name.local) {
357                visited.insert(&name.local);
358
359                if let Some(pattern) = definitions.get(&name.local) {
360                    collect_nested_children(pattern, definitions, children, visited)?;
361                }
362            }
363        }
364        Pattern::Choice(patterns) | Pattern::Group(patterns) | Pattern::Interleave(patterns) => {
365            for pattern in patterns {
366                collect_nested_children(pattern, definitions, children, visited)?;
367            }
368        }
369        Pattern::Many0(pattern) | Pattern::Many1(pattern) | Pattern::Optional(pattern) => {
370            collect_nested_children(pattern, definitions, children, visited)?;
371        }
372        Pattern::Data { .. } => return Err(MacroError::RncPattern("data")),
373        Pattern::External(_) => return Err(MacroError::RncPattern("external")),
374        Pattern::Grammar(_) => return Err(MacroError::RncPattern("grammar")),
375        Pattern::List { .. } => return Err(MacroError::RncPattern("list")),
376        Pattern::Value { .. } => return Err(MacroError::RncPattern("value")),
377        Pattern::Attribute { .. } | Pattern::Empty | Pattern::NotAllowed | Pattern::Text => {}
378    }
379
380    Ok(())
381}