Skip to main content

clayers_spec/
schema.rs

1use std::path::Path;
2
// Schema annotation discovery: finds content-elements and keyrefs
// declared via `spec:content-element` and `spec:keyref` appinfo annotations.
// (Kept as a plain comment: written with `///` it would become the summary
// line of the `ContentElement` docs instead of describing the module.)

/// A content element discovered from schema annotations.
///
/// One value is recorded for each named `xs:element` that has a
/// `spec:content-element` element somewhere beneath it, provided the schema's
/// target namespace maps to a non-empty prefix.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ContentElement {
    /// Prefix looked up for `namespace`.
    pub prefix: String,
    /// Value of the `name` attribute on the `xs:element` declaration.
    pub element_name: String,
    /// Value of the `targetNamespace` attribute on the schema's root element
    /// (empty when the attribute is absent).
    pub namespace: String,
}
13
/// A keyref discovered from schema annotations.
///
/// NOTE(review): nothing in this part of the file constructs a `Keyref`; the
/// field descriptions below are inferred from the `spec:keyref` annotation
/// name and should be confirmed against the code that populates them.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Keyref {
    /// Name identifying the keyref (presumably the annotation's `name`).
    pub name: String,
    /// Selector expression (presumably picks the referring nodes).
    pub selector: String,
    /// Field expression (presumably picks the referenced value on each node).
    pub field: String,
}
21
22/// Discover acyclic relation types from relation.xsd annotations.
23///
24/// # Errors
25///
26/// Returns an error if the schema file cannot be read or parsed.
27pub fn discover_acyclic_types(
28    schema_dir: &Path,
29) -> Result<std::collections::HashSet<String>, crate::Error> {
30    let rel_xsd = schema_dir.join("relation.xsd");
31    let mut acyclic = std::collections::HashSet::new();
32
33    if !rel_xsd.exists() {
34        return Ok(acyclic);
35    }
36
37    let content = std::fs::read_to_string(&rel_xsd)?;
38
39    // Simple text-based extraction: find enumeration values with acyclic="true"
40    // This mirrors the Python logic without needing full XSD parsing
41    let mut xot = xot::Xot::new();
42    let doc = xot.parse(&content).map_err(xot::Error::from)?;
43    let root = xot.document_element(doc)?;
44
45    let xs_ns = xot.add_namespace("http://www.w3.org/2001/XMLSchema");
46    let enum_name = xot.add_name_ns("enumeration", xs_ns);
47    let value_attr = xot.add_name("value");
48
49    let relation_ns = xot.add_namespace(crate::namespace::RELATION);
50    let acyclic_name = xot.add_name_ns("acyclic", relation_ns);
51    let acyclic_value_attr = xot.add_name("value");
52
53    collect_acyclic_types(
54        &xot,
55        root,
56        enum_name,
57        value_attr,
58        acyclic_name,
59        acyclic_value_attr,
60        &mut acyclic,
61    );
62
63    Ok(acyclic)
64}
65
66fn collect_acyclic_types(
67    xot: &xot::Xot,
68    node: xot::Node,
69    enum_name: xot::NameId,
70    value_attr: xot::NameId,
71    acyclic_name: xot::NameId,
72    acyclic_value_attr: xot::NameId,
73    acyclic: &mut std::collections::HashSet<String>,
74) {
75    if xot.is_element(node)
76        && xot.element(node).is_some_and(|e| e.name() == enum_name)
77        && let Some(value) = xot.get_attribute(node, value_attr)
78    {
79        let value = value.to_string();
80        if has_acyclic_true(xot, node, acyclic_name, acyclic_value_attr) {
81            acyclic.insert(value);
82        }
83    }
84    for child in xot.children(node) {
85        collect_acyclic_types(
86            xot,
87            child,
88            enum_name,
89            value_attr,
90            acyclic_name,
91            acyclic_value_attr,
92            acyclic,
93        );
94    }
95}
96
97fn has_acyclic_true(
98    xot: &xot::Xot,
99    node: xot::Node,
100    acyclic_name: xot::NameId,
101    value_attr: xot::NameId,
102) -> bool {
103    for child in xot.children(node) {
104        if xot.is_element(child)
105            && xot.element(child).is_some_and(|e| e.name() == acyclic_name)
106            && let Some(v) = xot.get_attribute(child, value_attr)
107            && v == "true"
108        {
109            return true;
110        }
111        if has_acyclic_true(xot, child, acyclic_name, value_attr) {
112            return true;
113        }
114    }
115    false
116}
117
118/// Discover content elements from schema annotations.
119///
120/// Scans all `.xsd` files for global elements annotated with `spec:content-element`.
121///
122/// # Errors
123///
124/// Returns an error if schema files cannot be read.
125pub fn discover_content_elements(schema_dir: &Path) -> Result<Vec<ContentElement>, crate::Error> {
126    let mut elements = Vec::new();
127
128    for entry in std::fs::read_dir(schema_dir)? {
129        let entry = entry?;
130        let path = entry.path();
131        if path.extension().is_some_and(|e| e == "xsd")
132            && let Ok(content) = std::fs::read_to_string(&path)
133        {
134            discover_from_xsd(&content, &mut elements);
135        }
136    }
137
138    Ok(elements)
139}
140
141fn discover_from_xsd(content: &str, elements: &mut Vec<ContentElement>) {
142    let mut xot = xot::Xot::new();
143    let Ok(doc) = xot.parse(content) else { return };
144    let Ok(root) = xot.document_element(doc) else {
145        return;
146    };
147
148    let xs_ns = xot.add_namespace("http://www.w3.org/2001/XMLSchema");
149    let element_tag = xot.add_name_ns("element", xs_ns);
150    let spec_ns = xot.add_namespace(crate::namespace::SPEC);
151    let content_element_tag = xot.add_name_ns("content-element", spec_ns);
152    let name_attr = xot.add_name("name");
153    let target_ns_attr = xot.add_name("targetNamespace");
154
155    let target_ns = xot.get_attribute(root, target_ns_attr)
156        .unwrap_or("")
157        .to_string();
158
159    // Find prefix for this namespace
160    let prefix = crate::namespace::prefix_for(&target_ns)
161        .unwrap_or("")
162        .to_string();
163
164    collect_content_elements(
165        &xot,
166        root,
167        element_tag,
168        content_element_tag,
169        name_attr,
170        &prefix,
171        &target_ns,
172        elements,
173    );
174}
175
176#[allow(clippy::too_many_arguments)]
177fn collect_content_elements(
178    xot: &xot::Xot,
179    node: xot::Node,
180    element_tag: xot::NameId,
181    content_element_tag: xot::NameId,
182    name_attr: xot::NameId,
183    prefix: &str,
184    namespace: &str,
185    elements: &mut Vec<ContentElement>,
186) {
187    if xot.is_element(node)
188        && xot.element(node).is_some_and(|e| e.name() == element_tag)
189        && let Some(name) = xot.get_attribute(node, name_attr)
190    {
191        let name = name.to_string();
192        if has_content_element_annotation(xot, node, content_element_tag) && !prefix.is_empty() {
193            elements.push(ContentElement {
194                prefix: prefix.to_string(),
195                element_name: name,
196                namespace: namespace.to_string(),
197            });
198        }
199    }
200    for child in xot.children(node) {
201        collect_content_elements(
202            xot,
203            child,
204            element_tag,
205            content_element_tag,
206            name_attr,
207            prefix,
208            namespace,
209            elements,
210        );
211    }
212}
213
214fn has_content_element_annotation(xot: &xot::Xot, node: xot::Node, tag: xot::NameId) -> bool {
215    for child in xot.children(node) {
216        if xot.is_element(child) && xot.element(child).is_some_and(|e| e.name() == tag) {
217            return true;
218        }
219        if has_content_element_annotation(xot, child, tag) {
220            return true;
221        }
222    }
223    false
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Absolute path of the `schemas/` directory, located two levels above
    /// this crate's manifest directory. Panics if it does not exist.
    fn schema_dir() -> PathBuf {
        let relative = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../schemas");
        relative.canonicalize().expect("schemas/ not found")
    }

    /// The shipped schemas declare at least five annotated content elements.
    #[test]
    fn discover_content_elements_from_shipped_schemas() {
        let dir = schema_dir();
        let found = discover_content_elements(&dir).expect("discovery failed");
        let enough = found.len() >= 5;
        assert!(enough, "expected 5+ content elements, got {}", found.len());
    }

    /// `depends-on` is flagged acyclic in the shipped `relation.xsd`.
    #[test]
    fn discover_acyclic_types_from_relation_xsd() {
        let dir = schema_dir();
        let types = discover_acyclic_types(&dir).expect("discovery failed");
        let has_depends_on = types.iter().any(|name| name == "depends-on");
        assert!(has_depends_on, "depends-on should be acyclic");
    }
}
256}