Skip to main content

clayers_spec/
validate.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use xot::Xot;
5
6use crate::namespace;
7
/// Result of running `validate_spec` over a spec directory: both the
/// hand-rolled structural checks (well-formedness, cross-file ID
/// uniqueness, cross-layer reference resolution) and the schema-driven
/// checks (via [`crate::xsd_validation`]). Errors from both layers are
/// merged into a single list.
///
/// The type is `Clone` and comparable so that callers and tests can keep
/// copies of a result and compare results with `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationResult {
    /// Display name of the validated spec, as chosen by `validate_spec`.
    pub spec_name: String,
    /// Total number of spec files that were discovered for validation.
    pub file_count: usize,
    /// Every finding from every check; empty when the spec is valid.
    pub errors: Vec<ValidationError>,
}

impl ValidationResult {
    /// Returns `true` when no validation finding was recorded.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.errors.is_empty()
    }
}

/// A single validation finding. Messages carry enough context (file path,
/// line/column when available, a human-readable description) to be
/// printed directly by the CLI.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Human-readable description of the finding.
    pub message: String,
}
34
35/// Validate a spec against its layer schemas.
36///
37/// Runs two layers of checks over every file reachable from the index(es):
38///
39/// 1. **Structural (hand-rolled)** — well-formedness, cross-file ID
40///    uniqueness (XSD `xs:ID` only enforces within a single document;
41///    clayers specs are multi-file so this fills that gap), and
42///    cross-layer reference resolution (`rel:relation` `from`/`to`,
43///    `art:artifact/@repo`).
44/// 2. **Schema-driven (XSD 1.1)** — required attributes, pattern facets
45///    (e.g. hash format), enumeration restrictions (e.g. `coverage`
46///    values), content-model conformance, and strict
47///    `xs:any namespace="##other"` wildcard resolution. Delegated to
48///    [`crate::xsd_validation::validate_against_schemas`], which uses the
49///    `uppsala` crate under the hood. Runs only when a schema directory
50///    is reachable from `spec_dir`.
51///
52/// Both layers contribute to the same flat error list; the caller can
53/// print them directly or test `is_valid()` to gate a build.
54///
55/// # Errors
56///
57/// Returns an error if spec files cannot be discovered or read, or if
58/// the schema-driven validator cannot be constructed.
59pub fn validate_spec(spec_dir: &Path) -> Result<ValidationResult, crate::Error> {
60    let index_files = crate::discovery::find_index_files(spec_dir)?;
61
62    if index_files.is_empty() {
63        return Ok(ValidationResult {
64            spec_name: spec_dir.display().to_string(),
65            file_count: 0,
66            errors: vec![ValidationError {
67                message: "no index files found".into(),
68            }],
69        });
70    }
71
72    let mut all_errors = Vec::new();
73    let mut total_files = 0;
74    let mut spec_name = String::new();
75
76    for index_path in &index_files {
77        let file_paths = crate::discovery::discover_spec_files(index_path)?;
78        total_files += file_paths.len();
79
80        spec_name = index_path
81            .parent()
82            .and_then(|p| p.file_name())
83            .map_or_else(|| "unknown".into(), |n| n.to_string_lossy().into_owned());
84
85        // Check each file is well-formed XML
86        for file_path in &file_paths {
87            if let Err(e) = check_well_formed(file_path) {
88                all_errors.push(ValidationError {
89                    message: format!("{}: {e}", file_path.display()),
90                });
91            }
92        }
93
94        // Check ID uniqueness across all files
95        let id_errors = check_id_uniqueness(&file_paths)?;
96        all_errors.extend(id_errors);
97
98        // Check cross-layer references
99        let ref_errors = check_references(&file_paths)?;
100        all_errors.extend(ref_errors);
101
102        // Schema-driven validation: enforce required attributes, pattern
103        // facets, enum restrictions, content models, etc. via uppsala.
104        // Only runs if a schema directory is reachable from the spec dir.
105        if let Some(schema_dir) = crate::discovery::find_schema_dir(spec_dir) {
106            let xsd_errors =
107                crate::xsd_validation::validate_against_schemas(&schema_dir, &file_paths)?;
108            all_errors.extend(xsd_errors);
109        }
110    }
111
112    Ok(ValidationResult {
113        spec_name,
114        file_count: total_files,
115        errors: all_errors,
116    })
117}
118
119fn check_well_formed(path: &Path) -> Result<(), String> {
120    let content = std::fs::read_to_string(path).map_err(|e| e.to_string())?;
121    let mut xot = Xot::new();
122    xot.parse(&content).map_err(|e| e.to_string())?;
123    Ok(())
124}
125
126fn check_id_uniqueness(
127    file_paths: &[impl AsRef<Path>],
128) -> Result<Vec<ValidationError>, crate::Error> {
129    let mut seen: HashMap<String, String> = HashMap::new();
130    let mut errors = Vec::new();
131
132    for file_path in file_paths {
133        let file_path = file_path.as_ref();
134        let content = std::fs::read_to_string(file_path)?;
135        let mut xot = Xot::new();
136        let doc = xot.parse(&content).map_err(xot::Error::from)?;
137        let root = xot.document_element(doc)?;
138        let id_attr = xot.add_name("id");
139        let xml_ns = xot.add_namespace(namespace::XML);
140        let xml_id_attr = xot.add_name_ns("id", xml_ns);
141
142        collect_ids(
143            &xot,
144            root,
145            id_attr,
146            xml_id_attr,
147            file_path,
148            &mut seen,
149            &mut errors,
150        );
151    }
152
153    Ok(errors)
154}
155
156fn collect_ids(
157    xot: &Xot,
158    node: xot::Node,
159    id_attr: xot::NameId,
160    xml_id_attr: xot::NameId,
161    file_path: &Path,
162    seen: &mut HashMap<String, String>,
163    errors: &mut Vec<ValidationError>,
164) {
165    if xot.is_element(node) {
166        // Collect bare @id
167        if let Some(id) = xot.get_attribute(node, id_attr) {
168            let id = id.to_string();
169            let file_str = file_path.display().to_string();
170            if let Some(prev_file) = seen.get(&id) {
171                errors.push(ValidationError {
172                    message: format!(
173                        "duplicate id \"{id}\" (first in {prev_file}, also in {file_str})"
174                    ),
175                });
176            } else {
177                seen.insert(id, file_str);
178            }
179        }
180        // Collect xml:id (W3C standard, used by XMI/UML elements)
181        if let Some(xml_id) = xot.get_attribute(node, xml_id_attr) {
182            let xml_id = xml_id.to_string();
183            let file_str = file_path.display().to_string();
184            if let Some(prev_file) = seen.get(&xml_id) {
185                errors.push(ValidationError {
186                    message: format!(
187                        "duplicate id \"{xml_id}\" (first in {prev_file}, also in {file_str})"
188                    ),
189                });
190            } else {
191                seen.insert(xml_id, file_str);
192            }
193        }
194    }
195    for child in xot.children(node) {
196        collect_ids(xot, child, id_attr, xml_id_attr, file_path, seen, errors);
197    }
198}
199
200fn check_references(file_paths: &[impl AsRef<Path>]) -> Result<Vec<ValidationError>, crate::Error> {
201    // Collect all known IDs (both bare @id and xml:id)
202    let mut all_ids = std::collections::HashSet::new();
203    let mut errors = Vec::new();
204
205    for file_path in file_paths {
206        let content = std::fs::read_to_string(file_path.as_ref())?;
207        let mut xot = Xot::new();
208        let doc = xot.parse(&content).map_err(xot::Error::from)?;
209        let root = xot.document_element(doc)?;
210        let id_attr = xot.add_name("id");
211        let xml_ns = xot.add_namespace(namespace::XML);
212        let xml_id_attr = xot.add_name_ns("id", xml_ns);
213        collect_all_ids(&xot, root, id_attr, xml_id_attr, &mut all_ids);
214    }
215
216    // Check relation and artifact references
217    for file_path in file_paths {
218        let content = std::fs::read_to_string(file_path.as_ref())?;
219        let mut xot = Xot::new();
220        let doc = xot.parse(&content).map_err(xot::Error::from)?;
221        let root = xot.document_element(doc)?;
222
223        let relation_ns = xot.add_namespace(namespace::RELATION);
224        let relation_tag = xot.add_name_ns("relation", relation_ns);
225        let from_attr = xot.add_name("from");
226        let to_attr = xot.add_name("to");
227        let to_spec_attr = xot.add_name("to-spec");
228
229        check_relation_refs(
230            &xot,
231            root,
232            relation_tag,
233            from_attr,
234            to_attr,
235            to_spec_attr,
236            &all_ids,
237            &mut errors,
238        );
239
240        // Check art:artifact/@repo references a known ID (typically vcs:git/@id)
241        let art_ns = xot.add_namespace(namespace::ARTIFACT);
242        let artifact_tag = xot.add_name_ns("artifact", art_ns);
243        let repo_attr = xot.add_name("repo");
244
245        check_artifact_repo_refs(
246            &xot,
247            root,
248            artifact_tag,
249            repo_attr,
250            &all_ids,
251            &mut errors,
252        );
253    }
254
255    Ok(errors)
256}
257
258fn collect_all_ids(
259    xot: &Xot,
260    node: xot::Node,
261    id_attr: xot::NameId,
262    xml_id_attr: xot::NameId,
263    ids: &mut std::collections::HashSet<String>,
264) {
265    if xot.is_element(node) {
266        if let Some(id) = xot.get_attribute(node, id_attr) {
267            ids.insert(id.to_string());
268        }
269        if let Some(xml_id) = xot.get_attribute(node, xml_id_attr) {
270            ids.insert(xml_id.to_string());
271        }
272    }
273    for child in xot.children(node) {
274        collect_all_ids(xot, child, id_attr, xml_id_attr, ids);
275    }
276}
277
278fn check_artifact_repo_refs(
279    xot: &Xot,
280    node: xot::Node,
281    artifact_tag: xot::NameId,
282    repo_attr: xot::NameId,
283    all_ids: &std::collections::HashSet<String>,
284    errors: &mut Vec<ValidationError>,
285) {
286    if xot.is_element(node)
287        && xot.element(node).is_some_and(|e| e.name() == artifact_tag)
288        && let Some(repo) = xot.get_attribute(node, repo_attr)
289        && !all_ids.contains(repo)
290    {
291        errors.push(ValidationError {
292            message: format!(
293                "art:artifact repo=\"{repo}\" references unknown id \
294                 (add a vcs:git or other element with id=\"{repo}\")"
295            ),
296        });
297    }
298    for child in xot.children(node) {
299        check_artifact_repo_refs(xot, child, artifact_tag, repo_attr, all_ids, errors);
300    }
301}
302
303#[allow(clippy::too_many_arguments)]
304fn check_relation_refs(
305    xot: &Xot,
306    node: xot::Node,
307    relation_tag: xot::NameId,
308    from_attr: xot::NameId,
309    to_attr: xot::NameId,
310    to_spec_attr: xot::NameId,
311    all_ids: &std::collections::HashSet<String>,
312    errors: &mut Vec<ValidationError>,
313) {
314    if xot.is_element(node) && xot.element(node).is_some_and(|e| e.name() == relation_tag) {
315        // Skip cross-spec relations
316        if xot.get_attribute(node, to_spec_attr)
317            .is_none()
318        {
319            if let Some(from) = xot.get_attribute(node, from_attr)
320                && !all_ids.contains(from)
321                && !from.starts_with("type-")
322            {
323                errors.push(ValidationError {
324                    message: format!("relation from=\"{from}\" references nonexistent id"),
325                });
326            }
327            if let Some(to) = xot.get_attribute(node, to_attr)
328                && !all_ids.contains(to)
329                && !to.starts_with("type-")
330            {
331                errors.push(ValidationError {
332                    message: format!("relation to=\"{to}\" references nonexistent id"),
333                });
334            }
335        }
336    }
337    for child in xot.children(node) {
338        check_relation_refs(
339            xot,
340            child,
341            relation_tag,
342            from_attr,
343            to_attr,
344            to_spec_attr,
345            all_ids,
346            errors,
347        );
348    }
349}
350
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Canonical path of the spec shipped in the repository, resolved
    /// relative to this crate's manifest directory.
    fn spec_dir() -> PathBuf {
        let shipped = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../clayers/clayers");
        shipped.canonicalize().expect("clayers/clayers/ not found")
    }

    /// The spec shipped with the repository must validate without findings.
    #[test]
    fn shipped_spec_passes_validation() {
        let outcome = validate_spec(&spec_dir()).expect("validation failed");
        let messages: Vec<_> = outcome.errors.iter().map(|e| &e.message).collect();
        assert!(
            outcome.is_valid(),
            "shipped spec should be valid, got errors: {:?}",
            messages
        );
    }

    /// Two elements sharing one `id` value must produce a "duplicate" finding.
    #[test]
    fn duplicate_id_detected() {
        // Index document that pulls in `content.xml`.
        const INDEX_XML: &str = r#"<?xml version="1.0"?>
<spec:clayers xmlns:spec="urn:clayers:spec"
              xmlns:idx="urn:clayers:index"
              xmlns:pr="urn:clayers:prose">
  <idx:file href="content.xml"/>
</spec:clayers>"#;
        // Content document with the same id used twice.
        const CONTENT_XML: &str = r#"<?xml version="1.0"?>
<spec:clayers xmlns:spec="urn:clayers:spec"
              xmlns:pr="urn:clayers:prose"
              spec:index="index.xml">
  <pr:section id="dupe">first</pr:section>
  <pr:section id="dupe">second</pr:section>
</spec:clayers>"#;

        let dir = tempfile::tempdir().expect("tempdir");
        for (name, body) in [("index.xml", INDEX_XML), ("content.xml", CONTENT_XML)] {
            std::fs::write(dir.path().join(name), body).expect("write");
        }

        let outcome = validate_spec(dir.path()).expect("validation failed");
        assert!(!outcome.is_valid(), "duplicate IDs should fail validation");

        let mentions_duplicate = outcome
            .errors
            .iter()
            .any(|e| e.message.contains("duplicate"));
        assert!(mentions_duplicate, "error message should mention duplicate");
    }

    /// A directory without any index file is reported as invalid.
    #[test]
    fn empty_dir_reports_no_index() {
        let dir = tempfile::tempdir().expect("tempdir");
        let outcome = validate_spec(dir.path()).expect("validation failed");
        assert!(!outcome.is_valid());
    }
}