Skip to main content

clayers_spec/
drift.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use clayers_xml::c14n;
5
6use crate::artifact::{self, ArtifactMapping};
7
/// Outcome of a drift check for one artifact mapping.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so statuses
/// can be stored, copied and compared directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DriftStatus {
    /// No drift was detected. Hashes that are absent, lack the `sha256:`
    /// prefix, or are still `sha256:placeholder` are skipped rather than
    /// compared, so they also end up here.
    Clean,
    /// The spec node's content has changed.
    SpecDrifted {
        /// Node hash recorded in the mapping.
        stored_hash: String,
        /// Node hash computed from the current spec content.
        current_hash: String,
    },
    /// The artifact file's content has changed.
    ArtifactDrifted {
        /// Range hash recorded in the mapping.
        stored_hash: String,
        /// Hash computed from the artifact as it is now.
        current_hash: String,
        /// Artifact path as written in the mapping (not the resolved path).
        artifact_path: String,
    },
    /// Drift could not be determined, e.g. the artifact file was not found
    /// or computing its hash failed.
    Unavailable {
        /// Human-readable explanation of why the check could not run.
        reason: String,
    },
}
27
28/// Result of drift detection for a single mapping.
29#[derive(Debug)]
30pub struct MappingDrift {
31    pub mapping_id: String,
32    pub status: DriftStatus,
33}
34
35/// Overall drift report for a spec.
36#[derive(Debug)]
37pub struct DriftReport {
38    pub spec_name: String,
39    pub total_mappings: usize,
40    pub drifted_count: usize,
41    pub mapping_drifts: Vec<MappingDrift>,
42}
43
44/// Check for drift across all artifact mappings in a spec.
45///
46/// Compares stored hashes against current content for both spec nodes
47/// and artifact files. Reports which mappings have drifted.
48///
49/// # Errors
50///
51/// Returns an error if spec files cannot be read.
52pub fn check_drift(spec_dir: &Path, repo_root: Option<&Path>) -> Result<DriftReport, crate::Error> {
53    let index_files = crate::discovery::find_index_files(spec_dir)?;
54    let spec_name = spec_dir
55        .file_name()
56        .map_or_else(|| "unknown".into(), |n| n.to_string_lossy().into_owned());
57
58    let mut all_mappings = Vec::new();
59    let mut all_file_paths = Vec::new();
60
61    for index_path in &index_files {
62        let file_paths = crate::discovery::discover_spec_files(index_path)?;
63        let mappings = artifact::collect_artifact_mappings(&file_paths)?;
64        all_mappings.extend(mappings);
65        all_file_paths.extend(file_paths);
66    }
67
68    // Compute current C14N hashes for each mapped spec node by assembling
69    // the combined document once and serializing each referenced node.
70    let current_node_hashes = collect_current_node_hashes(&all_file_paths, &all_mappings);
71
72    let mut mapping_drifts = Vec::new();
73    let mut drifted_count = 0;
74
75    for mapping in &all_mappings {
76        let drift = check_single_mapping(mapping, &current_node_hashes, repo_root, spec_dir);
77        if matches!(
78            drift.status,
79            DriftStatus::SpecDrifted { .. } | DriftStatus::ArtifactDrifted { .. }
80        ) {
81            drifted_count += 1;
82        }
83        mapping_drifts.push(drift);
84    }
85
86    Ok(DriftReport {
87        spec_name,
88        total_mappings: all_mappings.len(),
89        drifted_count,
90        mapping_drifts,
91    })
92}
93
94fn check_single_mapping(
95    mapping: &ArtifactMapping,
96    current_node_hashes: &HashMap<String, String>,
97    repo_root: Option<&Path>,
98    spec_dir: &Path,
99) -> MappingDrift {
100    let id = mapping.id.clone();
101
102    // Check spec-side node hash. Placeholders and missing hashes are skipped
103    // so freshly-authored mappings without `--fix-node-hash` don't false-positive.
104    if let Some(stored_hash) = &mapping.node_hash
105        && stored_hash.starts_with("sha256:")
106        && stored_hash != "sha256:placeholder"
107        && let Some(current_hash) = current_node_hashes.get(&mapping.spec_ref_node)
108        && current_hash != stored_hash
109    {
110        return MappingDrift {
111            mapping_id: id,
112            status: DriftStatus::SpecDrifted {
113                stored_hash: stored_hash.clone(),
114                current_hash: current_hash.clone(),
115            },
116        };
117    }
118
119    // Check artifact hash
120    for range in &mapping.ranges {
121        if let Some(ref stored_hash) = range.hash {
122            if !stored_hash.starts_with("sha256:") || stored_hash == "sha256:placeholder" {
123                continue;
124            }
125
126            let artifact_path =
127                artifact::resolve_artifact_path(&mapping.artifact_path, spec_dir, repo_root);
128
129            if !artifact_path.exists() {
130                return MappingDrift {
131                    mapping_id: id,
132                    status: DriftStatus::Unavailable {
133                        reason: format!("artifact file not found: {}", mapping.artifact_path),
134                    },
135                };
136            }
137
138            let current_hash_result =
139                if let (Some(start), Some(end)) = (range.start_line, range.end_line) {
140                    artifact::hash_line_range(&artifact_path, start, end)
141                } else {
142                    artifact::hash_file(&artifact_path)
143                };
144
145            match current_hash_result {
146                Ok(current_hash) => {
147                    let current_str = current_hash.to_prefixed();
148                    if &current_str != stored_hash {
149                        return MappingDrift {
150                            mapping_id: id,
151                            status: DriftStatus::ArtifactDrifted {
152                                stored_hash: stored_hash.clone(),
153                                current_hash: current_str,
154                                artifact_path: mapping.artifact_path.clone(),
155                            },
156                        };
157                    }
158                }
159                Err(e) => {
160                    return MappingDrift {
161                        mapping_id: id,
162                        status: DriftStatus::Unavailable {
163                            reason: format!("hash computation failed: {e}"),
164                        },
165                    };
166                }
167            }
168        }
169    }
170
171    MappingDrift {
172        mapping_id: id,
173        status: DriftStatus::Clean,
174    }
175}
176
177/// Compute current C14N hashes for every spec node referenced by a mapping.
178///
179/// Builds the combined document once, then for each unique `spec_ref_node`
180/// looks up the node, serializes it, and applies inclusive C14N + SHA-256.
181/// Returns a map `node_id -> "sha256:<hex>"` for nodes that were found and
182/// hashed successfully. Nodes that don't exist or fail to hash are simply
183/// absent from the map; callers must handle that case as "no drift signal".
184fn collect_current_node_hashes(
185    file_paths: &[std::path::PathBuf],
186    mappings: &[ArtifactMapping],
187) -> HashMap<String, String> {
188    let mut hashes = HashMap::new();
189    let Ok((mut xot, root)) = crate::assembly::assemble_combined(file_paths) else {
190        return hashes;
191    };
192    let id_attr = xot.add_name("id");
193    let xml_ns = xot.add_namespace(crate::namespace::XML);
194    let xml_id_attr = xot.add_name_ns("id", xml_ns);
195
196    for mapping in mappings {
197        if mapping.spec_ref_node.is_empty() || hashes.contains_key(&mapping.spec_ref_node) {
198            continue;
199        }
200        let Some(node) =
201            crate::fix::find_node_by_id(&xot, root, id_attr, xml_id_attr, &mapping.spec_ref_node)
202        else {
203            continue;
204        };
205        let xml_str = xot.to_string(node).unwrap_or_default();
206        let Ok(hash) = c14n::canonicalize_and_hash(&xml_str, c14n::CanonicalizationMode::Inclusive)
207        else {
208            continue;
209        };
210        hashes.insert(mapping.spec_ref_node.clone(), hash.to_prefixed());
211    }
212
213    hashes
214}
215
/// Report whether a stored hash and a freshly computed hash are identical.
///
/// The comparison is an exact, case-sensitive string equality check.
#[must_use]
pub fn hashes_match(stored: &str, current: &str) -> bool {
    stored.eq(current)
}
221
#[cfg(test)]
mod tests {
    use super::*;

    /// Index document listing the two spec files of the temporary fixture.
    const INDEX_XML: &str = r#"<?xml version="1.0"?>
<spec:clayers xmlns:spec="urn:clayers:spec"
              xmlns:idx="urn:clayers:index"
              spec:spec="drift-test"
              spec:version="0.1.0">
  <idx:file href="content.xml"/>
  <idx:file href="revision.xml"/>
</spec:clayers>"#;

    /// Revision document declaring the `draft-1` revision used by the mapping.
    const REVISION_XML: &str = r#"<?xml version="1.0"?>
<spec:clayers xmlns:spec="urn:clayers:spec"
              xmlns:rev="urn:clayers:revision"
              spec:index="index.xml">
  <rev:revision name="draft-1"/>
</spec:clayers>"#;

    /// Content document: one tracked prose section plus a mapping whose node
    /// hash starts out as a placeholder.
    const CONTENT_XML: &str = r#"<?xml version="1.0"?>
<spec:clayers xmlns:spec="urn:clayers:spec"
              xmlns:pr="urn:clayers:prose"
              xmlns:art="urn:clayers:artifact"
              xmlns:vcs="urn:clayers:vcs"
              spec:index="index.xml">
  <vcs:git id="repo-test" remote="https://example.com/test.git" default-branch="main"/>
  <pr:section id="sec-tracked">
    <pr:title>Tracked Section</pr:title>
    <pr:p>Original content.</pr:p>
  </pr:section>
  <art:mapping id="map-tracked">
    <art:spec-ref node="sec-tracked"
                  revision="draft-1"
                  node-hash="sha256:placeholder"/>
    <art:artifact repo="repo-test" repo-revision="HEAD" path="README.md"/>
    <art:coverage>full</art:coverage>
  </art:mapping>
</spec:clayers>"#;

    /// Fetch the status recorded for `map-tracked`, panicking with
    /// `missing_msg` when the report has no such entry.
    fn tracked_status<'r>(report: &'r DriftReport, missing_msg: &str) -> &'r DriftStatus {
        &report
            .mapping_drifts
            .iter()
            .find(|m| m.mapping_id == "map-tracked")
            .expect(missing_msg)
            .status
    }

    #[test]
    fn identical_hashes_no_drift() {
        let hash = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890";
        assert!(hashes_match(hash, hash));
    }

    #[test]
    fn different_hashes_drift_detected() {
        let stored = "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let current = "sha256:bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";
        assert!(!hashes_match(stored, current));
    }

    #[test]
    fn drift_report_on_shipped_spec() {
        let manifest_dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let spec_dir = manifest_dir
            .join("../../clayers/clayers")
            .canonicalize()
            .expect("clayers/clayers/ not found");
        let report = check_drift(&spec_dir, None).expect("drift check failed");
        // Any status is acceptable here (drifted or unavailable included);
        // the point is only that mappings were discovered at all.
        assert!(
            report.total_mappings > 0,
            "shipped spec should have artifact mappings"
        );
    }

    /// Editing a mapped spec node after `fix_node_hashes` has recorded its
    /// C14N hash must make `check_drift` report that mapping as
    /// `SpecDrifted`; otherwise the spec-side drift workflow would be
    /// silently broken.
    #[test]
    fn spec_node_edit_is_reported_as_spec_drifted() {
        let dir = tempfile::tempdir().expect("tempdir");
        let content_path = dir.path().join("content.xml");

        for (file_name, body, failure) in [
            ("index.xml", INDEX_XML, "write index"),
            ("revision.xml", REVISION_XML, "write revision"),
            ("content.xml", CONTENT_XML, "write content"),
        ] {
            std::fs::write(dir.path().join(file_name), body).expect(failure);
        }

        // Let the fixer replace the placeholder with the real node hash.
        let fix_report = crate::fix::fix_node_hashes(dir.path()).expect("fix failed");
        assert!(
            fix_report.fixed_count >= 1,
            "fixer should record at least one node hash, got {}",
            fix_report.fixed_count
        );

        // Straight after fixing, nothing has changed, so the mapping is clean.
        let clean = check_drift(dir.path(), None).expect("clean drift check failed");
        let before_edit = tracked_status(&clean, "map-tracked missing from clean report");
        assert!(
            matches!(before_edit, DriftStatus::Clean),
            "expected Clean before edit, got {:?}",
            before_edit
        );

        // Edit the paragraph in the tracked section. The file is re-read from
        // disk because the fixer rewrote it with the computed hash; editing
        // the original fixture string would discard that hash again.
        let on_disk = std::fs::read_to_string(&content_path).expect("read");
        let edited = on_disk.replace("Original content.", "Edited content.");
        assert_ne!(on_disk, edited, "edit should change file content");
        std::fs::write(&content_path, edited).expect("rewrite content");

        // The edit must now surface as spec-side drift for map-tracked.
        let report = check_drift(dir.path(), None).expect("drift check failed");
        let after_edit = tracked_status(&report, "map-tracked missing from report");

        let DriftStatus::SpecDrifted { stored_hash, current_hash } = after_edit else {
            panic!("expected SpecDrifted after edit, got {after_edit:?}");
        };
        assert_ne!(
            stored_hash, current_hash,
            "stored and current hashes should differ after edit"
        );

        assert_eq!(
            report.drifted_count, 1,
            "drifted_count should reflect the spec-side drift"
        );
    }
}
353}