Skip to main content

clayers_spec/
artifact.rs

1use std::path::{Path, PathBuf};
2
3use clayers_xml::ContentHash;
4use sha2::{Digest, Sha256};
5
6use crate::namespace;
7
/// An artifact mapping parsed from spec XML.
///
/// String fields hold the raw attribute/text values found in the XML; a missing
/// attribute is stored as an empty string (or `None` for the optional fields).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ArtifactMapping {
    /// Value of the `id` attribute on the `mapping` element.
    pub id: String,
    /// Value of the `node` attribute on the `spec-ref` child.
    pub spec_ref_node: String,
    /// Value of the `revision` attribute on the `spec-ref` child.
    pub spec_ref_revision: String,
    /// Value of the `node-hash` attribute on the `spec-ref` child, if present.
    pub node_hash: Option<String>,
    /// Value of the `path` attribute on the `artifact` child.
    pub artifact_path: String,
    /// Value of the `repo` attribute on the `artifact` child.
    pub artifact_repo: String,
    /// One entry per `range` element found directly under an `artifact` child.
    pub ranges: Vec<ArtifactRange>,
    /// Trimmed text content of the `coverage` child.
    pub coverage: String,
    /// The XML file this mapping was parsed from.
    pub source_file: PathBuf,
}

/// A range within an artifact file.
///
/// Every field mirrors one optional attribute of a `range` element. The numeric
/// fields are `None` when the attribute is missing or is not a valid `u64`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ArtifactRange {
    /// Value of the `hash` attribute, if present.
    pub hash: Option<String>,
    /// Value of the `start-line` attribute.
    pub start_line: Option<u64>,
    /// Value of the `end-line` attribute.
    pub end_line: Option<u64>,
    /// Value of the `start-byte` attribute.
    pub start_byte: Option<u64>,
    /// Value of the `end-byte` attribute.
    pub end_byte: Option<u64>,
}
32
33/// Collect all artifact mappings from spec files.
34///
35/// # Errors
36///
37/// Returns an error if files cannot be read or parsed.
38pub fn collect_artifact_mappings(
39    file_paths: &[impl AsRef<Path>],
40) -> Result<Vec<ArtifactMapping>, crate::Error> {
41    let mut mappings = Vec::new();
42
43    for file_path in file_paths {
44        let content = std::fs::read_to_string(file_path.as_ref())?;
45        let mut xot = xot::Xot::new();
46        let doc = xot.parse(&content).map_err(xot::Error::from)?;
47        let root = xot.document_element(doc)?;
48
49        let art_ns = xot.add_namespace(namespace::ARTIFACT);
50        let names = MappingNames {
51            mapping_tag: xot.add_name_ns("mapping", art_ns),
52            spec_ref_tag: xot.add_name_ns("spec-ref", art_ns),
53            artifact_tag: xot.add_name_ns("artifact", art_ns),
54            range_tag: xot.add_name_ns("range", art_ns),
55            coverage_tag: xot.add_name_ns("coverage", art_ns),
56            id_attr: xot.add_name("id"),
57            node_attr: xot.add_name("node"),
58            revision_attr: xot.add_name("revision"),
59            node_hash_attr: xot.add_name("node-hash"),
60            path_attr: xot.add_name("path"),
61            repo_attr: xot.add_name("repo"),
62            hash_attr: xot.add_name("hash"),
63            start_line_attr: xot.add_name("start-line"),
64            end_line_attr: xot.add_name("end-line"),
65            start_byte_attr: xot.add_name("start-byte"),
66            end_byte_attr: xot.add_name("end-byte"),
67        };
68
69        let start_idx = mappings.len();
70        collect_mappings(&xot, root, &names, &mut mappings);
71
72        // Set source_file on newly added mappings
73        for mapping in &mut mappings[start_idx..] {
74            mapping.source_file = file_path.as_ref().to_path_buf();
75        }
76    }
77
78    Ok(mappings)
79}
80
/// Interned name IDs for artifact mapping XML elements and attributes.
///
/// Built once per parsed file by `collect_artifact_mappings`: the `*_tag`
/// names are interned in the artifact namespace (`add_name_ns`), the `*_attr`
/// names with plain `add_name`.
struct MappingNames {
    /// `mapping` element.
    mapping_tag: xot::NameId,
    /// `spec-ref` element.
    spec_ref_tag: xot::NameId,
    /// `artifact` element.
    artifact_tag: xot::NameId,
    /// `range` element.
    range_tag: xot::NameId,
    /// `coverage` element.
    coverage_tag: xot::NameId,
    /// `id` attribute (read from `mapping`).
    id_attr: xot::NameId,
    /// `node` attribute (read from `spec-ref`).
    node_attr: xot::NameId,
    /// `revision` attribute (read from `spec-ref`).
    revision_attr: xot::NameId,
    /// `node-hash` attribute (read from `spec-ref`).
    node_hash_attr: xot::NameId,
    /// `path` attribute (read from `artifact`).
    path_attr: xot::NameId,
    /// `repo` attribute (read from `artifact`).
    repo_attr: xot::NameId,
    /// `hash` attribute (read from `range`).
    hash_attr: xot::NameId,
    /// `start-line` attribute (read from `range`).
    start_line_attr: xot::NameId,
    /// `end-line` attribute (read from `range`).
    end_line_attr: xot::NameId,
    /// `start-byte` attribute (read from `range`).
    start_byte_attr: xot::NameId,
    /// `end-byte` attribute (read from `range`).
    end_byte_attr: xot::NameId,
}
100
101fn collect_mappings(
102    xot: &xot::Xot,
103    node: xot::Node,
104    names: &MappingNames,
105    mappings: &mut Vec<ArtifactMapping>,
106) {
107    if xot.is_element(node)
108        && xot
109            .element(node)
110            .is_some_and(|e| e.name() == names.mapping_tag)
111    {
112        mappings.push(parse_single_mapping(xot, node, names));
113    }
114
115    for child in xot.children(node) {
116        collect_mappings(xot, child, names, mappings);
117    }
118}
119
120fn parse_single_mapping(xot: &xot::Xot, node: xot::Node, names: &MappingNames) -> ArtifactMapping {
121    let id = xot.get_attribute(node, names.id_attr)
122        .unwrap_or("")
123        .to_string();
124    let mut spec_ref_node = String::new();
125    let mut spec_ref_revision = String::new();
126    let mut node_hash = None;
127    let mut artifact_path = String::new();
128    let mut artifact_repo = String::new();
129    let mut ranges = Vec::new();
130    let mut coverage = String::new();
131
132    for child in xot.children(node) {
133        if !xot.is_element(child) {
134            continue;
135        }
136        let child_name = xot.element(child).map(xot::Element::name);
137        if child_name == Some(names.spec_ref_tag) {
138            spec_ref_node = xot.get_attribute(child, names.node_attr)
139                .unwrap_or("")
140                .to_string();
141            spec_ref_revision = xot.get_attribute(child, names.revision_attr)
142                .unwrap_or("")
143                .to_string();
144            node_hash = xot.get_attribute(child, names.node_hash_attr)
145                .map(String::from);
146        } else if child_name == Some(names.artifact_tag) {
147            artifact_path = xot.get_attribute(child, names.path_attr)
148                .unwrap_or("")
149                .to_string();
150            artifact_repo = xot.get_attribute(child, names.repo_attr)
151                .unwrap_or("")
152                .to_string();
153
154            for range_child in xot.children(child) {
155                if xot.is_element(range_child)
156                    && xot
157                        .element(range_child)
158                        .is_some_and(|e| e.name() == names.range_tag)
159                {
160                    ranges.push(ArtifactRange {
161                        hash: xot.get_attribute(range_child, names.hash_attr)
162                            .map(String::from),
163                        start_line: xot.get_attribute(range_child, names.start_line_attr)
164                            .and_then(|s| s.parse().ok()),
165                        end_line: xot.get_attribute(range_child, names.end_line_attr)
166                            .and_then(|s| s.parse().ok()),
167                        start_byte: xot.get_attribute(range_child, names.start_byte_attr)
168                            .and_then(|s| s.parse().ok()),
169                        end_byte: xot.get_attribute(range_child, names.end_byte_attr)
170                            .and_then(|s| s.parse().ok()),
171                    });
172                }
173            }
174        } else if child_name == Some(names.coverage_tag) {
175            coverage = collect_text_content(xot, child);
176        }
177    }
178
179    ArtifactMapping {
180        id,
181        spec_ref_node,
182        spec_ref_revision,
183        node_hash,
184        artifact_path,
185        artifact_repo,
186        ranges,
187        coverage,
188        source_file: PathBuf::new(),
189    }
190}
191
192fn collect_text_content(xot: &xot::Xot, node: xot::Node) -> String {
193    let mut text = String::new();
194    for child in xot.children(node) {
195        if let Some(t) = xot.text_str(child) {
196            text.push_str(t);
197        }
198    }
199    text.trim().to_string()
200}
201
202/// Compute SHA-256 hash of a file (whole-file addressing).
203///
204/// # Errors
205///
206/// Returns an error if the file cannot be read.
207pub fn hash_file(path: &Path) -> Result<ContentHash, crate::Error> {
208    let content = std::fs::read(path)?;
209    Ok(ContentHash::from_canonical(&Sha256::digest(&content)))
210}
211
212/// Extract content from a file using line-range addressing.
213///
214/// Lines are 1-based. Returns the content between `start_line` and `end_line` inclusive.
215///
216/// # Errors
217///
218/// Returns an error if the file cannot be read.
219pub fn extract_line_range(
220    path: &Path,
221    start_line: u64,
222    end_line: u64,
223) -> Result<String, crate::Error> {
224    let content = std::fs::read_to_string(path)?;
225    let lines: Vec<&str> = content.lines().collect();
226    #[allow(clippy::cast_possible_truncation)]
227    let start = start_line.saturating_sub(1) as usize;
228    #[allow(clippy::cast_possible_truncation)]
229    let end = std::cmp::min(end_line as usize, lines.len());
230
231    if start >= lines.len() {
232        return Ok(String::new());
233    }
234
235    Ok(lines[start..end].join("\n"))
236}
237
238/// Compute hash of a line range within a file.
239///
240/// # Errors
241///
242/// Returns an error if the file cannot be read.
243pub fn hash_line_range(
244    path: &Path,
245    start_line: u64,
246    end_line: u64,
247) -> Result<ContentHash, crate::Error> {
248    let text = extract_line_range(path, start_line, end_line)?;
249    Ok(ContentHash::from_canonical(text.as_bytes()))
250}
251
/// Locate the closest ancestor of `start` (including `start` itself) that
/// contains a `.git` entry.
///
/// `start` is canonicalized first. Returns `None` if canonicalization fails or
/// if no ancestor contains `.git`.
#[must_use]
pub fn find_repo_root(start: &Path) -> Option<PathBuf> {
    let canonical = start.canonicalize().ok()?;
    canonical
        .ancestors()
        .find(|candidate| candidate.join(".git").exists())
        .map(Path::to_path_buf)
}
264
/// Resolve an artifact path relative to the repo root or spec directory.
///
/// Candidates are tried in this order and the first one that exists wins:
/// 1. `repo_root/artifact_path`, when a repo root is given;
/// 2. `dir/artifact_path` for `spec_dir` and then each of its ancestors.
///
/// If none exists, `spec_dir/artifact_path` is returned as is.
#[must_use]
pub fn resolve_artifact_path(
    artifact_path: &str,
    spec_dir: &Path,
    repo_root: Option<&Path>,
) -> PathBuf {
    repo_root
        .map(|root| root.join(artifact_path))
        .filter(|candidate| candidate.exists())
        .or_else(|| {
            // Walk up from spec_dir
            spec_dir
                .ancestors()
                .map(|dir| dir.join(artifact_path))
                .find(|candidate| candidate.exists())
        })
        .unwrap_or_else(|| spec_dir.join(artifact_path))
}
291
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Content shared by the line-range tests: five lines plus a trailing newline.
    const FIVE_LINES: &str = "line1\nline2\nline3\nline4\nline5\n";

    /// Canonical path of the shipped spec directory, located relative to this
    /// crate's manifest.
    fn spec_dir() -> PathBuf {
        let relative = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../clayers/clayers");
        relative.canonicalize().expect("clayers/clayers/ not found")
    }

    /// Write `FIVE_LINES` to `test.txt` in a fresh temp dir. The returned dir
    /// guard must stay alive for as long as the file is used.
    fn five_line_file() -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().expect("tempdir");
        let file = dir.path().join("test.txt");
        std::fs::write(&file, FIVE_LINES).expect("write");
        (dir, file)
    }

    #[test]
    fn collect_mappings_from_shipped_spec() {
        let index = spec_dir().join("index.xml");
        let file_paths =
            crate::discovery::discover_spec_files(&index).expect("discovery failed");
        let mappings = collect_artifact_mappings(&file_paths).expect("collection failed");
        assert!(
            !mappings.is_empty(),
            "shipped spec should have artifact mappings"
        );

        // At least one mapping should have valid structure
        let valid = mappings
            .iter()
            .any(|m| !(m.spec_ref_node.is_empty() || m.artifact_path.is_empty()));
        assert!(
            valid,
            "at least one mapping should have spec-ref and artifact"
        );
    }

    #[test]
    fn hash_file_produces_consistent_hash() {
        let path = spec_dir().join("index.xml");
        let first = hash_file(&path).expect("hash failed");
        let second = hash_file(&path).expect("hash failed");
        assert_eq!(first, second);
    }

    #[test]
    fn extract_line_range_correct() {
        let (_dir, file) = five_line_file();

        let range = extract_line_range(&file, 2, 4).expect("extract failed");
        assert_eq!(range, "line2\nline3\nline4");
    }

    #[test]
    fn hash_line_range_differs_from_whole_file() {
        let (_dir, file) = five_line_file();

        let whole = hash_file(&file).expect("hash failed");
        let partial = hash_line_range(&file, 2, 4).expect("hash failed");
        assert_ne!(whole, partial);
    }
}
351}