Skip to main content

shape_runtime/
package_lock.rs

//! Unified `shape.lock` model for deterministic dependency resolution and
//! compile-time artifact caching.
//!
//! This is the single source of truth for:
//! - resolved package dependencies
//! - compile-time artifacts (schema inference, comptime outputs, generated metadata)
7
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::{BTreeMap, HashMap};
11use std::path::Path;
12
13use crate::project::DependencySpec;
14
15/// Top-level lockfile structure written to `shape.lock`.
16#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
17pub struct PackageLock {
18    /// Lockfile format version (currently "1").
19    pub version: String,
20    /// Locked packages in dependency order.
21    pub packages: Vec<LockedPackage>,
22    /// Cached compile-time artifacts.
23    #[serde(default, skip_serializing_if = "Vec::is_empty")]
24    pub artifacts: Vec<LockedArtifact>,
25}
26
27/// A single locked package entry.
28#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
29pub struct LockedPackage {
30    /// Package name (matches the key in `[dependencies]`).
31    pub name: String,
32    /// Resolved version string.
33    pub version: String,
34    /// How the package was resolved.
35    pub source: LockedSource,
36    /// SHA-256 hash of the package contents for integrity verification.
37    pub content_hash: String,
38    /// Names of direct dependencies of this package.
39    #[serde(default, skip_serializing_if = "Vec::is_empty")]
40    pub dependencies: Vec<String>,
41}
42
43/// Source from which a locked package was resolved.
44#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45#[serde(tag = "type")]
46pub enum LockedSource {
47    /// Local filesystem path dependency.
48    Path { path: String },
49    /// Git repository dependency, pinned to a specific revision.
50    Git { url: String, rev: String },
51    /// Registry dependency (future).
52    Registry {
53        version: String,
54        #[serde(default)]
55        registry: Option<String>,
56        #[serde(default)]
57        path: Option<String>,
58    },
59}
60
61/// Reproducibility mode for a compile-time artifact.
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
63#[serde(tag = "mode", rename_all = "snake_case")]
64pub enum ArtifactDeterminism {
65    /// Artifact depends only on compiler inputs (source/deps/config) and is hermetic.
66    Hermetic,
67    /// Artifact depends on external mutable inputs.
68    /// Each entry must carry a fingerprint used for invalidation.
69    External {
70        fingerprints: BTreeMap<String, String>,
71    },
72}
73
74impl ArtifactDeterminism {
75    fn validate(&self) -> Result<(), String> {
76        match self {
77            ArtifactDeterminism::Hermetic => Ok(()),
78            ArtifactDeterminism::External { fingerprints } => {
79                if fingerprints.is_empty() {
80                    Err(
81                        "external artifact determinism requires at least one fingerprint"
82                            .to_string(),
83                    )
84                } else {
85                    Ok(())
86                }
87            }
88        }
89    }
90
91    fn augment_inputs(&self, inputs: &mut BTreeMap<String, String>) {
92        if let ArtifactDeterminism::External { fingerprints } = self {
93            for (key, value) in fingerprints {
94                let merged_key = format!("external::{key}");
95                inputs.entry(merged_key).or_insert_with(|| value.clone());
96            }
97        }
98    }
99}
100
101/// A generic compile-time artifact cached in `shape.lock`.
102#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
103pub struct LockedArtifact {
104    /// Namespace identifying artifact kind (e.g., `schema.infer`).
105    pub namespace: String,
106    /// Logical key within namespace (e.g., source path).
107    pub key: String,
108    /// Producer identifier (e.g., `shape-runtime/schema_inference@v1`).
109    pub producer: String,
110    /// Determinism/invalidation policy.
111    pub determinism: ArtifactDeterminism,
112    /// Explicit input components used for invalidation and debugging.
113    pub inputs: BTreeMap<String, String>,
114    /// Stable hash derived from inputs + determinism policy.
115    pub inputs_hash: String,
116    /// RFC3339 timestamp of artifact creation.
117    pub created_at: String,
118    /// Shape-wire payload encoded as JSON for TOML compatibility.
119    pub payload_json: String,
120}
121
122impl LockedArtifact {
123    /// Build a new artifact entry from a shape-wire payload.
124    pub fn new(
125        namespace: impl Into<String>,
126        key: impl Into<String>,
127        producer: impl Into<String>,
128        determinism: ArtifactDeterminism,
129        mut inputs: BTreeMap<String, String>,
130        payload: shape_wire::WireValue,
131    ) -> Result<Self, String> {
132        determinism.validate()?;
133        determinism.augment_inputs(&mut inputs);
134
135        let inputs_hash = PackageLock::hash_inputs(&inputs);
136        let payload_json =
137            serde_json::to_string(&payload).map_err(|e| format!("invalid wire payload: {e}"))?;
138
139        Ok(Self {
140            namespace: namespace.into(),
141            key: key.into(),
142            producer: producer.into(),
143            determinism,
144            inputs,
145            inputs_hash,
146            created_at: chrono::Utc::now().to_rfc3339(),
147            payload_json,
148        })
149    }
150
151    /// Decode the shape-wire payload.
152    pub fn payload(&self) -> Result<shape_wire::WireValue, String> {
153        serde_json::from_str(&self.payload_json)
154            .map_err(|e| format!("invalid artifact payload encoding: {e}"))
155    }
156}
157
158impl PackageLock {
159    const EXTERNAL_REQUIRED_NAMESPACES: [&'static str; 1] = ["schema.infer"];
160    const EXTERNAL_REQUIRED_NAMESPACE_PREFIXES: [&'static str; 2] =
161        ["external.", "comptime.external."];
162    const EXTERNAL_REQUIRED_PRODUCERS: [&'static str; 1] = ["shape-runtime/schema_inference@v1"];
163
164    /// Create a new empty lockfile.
165    pub fn new() -> Self {
166        Self {
167            version: "1".to_string(),
168            packages: Vec::new(),
169            artifacts: Vec::new(),
170        }
171    }
172
173    fn requires_external_determinism(namespace: &str, producer: &str) -> bool {
174        Self::EXTERNAL_REQUIRED_NAMESPACES.contains(&namespace)
175            || Self::EXTERNAL_REQUIRED_NAMESPACE_PREFIXES
176                .iter()
177                .any(|prefix| namespace.starts_with(prefix))
178            || Self::EXTERNAL_REQUIRED_PRODUCERS.contains(&producer)
179    }
180
181    /// Read a lockfile from the given path. Returns `None` if the file
182    /// doesn't exist or cannot be parsed.
183    pub fn read(path: &Path) -> Option<Self> {
184        let content = std::fs::read_to_string(path).ok()?;
185        let mut lock: Self = toml::from_str(&content).ok()?;
186        if lock.version.is_empty() {
187            lock.version = "1".to_string();
188        }
189        Some(lock)
190    }
191
192    /// Write the lockfile to the given path.
193    pub fn write(&self, path: &Path) -> std::io::Result<()> {
194        let content = toml::to_string_pretty(self).map_err(std::io::Error::other)?;
195        std::fs::write(path, content)
196    }
197
198    /// Check whether this lockfile is still fresh (matches the given deps).
199    ///
200    /// A lockfile is fresh if every dependency in the spec is present in the
201    /// lockfile and every locked package corresponds to a declared dependency.
202    pub fn is_fresh(&self, deps: &HashMap<String, DependencySpec>) -> bool {
203        for (name, spec) in deps {
204            let Some(locked) = self.packages.iter().find(|p| &p.name == name) else {
205                return false;
206            };
207
208            match spec {
209                DependencySpec::Version(req) => {
210                    if !locked_version_matches_req(&locked.version, req) {
211                        return false;
212                    }
213                }
214                DependencySpec::Detailed(detail) => {
215                    // Path/Git details are validated by source/path matching elsewhere.
216                    // For semver requirements, enforce lock compatibility.
217                    if detail.path.is_none()
218                        && detail.git.is_none()
219                        && let Some(req) = &detail.version
220                        && !locked_version_matches_req(&locked.version, req)
221                    {
222                        return false;
223                    }
224                }
225            }
226        }
227        for pkg in &self.packages {
228            if !deps.contains_key(&pkg.name) {
229                return false;
230            }
231        }
232        true
233    }
234
235    fn validate_artifact(artifact: &LockedArtifact) -> Result<(), String> {
236        artifact.determinism.validate()?;
237        let expected_hash =
238            Self::artifact_inputs_hash(artifact.inputs.clone(), &artifact.determinism)?;
239        if artifact.inputs_hash != expected_hash {
240            return Err(format!(
241                "artifact inputs_hash mismatch for {}:{}",
242                artifact.namespace, artifact.key
243            ));
244        }
245
246        if Self::requires_external_determinism(&artifact.namespace, &artifact.producer)
247            && !matches!(artifact.determinism, ArtifactDeterminism::External { .. })
248        {
249            return Err(format!(
250                "artifact {}:{} must declare external determinism fingerprints",
251                artifact.namespace, artifact.key
252            ));
253        }
254
255        Ok(())
256    }
257
258    /// Upsert a compile-time artifact by `(namespace, key)`.
259    pub fn upsert_artifact(&mut self, artifact: LockedArtifact) -> Result<(), String> {
260        Self::validate_artifact(&artifact)?;
261        if let Some(existing) = self
262            .artifacts
263            .iter_mut()
264            .find(|a| a.namespace == artifact.namespace && a.key == artifact.key)
265        {
266            *existing = artifact;
267        } else {
268            self.artifacts.push(artifact);
269        }
270        Ok(())
271    }
272
273    /// Lookup artifact by `(namespace, key, inputs_hash)`.
274    pub fn artifact(
275        &self,
276        namespace: &str,
277        key: &str,
278        inputs_hash: &str,
279    ) -> Option<&LockedArtifact> {
280        self.artifacts
281            .iter()
282            .find(|a| a.namespace == namespace && a.key == key && a.inputs_hash == inputs_hash)
283    }
284
285    /// Compute a stable SHA-256 hash for inputs map.
286    pub fn hash_inputs(inputs: &BTreeMap<String, String>) -> String {
287        let mut hasher = Sha256::new();
288        for (key, value) in inputs {
289            hasher.update(key.as_bytes());
290            hasher.update([0]);
291            hasher.update(value.as_bytes());
292            hasher.update([0xff]);
293        }
294        format!("sha256:{:x}", hasher.finalize())
295    }
296
297    /// Compute artifact inputs hash after applying determinism policy rules.
298    pub fn artifact_inputs_hash(
299        mut inputs: BTreeMap<String, String>,
300        determinism: &ArtifactDeterminism,
301    ) -> Result<String, String> {
302        determinism.validate()?;
303        determinism.augment_inputs(&mut inputs);
304        Ok(Self::hash_inputs(&inputs))
305    }
306
307    /// Compute a content hash for a directory or file at the given path.
308    ///
309    /// For files, hashes the file content. For directories, hashes the
310    /// concatenation of all `.shape` file contents (sorted by name).
311    pub fn hash_path(path: &Path) -> std::io::Result<String> {
312        let mut hasher = Sha256::new();
313
314        if path.is_file() {
315            let data = std::fs::read(path)?;
316            hasher.update(&data);
317        } else if path.is_dir() {
318            let mut entries: Vec<_> = walkdir::WalkDir::new(path)
319                .into_iter()
320                .filter_map(|e| e.ok())
321                .filter(|e| e.path().extension().is_some_and(|ext| ext == "shape"))
322                .collect();
323            entries.sort_by_key(|e| e.path().to_path_buf());
324            for entry in entries {
325                let data = std::fs::read(entry.path())?;
326                hasher.update(&data);
327            }
328        }
329
330        Ok(format!("{:x}", hasher.finalize()))
331    }
332}
333
334fn locked_version_matches_req(locked: &str, req: &str) -> bool {
335    let Ok(parsed_version) = semver::Version::parse(locked) else {
336        return false;
337    };
338    let Ok(version_req) = semver::VersionReq::parse(req) else {
339        return false;
340    };
341    version_req.matches(&parsed_version)
342}
343
344impl Default for PackageLock {
345    fn default() -> Self {
346        Self::new()
347    }
348}
349
#[cfg(test)]
mod tests {
    use super::*;
    use crate::project::DetailedDependency;

    /// Lockfile fixture with one path dependency (`utils`) and one git
    /// dependency (`finance`) that depends on `utils`; no artifacts.
    fn sample_lock() -> PackageLock {
        PackageLock {
            version: "1".to_string(),
            packages: vec![
                LockedPackage {
                    name: "utils".to_string(),
                    version: "0.1.0".to_string(),
                    source: LockedSource::Path {
                        path: "../utils".to_string(),
                    },
                    content_hash: "abc123".to_string(),
                    dependencies: vec![],
                },
                LockedPackage {
                    name: "finance".to_string(),
                    version: "0.2.0".to_string(),
                    source: LockedSource::Git {
                        url: "https://github.com/example/finance.git".to_string(),
                        rev: "deadbeef".to_string(),
                    },
                    content_hash: "def456".to_string(),
                    dependencies: vec!["utils".to_string()],
                },
            ],
            artifacts: vec![],
        }
    }

    #[test]
    fn test_write_read_roundtrip() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("shape.lock");

        let original = sample_lock();
        original.write(&lock_path).unwrap();

        let loaded = PackageLock::read(&lock_path);
        assert!(loaded.is_some(), "Lockfile should be readable after write");
        assert_eq!(loaded.unwrap(), original);
    }

    #[test]
    fn test_read_missing_file() {
        let result = PackageLock::read(Path::new("/nonexistent/shape.lock"));
        assert!(result.is_none(), "Missing lockfile should return None");
    }

    #[test]
    fn test_read_unparsable_file() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("shape.lock");
        std::fs::write(&lock_path, "this is not a lockfile").unwrap();

        let result = PackageLock::read(&lock_path);
        assert!(result.is_none(), "Unparsable lockfile should return None");
    }

    #[test]
    fn test_is_fresh_matching_deps() {
        let lock = sample_lock();
        let mut deps = HashMap::new();
        deps.insert(
            "utils".to_string(),
            DependencySpec::Detailed(DetailedDependency {
                version: None,
                path: Some("../utils".to_string()),
                git: None,
                tag: None,
                branch: None,
                rev: None,
                permissions: None,
            }),
        );
        deps.insert(
            "finance".to_string(),
            DependencySpec::Detailed(DetailedDependency {
                version: None,
                path: None,
                git: Some("https://github.com/example/finance.git".to_string()),
                tag: None,
                branch: None,
                rev: Some("deadbeef".to_string()),
                permissions: None,
            }),
        );

        assert!(lock.is_fresh(&deps), "Lock should be fresh when deps match");
    }

    #[test]
    fn test_is_fresh_missing_dep() {
        let lock = sample_lock();
        let mut deps = HashMap::new();
        deps.insert(
            "utils".to_string(),
            DependencySpec::Version("0.1.0".to_string()),
        );
        deps.insert(
            "finance".to_string(),
            DependencySpec::Version("0.2.0".to_string()),
        );
        deps.insert(
            "new-dep".to_string(),
            DependencySpec::Version("1.0.0".to_string()),
        );

        assert!(
            !lock.is_fresh(&deps),
            "Lock should be stale when a new dep is added"
        );
    }

    #[test]
    fn test_is_fresh_removed_dep() {
        let lock = sample_lock();
        let mut deps = HashMap::new();
        deps.insert(
            "utils".to_string(),
            DependencySpec::Version("0.1.0".to_string()),
        );

        assert!(
            !lock.is_fresh(&deps),
            "Lock should be stale when a dep is removed"
        );
    }

    #[test]
    fn test_hash_path_file() {
        let tmp = tempfile::tempdir().unwrap();
        let file = tmp.path().join("test.shape");
        std::fs::write(&file, "let x = 1").unwrap();

        let hash1 = PackageLock::hash_path(&file).unwrap();
        let hash2 = PackageLock::hash_path(&file).unwrap();
        assert_eq!(hash1, hash2, "Same content should produce same hash");
        // A non-emptiness check can never fail for a formatted digest, so pin
        // the shape of the output instead.
        assert_eq!(
            hash1.len(),
            64,
            "Hash should be a hex-encoded SHA-256 digest"
        );

        std::fs::write(&file, "let x = 2").unwrap();
        let hash3 = PackageLock::hash_path(&file).unwrap();
        assert_ne!(
            hash1, hash3,
            "Different content should produce a different hash"
        );
    }

    #[test]
    fn test_hash_path_directory() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("a.shape"), "let a = 1").unwrap();
        std::fs::write(tmp.path().join("b.shape"), "let b = 2").unwrap();
        let baseline = PackageLock::hash_path(tmp.path()).unwrap();

        // Files without the `.shape` extension must not contribute.
        std::fs::write(tmp.path().join("README.md"), "not shape").unwrap();
        let with_readme = PackageLock::hash_path(tmp.path()).unwrap();
        assert_eq!(
            baseline, with_readme,
            "Non-.shape files should not affect the directory hash"
        );

        // Changing any `.shape` file must change the hash.
        std::fs::write(tmp.path().join("b.shape"), "let b = 3").unwrap();
        let changed = PackageLock::hash_path(tmp.path()).unwrap();
        assert_ne!(
            baseline, changed,
            "Changing a .shape file should change the directory hash"
        );
    }

    #[test]
    fn test_artifact_external_requires_fingerprints() {
        let err = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External {
                fingerprints: BTreeMap::new(),
            },
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        )
        .unwrap_err();
        assert!(err.contains("requires at least one fingerprint"));
    }

    #[test]
    fn test_artifact_roundtrip_and_lookup() {
        let mut inputs = BTreeMap::new();
        inputs.insert("source".to_string(), "data.csv".to_string());
        inputs.insert("file_hash".to_string(), "sha256:abc".to_string());

        let mut fingerprints = BTreeMap::new();
        fingerprints.insert("file:data.csv".to_string(), "sha256:abc".to_string());

        let payload = shape_wire::WireValue::Object(BTreeMap::from([(
            "ok".to_string(),
            shape_wire::WireValue::Bool(true),
        )]));
        let artifact = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External { fingerprints },
            inputs.clone(),
            payload.clone(),
        )
        .expect("artifact should build");

        let hash = artifact.inputs_hash.clone();
        let mut lock = PackageLock::new();
        lock.upsert_artifact(artifact).unwrap();

        let found = lock
            .artifact("schema.infer", "data.csv", &hash)
            .expect("artifact should be found");
        assert_eq!(found.payload().unwrap(), payload);
    }

    #[test]
    fn test_schema_namespace_requires_external_determinism() {
        let mut lock = PackageLock::new();
        let artifact = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::Hermetic,
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        )
        .unwrap();

        let err = lock.upsert_artifact(artifact).unwrap_err();
        assert!(err.contains("must declare external determinism"));
    }

    #[test]
    fn test_external_namespace_prefix_requires_external_determinism() {
        let mut lock = PackageLock::new();
        let artifact = LockedArtifact::new(
            "external.datasource.schema",
            "orders.csv",
            "shape-ext/csv@v1",
            ArtifactDeterminism::Hermetic,
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        )
        .unwrap();

        let err = lock.upsert_artifact(artifact).unwrap_err();
        assert!(err.contains("external.datasource.schema:orders.csv"));
    }

    #[test]
    fn test_artifacts_persist_through_lock_roundtrip() {
        let tmp = tempfile::tempdir().unwrap();
        let lock_path = tmp.path().join("shape.lock");

        let mut inputs = BTreeMap::new();
        inputs.insert("source".to_string(), "prices.csv".to_string());
        inputs.insert("file_hash".to_string(), "sha256:def".to_string());

        let mut fingerprints = BTreeMap::new();
        fingerprints.insert("file:prices.csv".to_string(), "sha256:def".to_string());

        let artifact = LockedArtifact::new(
            "schema.infer",
            "prices.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External { fingerprints },
            inputs,
            shape_wire::WireValue::String("cached".to_string()),
        )
        .expect("artifact should build");
        let hash = artifact.inputs_hash.clone();

        let mut lock = sample_lock();
        lock.upsert_artifact(artifact).unwrap();
        lock.write(&lock_path).unwrap();

        let loaded = PackageLock::read(&lock_path).expect("lockfile should parse");
        let cached = loaded
            .artifact("schema.infer", "prices.csv", &hash)
            .expect("artifact should roundtrip");
        assert_eq!(
            cached.payload().unwrap(),
            shape_wire::WireValue::String("cached".to_string())
        );
    }
}