Skip to main content

shape_runtime/
package_lock.rs

1//! Unified `shape.lock` model for deterministic dependency resolution and
2//! compile-time artifact caching.
3//!
4//! This is the single source of truth for:
5//! - resolved package dependencies
6//! - compile-time artifacts (schema inference, comptime outputs, generated metadata)
7
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::{BTreeMap, HashMap};
11use std::path::Path;
12
13use crate::project::DependencySpec;
14
/// Top-level lockfile structure written to `shape.lock`.
///
/// Serialized to TOML by [`PackageLock::write`] and parsed back by
/// [`PackageLock::read`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct PackageLock {
    /// Lockfile format version (currently "1").
    pub version: String,
    /// Locked packages in dependency order.
    pub packages: Vec<LockedPackage>,
    /// Cached compile-time artifacts.
    ///
    /// Defaults to an empty list when the field is absent and is left out of
    /// the serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub artifacts: Vec<LockedArtifact>,
}
26
/// A single locked package entry.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LockedPackage {
    /// Package name (matches the key in `[dependencies]`).
    pub name: String,
    /// Resolved version string.
    pub version: String,
    /// How the package was resolved.
    pub source: LockedSource,
    /// SHA-256 hash of the package contents for integrity verification.
    pub content_hash: String,
    /// Names of direct dependencies of this package.
    ///
    /// Defaults to an empty list when the field is absent and is left out of
    /// the serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub dependencies: Vec<String>,
}
42
/// Source from which a locked package was resolved.
///
/// Serialized as an internally tagged value: the `type` field carries the
/// variant name and the variant's own fields sit alongside it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type")]
pub enum LockedSource {
    /// Local filesystem path dependency.
    Path { path: String },
    /// Git repository dependency, pinned to a specific revision.
    Git { url: String, rev: String },
    /// Registry dependency (future).
    ///
    /// `registry` and `path` are optional and deserialize to `None` when the
    /// corresponding field is missing.
    Registry {
        version: String,
        #[serde(default)]
        registry: Option<String>,
        #[serde(default)]
        path: Option<String>,
    },
}
60
/// Reproducibility mode for a compile-time artifact.
///
/// Serialized as an internally tagged value whose `mode` field holds the
/// snake_case variant name (`hermetic` or `external`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "mode", rename_all = "snake_case")]
pub enum ArtifactDeterminism {
    /// Artifact depends only on compiler inputs (source/deps/config) and is hermetic.
    Hermetic,
    /// Artifact depends on external mutable inputs.
    /// Each entry must carry a fingerprint used for invalidation.
    ///
    /// An empty `fingerprints` map is rejected by validation.
    External {
        fingerprints: BTreeMap<String, String>,
    },
}
73
74impl ArtifactDeterminism {
75    fn validate(&self) -> Result<(), String> {
76        match self {
77            ArtifactDeterminism::Hermetic => Ok(()),
78            ArtifactDeterminism::External { fingerprints } => {
79                if fingerprints.is_empty() {
80                    Err(
81                        "external artifact determinism requires at least one fingerprint"
82                            .to_string(),
83                    )
84                } else {
85                    Ok(())
86                }
87            }
88        }
89    }
90
91    fn augment_inputs(&self, inputs: &mut BTreeMap<String, String>) {
92        if let ArtifactDeterminism::External { fingerprints } = self {
93            for (key, value) in fingerprints {
94                let merged_key = format!("external::{key}");
95                inputs.entry(merged_key).or_insert_with(|| value.clone());
96            }
97        }
98    }
99}
100
/// A generic compile-time artifact cached in `shape.lock`.
///
/// Entries are normally built through [`LockedArtifact::new`], which fills in
/// `inputs_hash`, `created_at` and `payload_json`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct LockedArtifact {
    /// Namespace identifying artifact kind (e.g., `schema.infer`).
    pub namespace: String,
    /// Logical key within namespace (e.g., source path).
    pub key: String,
    /// Producer identifier (e.g., `shape-runtime/schema_inference@v1`).
    pub producer: String,
    /// Determinism/invalidation policy.
    pub determinism: ArtifactDeterminism,
    /// Explicit input components used for invalidation and debugging.
    pub inputs: BTreeMap<String, String>,
    /// Stable hash derived from inputs + determinism policy.
    pub inputs_hash: String,
    /// RFC3339 timestamp of artifact creation.
    pub created_at: String,
    /// Shape-wire payload encoded as JSON for TOML compatibility.
    pub payload_json: String,
}
121
122impl LockedArtifact {
123    /// Build a new artifact entry from a shape-wire payload.
124    pub fn new(
125        namespace: impl Into<String>,
126        key: impl Into<String>,
127        producer: impl Into<String>,
128        determinism: ArtifactDeterminism,
129        mut inputs: BTreeMap<String, String>,
130        payload: shape_wire::WireValue,
131    ) -> Result<Self, String> {
132        determinism.validate()?;
133        determinism.augment_inputs(&mut inputs);
134
135        let inputs_hash = PackageLock::hash_inputs(&inputs);
136        let payload_json =
137            serde_json::to_string(&payload).map_err(|e| format!("invalid wire payload: {e}"))?;
138
139        Ok(Self {
140            namespace: namespace.into(),
141            key: key.into(),
142            producer: producer.into(),
143            determinism,
144            inputs,
145            inputs_hash,
146            created_at: chrono::Utc::now().to_rfc3339(),
147            payload_json,
148        })
149    }
150
151    /// Decode the shape-wire payload.
152    pub fn payload(&self) -> Result<shape_wire::WireValue, String> {
153        serde_json::from_str(&self.payload_json)
154            .map_err(|e| format!("invalid artifact payload encoding: {e}"))
155    }
156}
157
158impl PackageLock {
    /// Exact namespaces whose artifacts must declare external determinism.
    const EXTERNAL_REQUIRED_NAMESPACES: [&'static str; 1] = ["schema.infer"];
    /// Namespace prefixes whose artifacts must declare external determinism.
    const EXTERNAL_REQUIRED_NAMESPACE_PREFIXES: [&'static str; 2] =
        ["external.", "comptime.external."];
    /// Producer identifiers whose artifacts must declare external determinism.
    const EXTERNAL_REQUIRED_PRODUCERS: [&'static str; 1] = ["shape-runtime/schema_inference@v1"];
163
164    /// Create a new empty lockfile.
165    pub fn new() -> Self {
166        Self {
167            version: "1".to_string(),
168            packages: Vec::new(),
169            artifacts: Vec::new(),
170        }
171    }
172
173    fn requires_external_determinism(namespace: &str, producer: &str) -> bool {
174        Self::EXTERNAL_REQUIRED_NAMESPACES.contains(&namespace)
175            || Self::EXTERNAL_REQUIRED_NAMESPACE_PREFIXES
176                .iter()
177                .any(|prefix| namespace.starts_with(prefix))
178            || Self::EXTERNAL_REQUIRED_PRODUCERS.contains(&producer)
179    }
180
181    /// Read a lockfile from the given path. Returns `None` if the file
182    /// doesn't exist or cannot be parsed.
183    pub fn read(path: &Path) -> Option<Self> {
184        let content = std::fs::read_to_string(path).ok()?;
185        let mut lock: Self = toml::from_str(&content).ok()?;
186        if lock.version.is_empty() {
187            lock.version = "1".to_string();
188        }
189        Some(lock)
190    }
191
192    /// Write the lockfile to the given path.
193    pub fn write(&self, path: &Path) -> std::io::Result<()> {
194        let content = toml::to_string_pretty(self).map_err(std::io::Error::other)?;
195        std::fs::write(path, content)
196    }
197
198    /// Check whether this lockfile is still fresh (matches the given deps).
199    ///
200    /// A lockfile is fresh if every dependency in the spec is present in the
201    /// lockfile and every locked package corresponds to a declared dependency.
202    pub fn is_fresh(&self, deps: &HashMap<String, DependencySpec>) -> bool {
203        for (name, spec) in deps {
204            let Some(locked) = self.packages.iter().find(|p| &p.name == name) else {
205                return false;
206            };
207
208            match spec {
209                DependencySpec::Version(req) => {
210                    if !locked_version_matches_req(&locked.version, req) {
211                        return false;
212                    }
213                }
214                DependencySpec::Detailed(detail) => {
215                    // Path/Git details are validated by source/path matching elsewhere.
216                    // For semver requirements, enforce lock compatibility.
217                    if detail.path.is_none()
218                        && detail.git.is_none()
219                        && let Some(req) = &detail.version
220                        && !locked_version_matches_req(&locked.version, req)
221                    {
222                        return false;
223                    }
224                }
225            }
226        }
227        for pkg in &self.packages {
228            if !deps.contains_key(&pkg.name) {
229                return false;
230            }
231        }
232        true
233    }
234
235    fn validate_artifact(artifact: &LockedArtifact) -> Result<(), String> {
236        artifact.determinism.validate()?;
237        let expected_hash =
238            Self::artifact_inputs_hash(artifact.inputs.clone(), &artifact.determinism)?;
239        if artifact.inputs_hash != expected_hash {
240            return Err(format!(
241                "artifact inputs_hash mismatch for {}:{}",
242                artifact.namespace, artifact.key
243            ));
244        }
245
246        if Self::requires_external_determinism(&artifact.namespace, &artifact.producer)
247            && !matches!(artifact.determinism, ArtifactDeterminism::External { .. })
248        {
249            return Err(format!(
250                "artifact {}:{} must declare external determinism fingerprints",
251                artifact.namespace, artifact.key
252            ));
253        }
254
255        Ok(())
256    }
257
258    /// Upsert a compile-time artifact by `(namespace, key)`.
259    pub fn upsert_artifact(&mut self, artifact: LockedArtifact) -> Result<(), String> {
260        Self::validate_artifact(&artifact)?;
261        if let Some(existing) = self
262            .artifacts
263            .iter_mut()
264            .find(|a| a.namespace == artifact.namespace && a.key == artifact.key)
265        {
266            *existing = artifact;
267        } else {
268            self.artifacts.push(artifact);
269        }
270        Ok(())
271    }
272
273    /// Upsert a compile-time artifact by `(namespace, key, inputs_hash)`.
274    ///
275    /// This is used for host-bound artifacts such as native dependency locks,
276    /// where multiple variants for the same logical key may coexist across
277    /// targets or fingerprints in a single committed lockfile.
278    pub fn upsert_artifact_variant(&mut self, artifact: LockedArtifact) -> Result<(), String> {
279        Self::validate_artifact(&artifact)?;
280        if let Some(existing) = self.artifacts.iter_mut().find(|a| {
281            a.namespace == artifact.namespace
282                && a.key == artifact.key
283                && a.inputs_hash == artifact.inputs_hash
284        }) {
285            *existing = artifact;
286        } else {
287            self.artifacts.push(artifact);
288        }
289        Ok(())
290    }
291
292    /// Lookup artifact by `(namespace, key, inputs_hash)`.
293    pub fn artifact(
294        &self,
295        namespace: &str,
296        key: &str,
297        inputs_hash: &str,
298    ) -> Option<&LockedArtifact> {
299        self.artifacts
300            .iter()
301            .find(|a| a.namespace == namespace && a.key == key && a.inputs_hash == inputs_hash)
302    }
303
304    /// Compute a stable SHA-256 hash for inputs map.
305    pub fn hash_inputs(inputs: &BTreeMap<String, String>) -> String {
306        let mut hasher = Sha256::new();
307        for (key, value) in inputs {
308            hasher.update(key.as_bytes());
309            hasher.update([0]);
310            hasher.update(value.as_bytes());
311            hasher.update([0xff]);
312        }
313        format!("sha256:{:x}", hasher.finalize())
314    }
315
316    /// Compute artifact inputs hash after applying determinism policy rules.
317    pub fn artifact_inputs_hash(
318        mut inputs: BTreeMap<String, String>,
319        determinism: &ArtifactDeterminism,
320    ) -> Result<String, String> {
321        determinism.validate()?;
322        determinism.augment_inputs(&mut inputs);
323        Ok(Self::hash_inputs(&inputs))
324    }
325
326    /// Compute a content hash for a directory or file at the given path.
327    ///
328    /// For files, hashes the file content. For directories, hashes the
329    /// concatenation of all `.shape` file contents (sorted by name).
330    pub fn hash_path(path: &Path) -> std::io::Result<String> {
331        let mut hasher = Sha256::new();
332
333        if path.is_file() {
334            let data = std::fs::read(path)?;
335            hasher.update(&data);
336        } else if path.is_dir() {
337            let mut entries: Vec<_> = walkdir::WalkDir::new(path)
338                .into_iter()
339                .filter_map(|e| e.ok())
340                .filter(|e| e.path().extension().is_some_and(|ext| ext == "shape"))
341                .collect();
342            entries.sort_by_key(|e| e.path().to_path_buf());
343            for entry in entries {
344                let data = std::fs::read(entry.path())?;
345                hasher.update(&data);
346            }
347        }
348
349        Ok(format!("{:x}", hasher.finalize()))
350    }
351}
352
353fn locked_version_matches_req(locked: &str, req: &str) -> bool {
354    let Ok(parsed_version) = semver::Version::parse(locked) else {
355        return false;
356    };
357    let Ok(version_req) = semver::VersionReq::parse(req) else {
358        return false;
359    };
360    version_req.matches(&parsed_version)
361}
362
363impl Default for PackageLock {
364    fn default() -> Self {
365        Self::new()
366    }
367}
368
369#[cfg(test)]
370mod tests {
371    use super::*;
372    use crate::project::DetailedDependency;
373
374    fn sample_lock() -> PackageLock {
375        PackageLock {
376            version: "1".to_string(),
377            packages: vec![
378                LockedPackage {
379                    name: "utils".to_string(),
380                    version: "0.1.0".to_string(),
381                    source: LockedSource::Path {
382                        path: "../utils".to_string(),
383                    },
384                    content_hash: "abc123".to_string(),
385                    dependencies: vec![],
386                },
387                LockedPackage {
388                    name: "finance".to_string(),
389                    version: "0.2.0".to_string(),
390                    source: LockedSource::Git {
391                        url: "https://github.com/example/finance.git".to_string(),
392                        rev: "deadbeef".to_string(),
393                    },
394                    content_hash: "def456".to_string(),
395                    dependencies: vec!["utils".to_string()],
396                },
397            ],
398            artifacts: vec![],
399        }
400    }
401
402    #[test]
403    fn test_write_read_roundtrip() {
404        let tmp = tempfile::tempdir().unwrap();
405        let lock_path = tmp.path().join("shape.lock");
406
407        let original = sample_lock();
408        original.write(&lock_path).unwrap();
409
410        let loaded = PackageLock::read(&lock_path);
411        assert!(loaded.is_some(), "Lockfile should be readable after write");
412        assert_eq!(loaded.unwrap(), original);
413    }
414
415    #[test]
416    fn test_read_missing_file() {
417        let result = PackageLock::read(Path::new("/nonexistent/shape.lock"));
418        assert!(result.is_none(), "Missing lockfile should return None");
419    }
420
421    #[test]
422    fn test_is_fresh_matching_deps() {
423        let lock = sample_lock();
424        let mut deps = HashMap::new();
425        deps.insert(
426            "utils".to_string(),
427            DependencySpec::Detailed(DetailedDependency {
428                version: None,
429                path: Some("../utils".to_string()),
430                git: None,
431                tag: None,
432                branch: None,
433                rev: None,
434                permissions: None,
435            }),
436        );
437        deps.insert(
438            "finance".to_string(),
439            DependencySpec::Detailed(DetailedDependency {
440                version: None,
441                path: None,
442                git: Some("https://github.com/example/finance.git".to_string()),
443                tag: None,
444                branch: None,
445                rev: Some("deadbeef".to_string()),
446                permissions: None,
447            }),
448        );
449
450        assert!(lock.is_fresh(&deps), "Lock should be fresh when deps match");
451    }
452
453    #[test]
454    fn test_is_fresh_missing_dep() {
455        let lock = sample_lock();
456        let mut deps = HashMap::new();
457        deps.insert(
458            "utils".to_string(),
459            DependencySpec::Version("0.1.0".to_string()),
460        );
461        deps.insert(
462            "finance".to_string(),
463            DependencySpec::Version("0.2.0".to_string()),
464        );
465        deps.insert(
466            "new-dep".to_string(),
467            DependencySpec::Version("1.0.0".to_string()),
468        );
469
470        assert!(
471            !lock.is_fresh(&deps),
472            "Lock should be stale when a new dep is added"
473        );
474    }
475
476    #[test]
477    fn test_is_fresh_removed_dep() {
478        let lock = sample_lock();
479        let mut deps = HashMap::new();
480        deps.insert(
481            "utils".to_string(),
482            DependencySpec::Version("0.1.0".to_string()),
483        );
484
485        assert!(
486            !lock.is_fresh(&deps),
487            "Lock should be stale when a dep is removed"
488        );
489    }
490
491    #[test]
492    fn test_hash_path_file() {
493        let tmp = tempfile::tempdir().unwrap();
494        let file = tmp.path().join("test.shape");
495        std::fs::write(&file, "let x = 1").unwrap();
496
497        let hash1 = PackageLock::hash_path(&file).unwrap();
498        let hash2 = PackageLock::hash_path(&file).unwrap();
499        assert_eq!(hash1, hash2, "Same content should produce same hash");
500        assert!(!hash1.is_empty(), "Hash should not be empty");
501    }
502
503    #[test]
504    fn test_hash_path_directory() {
505        let tmp = tempfile::tempdir().unwrap();
506        std::fs::write(tmp.path().join("a.shape"), "let a = 1").unwrap();
507        std::fs::write(tmp.path().join("b.shape"), "let b = 2").unwrap();
508        std::fs::write(tmp.path().join("README.md"), "not shape").unwrap();
509
510        let hash = PackageLock::hash_path(tmp.path()).unwrap();
511        assert!(!hash.is_empty(), "Directory hash should not be empty");
512    }
513
514    #[test]
515    fn test_artifact_external_requires_fingerprints() {
516        let err = LockedArtifact::new(
517            "schema.infer",
518            "data.csv",
519            "shape-runtime/schema_inference@v1",
520            ArtifactDeterminism::External {
521                fingerprints: BTreeMap::new(),
522            },
523            BTreeMap::new(),
524            shape_wire::WireValue::Null,
525        )
526        .unwrap_err();
527        assert!(err.contains("requires at least one fingerprint"));
528    }
529
530    #[test]
531    fn test_artifact_roundtrip_and_lookup() {
532        let mut inputs = BTreeMap::new();
533        inputs.insert("source".to_string(), "data.csv".to_string());
534        inputs.insert("file_hash".to_string(), "sha256:abc".to_string());
535
536        let mut fingerprints = BTreeMap::new();
537        fingerprints.insert("file:data.csv".to_string(), "sha256:abc".to_string());
538
539        let payload = shape_wire::WireValue::Object(BTreeMap::from([(
540            "ok".to_string(),
541            shape_wire::WireValue::Bool(true),
542        )]));
543        let artifact = LockedArtifact::new(
544            "schema.infer",
545            "data.csv",
546            "shape-runtime/schema_inference@v1",
547            ArtifactDeterminism::External { fingerprints },
548            inputs.clone(),
549            payload.clone(),
550        )
551        .expect("artifact should build");
552
553        let hash = artifact.inputs_hash.clone();
554        let mut lock = PackageLock::new();
555        lock.upsert_artifact(artifact).unwrap();
556
557        let found = lock
558            .artifact("schema.infer", "data.csv", &hash)
559            .expect("artifact should be found");
560        assert_eq!(found.payload().unwrap(), payload);
561    }
562
563    #[test]
564    fn test_upsert_artifact_variant_keeps_multiple_fingerprints() {
565        let mut inputs_a = BTreeMap::new();
566        inputs_a.insert("target".to_string(), "linux-x86_64-gnu".to_string());
567        let mut fp_a = BTreeMap::new();
568        fp_a.insert(
569            "native:linux-x86_64-gnu:duckdb@0.1.0:duckdb:system".to_string(),
570            "system-name:libduckdb.so:version:1.0.0".to_string(),
571        );
572        let artifact_a = LockedArtifact::new(
573            "external.native.library",
574            "duckdb@0.1.0::duckdb",
575            "shape-runtime/native_resolution@v1",
576            ArtifactDeterminism::External { fingerprints: fp_a },
577            inputs_a,
578            shape_wire::WireValue::String("linux".to_string()),
579        )
580        .expect("artifact should build");
581        let hash_a = artifact_a.inputs_hash.clone();
582
583        let mut inputs_b = BTreeMap::new();
584        inputs_b.insert("target".to_string(), "darwin-aarch64".to_string());
585        let mut fp_b = BTreeMap::new();
586        fp_b.insert(
587            "native:darwin-aarch64:duckdb@0.1.0:duckdb:system".to_string(),
588            "system-name:libduckdb.dylib:version:1.0.0".to_string(),
589        );
590        let artifact_b = LockedArtifact::new(
591            "external.native.library",
592            "duckdb@0.1.0::duckdb",
593            "shape-runtime/native_resolution@v1",
594            ArtifactDeterminism::External { fingerprints: fp_b },
595            inputs_b,
596            shape_wire::WireValue::String("darwin".to_string()),
597        )
598        .expect("artifact should build");
599        let hash_b = artifact_b.inputs_hash.clone();
600
601        let mut lock = PackageLock::new();
602        lock.upsert_artifact_variant(artifact_a).unwrap();
603        lock.upsert_artifact_variant(artifact_b).unwrap();
604
605        assert!(
606            lock.artifact("external.native.library", "duckdb@0.1.0::duckdb", &hash_a)
607                .is_some()
608        );
609        assert!(
610            lock.artifact("external.native.library", "duckdb@0.1.0::duckdb", &hash_b)
611                .is_some()
612        );
613        assert_eq!(lock.artifacts.len(), 2);
614    }
615
616    #[test]
617    fn test_schema_namespace_requires_external_determinism() {
618        let mut lock = PackageLock::new();
619        let artifact = LockedArtifact::new(
620            "schema.infer",
621            "data.csv",
622            "shape-runtime/schema_inference@v1",
623            ArtifactDeterminism::Hermetic,
624            BTreeMap::new(),
625            shape_wire::WireValue::Null,
626        )
627        .unwrap();
628
629        let err = lock.upsert_artifact(artifact).unwrap_err();
630        assert!(err.contains("must declare external determinism"));
631    }
632
633    #[test]
634    fn test_external_namespace_prefix_requires_external_determinism() {
635        let mut lock = PackageLock::new();
636        let artifact = LockedArtifact::new(
637            "external.datasource.schema",
638            "orders.csv",
639            "shape-ext/csv@v1",
640            ArtifactDeterminism::Hermetic,
641            BTreeMap::new(),
642            shape_wire::WireValue::Null,
643        )
644        .unwrap();
645
646        let err = lock.upsert_artifact(artifact).unwrap_err();
647        assert!(err.contains("external.datasource.schema:orders.csv"));
648    }
649
650    #[test]
651    fn test_artifacts_persist_through_lock_roundtrip() {
652        let tmp = tempfile::tempdir().unwrap();
653        let lock_path = tmp.path().join("shape.lock");
654
655        let mut inputs = BTreeMap::new();
656        inputs.insert("source".to_string(), "prices.csv".to_string());
657        inputs.insert("file_hash".to_string(), "sha256:def".to_string());
658
659        let mut fingerprints = BTreeMap::new();
660        fingerprints.insert("file:prices.csv".to_string(), "sha256:def".to_string());
661
662        let artifact = LockedArtifact::new(
663            "schema.infer",
664            "prices.csv",
665            "shape-runtime/schema_inference@v1",
666            ArtifactDeterminism::External { fingerprints },
667            inputs,
668            shape_wire::WireValue::String("cached".to_string()),
669        )
670        .expect("artifact should build");
671        let hash = artifact.inputs_hash.clone();
672
673        let mut lock = sample_lock();
674        lock.upsert_artifact(artifact).unwrap();
675        lock.write(&lock_path).unwrap();
676
677        let loaded = PackageLock::read(&lock_path).expect("lockfile should parse");
678        let cached = loaded
679            .artifact("schema.infer", "prices.csv", &hash)
680            .expect("artifact should roundtrip");
681        assert_eq!(
682            cached.payload().unwrap(),
683            shape_wire::WireValue::String("cached".to_string())
684        );
685    }
686}