1use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::{BTreeMap, HashMap};
11use std::path::Path;
12
13use crate::project::DependencySpec;
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
17pub struct PackageLock {
18 pub version: String,
20 pub packages: Vec<LockedPackage>,
22 #[serde(default, skip_serializing_if = "Vec::is_empty")]
24 pub artifacts: Vec<LockedArtifact>,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
29pub struct LockedPackage {
30 pub name: String,
32 pub version: String,
34 pub source: LockedSource,
36 pub content_hash: String,
38 #[serde(default, skip_serializing_if = "Vec::is_empty")]
40 pub dependencies: Vec<String>,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
45#[serde(tag = "type")]
46pub enum LockedSource {
47 Path { path: String },
49 Git { url: String, rev: String },
51 Registry {
53 version: String,
54 #[serde(default)]
55 registry: Option<String>,
56 #[serde(default)]
57 path: Option<String>,
58 },
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
63#[serde(tag = "mode", rename_all = "snake_case")]
64pub enum ArtifactDeterminism {
65 Hermetic,
67 External {
70 fingerprints: BTreeMap<String, String>,
71 },
72}
73
74impl ArtifactDeterminism {
75 fn validate(&self) -> Result<(), String> {
76 match self {
77 ArtifactDeterminism::Hermetic => Ok(()),
78 ArtifactDeterminism::External { fingerprints } => {
79 if fingerprints.is_empty() {
80 Err(
81 "external artifact determinism requires at least one fingerprint"
82 .to_string(),
83 )
84 } else {
85 Ok(())
86 }
87 }
88 }
89 }
90
91 fn augment_inputs(&self, inputs: &mut BTreeMap<String, String>) {
92 if let ArtifactDeterminism::External { fingerprints } = self {
93 for (key, value) in fingerprints {
94 let merged_key = format!("external::{key}");
95 inputs.entry(merged_key).or_insert_with(|| value.clone());
96 }
97 }
98 }
99}
100
101#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
103pub struct LockedArtifact {
104 pub namespace: String,
106 pub key: String,
108 pub producer: String,
110 pub determinism: ArtifactDeterminism,
112 pub inputs: BTreeMap<String, String>,
114 pub inputs_hash: String,
116 pub created_at: String,
118 pub payload_json: String,
120}
121
122impl LockedArtifact {
123 pub fn new(
125 namespace: impl Into<String>,
126 key: impl Into<String>,
127 producer: impl Into<String>,
128 determinism: ArtifactDeterminism,
129 mut inputs: BTreeMap<String, String>,
130 payload: shape_wire::WireValue,
131 ) -> Result<Self, String> {
132 determinism.validate()?;
133 determinism.augment_inputs(&mut inputs);
134
135 let inputs_hash = PackageLock::hash_inputs(&inputs);
136 let payload_json =
137 serde_json::to_string(&payload).map_err(|e| format!("invalid wire payload: {e}"))?;
138
139 Ok(Self {
140 namespace: namespace.into(),
141 key: key.into(),
142 producer: producer.into(),
143 determinism,
144 inputs,
145 inputs_hash,
146 created_at: chrono::Utc::now().to_rfc3339(),
147 payload_json,
148 })
149 }
150
151 pub fn payload(&self) -> Result<shape_wire::WireValue, String> {
153 serde_json::from_str(&self.payload_json)
154 .map_err(|e| format!("invalid artifact payload encoding: {e}"))
155 }
156}
157
158impl PackageLock {
159 const EXTERNAL_REQUIRED_NAMESPACES: [&'static str; 1] = ["schema.infer"];
160 const EXTERNAL_REQUIRED_NAMESPACE_PREFIXES: [&'static str; 2] =
161 ["external.", "comptime.external."];
162 const EXTERNAL_REQUIRED_PRODUCERS: [&'static str; 1] = ["shape-runtime/schema_inference@v1"];
163
164 pub fn new() -> Self {
166 Self {
167 version: "1".to_string(),
168 packages: Vec::new(),
169 artifacts: Vec::new(),
170 }
171 }
172
173 fn requires_external_determinism(namespace: &str, producer: &str) -> bool {
174 Self::EXTERNAL_REQUIRED_NAMESPACES.contains(&namespace)
175 || Self::EXTERNAL_REQUIRED_NAMESPACE_PREFIXES
176 .iter()
177 .any(|prefix| namespace.starts_with(prefix))
178 || Self::EXTERNAL_REQUIRED_PRODUCERS.contains(&producer)
179 }
180
181 pub fn read(path: &Path) -> Option<Self> {
184 let content = std::fs::read_to_string(path).ok()?;
185 let mut lock: Self = toml::from_str(&content).ok()?;
186 if lock.version.is_empty() {
187 lock.version = "1".to_string();
188 }
189 Some(lock)
190 }
191
192 pub fn write(&self, path: &Path) -> std::io::Result<()> {
194 let content = toml::to_string_pretty(self).map_err(std::io::Error::other)?;
195 std::fs::write(path, content)
196 }
197
198 pub fn is_fresh(&self, deps: &HashMap<String, DependencySpec>) -> bool {
203 for (name, spec) in deps {
204 let Some(locked) = self.packages.iter().find(|p| &p.name == name) else {
205 return false;
206 };
207
208 match spec {
209 DependencySpec::Version(req) => {
210 if !locked_version_matches_req(&locked.version, req) {
211 return false;
212 }
213 }
214 DependencySpec::Detailed(detail) => {
215 if detail.path.is_none()
218 && detail.git.is_none()
219 && let Some(req) = &detail.version
220 && !locked_version_matches_req(&locked.version, req)
221 {
222 return false;
223 }
224 }
225 }
226 }
227 for pkg in &self.packages {
228 if !deps.contains_key(&pkg.name) {
229 return false;
230 }
231 }
232 true
233 }
234
235 fn validate_artifact(artifact: &LockedArtifact) -> Result<(), String> {
236 artifact.determinism.validate()?;
237 let expected_hash =
238 Self::artifact_inputs_hash(artifact.inputs.clone(), &artifact.determinism)?;
239 if artifact.inputs_hash != expected_hash {
240 return Err(format!(
241 "artifact inputs_hash mismatch for {}:{}",
242 artifact.namespace, artifact.key
243 ));
244 }
245
246 if Self::requires_external_determinism(&artifact.namespace, &artifact.producer)
247 && !matches!(artifact.determinism, ArtifactDeterminism::External { .. })
248 {
249 return Err(format!(
250 "artifact {}:{} must declare external determinism fingerprints",
251 artifact.namespace, artifact.key
252 ));
253 }
254
255 Ok(())
256 }
257
258 pub fn upsert_artifact(&mut self, artifact: LockedArtifact) -> Result<(), String> {
260 Self::validate_artifact(&artifact)?;
261 if let Some(existing) = self
262 .artifacts
263 .iter_mut()
264 .find(|a| a.namespace == artifact.namespace && a.key == artifact.key)
265 {
266 *existing = artifact;
267 } else {
268 self.artifacts.push(artifact);
269 }
270 Ok(())
271 }
272
273 pub fn upsert_artifact_variant(&mut self, artifact: LockedArtifact) -> Result<(), String> {
279 Self::validate_artifact(&artifact)?;
280 if let Some(existing) = self.artifacts.iter_mut().find(|a| {
281 a.namespace == artifact.namespace
282 && a.key == artifact.key
283 && a.inputs_hash == artifact.inputs_hash
284 }) {
285 *existing = artifact;
286 } else {
287 self.artifacts.push(artifact);
288 }
289 Ok(())
290 }
291
292 pub fn artifact(
294 &self,
295 namespace: &str,
296 key: &str,
297 inputs_hash: &str,
298 ) -> Option<&LockedArtifact> {
299 self.artifacts
300 .iter()
301 .find(|a| a.namespace == namespace && a.key == key && a.inputs_hash == inputs_hash)
302 }
303
304 pub fn hash_inputs(inputs: &BTreeMap<String, String>) -> String {
306 let mut hasher = Sha256::new();
307 for (key, value) in inputs {
308 hasher.update(key.as_bytes());
309 hasher.update([0]);
310 hasher.update(value.as_bytes());
311 hasher.update([0xff]);
312 }
313 format!("sha256:{:x}", hasher.finalize())
314 }
315
316 pub fn artifact_inputs_hash(
318 mut inputs: BTreeMap<String, String>,
319 determinism: &ArtifactDeterminism,
320 ) -> Result<String, String> {
321 determinism.validate()?;
322 determinism.augment_inputs(&mut inputs);
323 Ok(Self::hash_inputs(&inputs))
324 }
325
326 pub fn hash_path(path: &Path) -> std::io::Result<String> {
331 let mut hasher = Sha256::new();
332
333 if path.is_file() {
334 let data = std::fs::read(path)?;
335 hasher.update(&data);
336 } else if path.is_dir() {
337 let mut entries: Vec<_> = walkdir::WalkDir::new(path)
338 .into_iter()
339 .filter_map(|e| e.ok())
340 .filter(|e| e.path().extension().is_some_and(|ext| ext == "shape"))
341 .collect();
342 entries.sort_by_key(|e| e.path().to_path_buf());
343 for entry in entries {
344 let data = std::fs::read(entry.path())?;
345 hasher.update(&data);
346 }
347 }
348
349 Ok(format!("{:x}", hasher.finalize()))
350 }
351}
352
353fn locked_version_matches_req(locked: &str, req: &str) -> bool {
354 let Ok(parsed_version) = semver::Version::parse(locked) else {
355 return false;
356 };
357 let Ok(version_req) = semver::VersionReq::parse(req) else {
358 return false;
359 };
360 version_req.matches(&parsed_version)
361}
362
363impl Default for PackageLock {
364 fn default() -> Self {
365 Self::new()
366 }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use crate::project::DetailedDependency;

    /// Converts borrowed key/value pairs into an owned string map.
    fn string_map(pairs: &[(&str, &str)]) -> BTreeMap<String, String> {
        pairs
            .iter()
            .map(|&(key, value)| (key.to_owned(), value.to_owned()))
            .collect()
    }

    /// Wraps a version requirement in a plain `DependencySpec::Version`.
    fn version_dep(req: &str) -> DependencySpec {
        DependencySpec::Version(req.to_owned())
    }

    /// Builds a detailed dependency where only `path`, `git` and `rev` may be set.
    fn detailed_dep(path: Option<&str>, git: Option<&str>, rev: Option<&str>) -> DependencySpec {
        DependencySpec::Detailed(DetailedDependency {
            version: None,
            path: path.map(String::from),
            git: git.map(String::from),
            tag: None,
            branch: None,
            rev: rev.map(String::from),
            permissions: None,
        })
    }

    /// Lockfile with one path package (`utils`) and one git package (`finance`).
    fn sample_lock() -> PackageLock {
        let utils = LockedPackage {
            name: "utils".into(),
            version: "0.1.0".into(),
            source: LockedSource::Path {
                path: "../utils".into(),
            },
            content_hash: "abc123".into(),
            dependencies: Vec::new(),
        };
        let finance = LockedPackage {
            name: "finance".into(),
            version: "0.2.0".into(),
            source: LockedSource::Git {
                url: "https://github.com/example/finance.git".into(),
                rev: "deadbeef".into(),
            },
            content_hash: "def456".into(),
            dependencies: vec!["utils".into()],
        };

        PackageLock {
            version: "1".into(),
            packages: vec![utils, finance],
            artifacts: Vec::new(),
        }
    }

    #[test]
    fn test_write_read_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let lock_path = dir.path().join("shape.lock");

        let original = sample_lock();
        original.write(&lock_path).unwrap();

        let loaded = PackageLock::read(&lock_path);
        assert!(loaded.is_some(), "Lockfile should be readable after write");
        assert_eq!(loaded.unwrap(), original);
    }

    #[test]
    fn test_read_missing_file() {
        let missing = Path::new("/nonexistent/shape.lock");
        let result = PackageLock::read(missing);
        assert!(result.is_none(), "Missing lockfile should return None");
    }

    #[test]
    fn test_is_fresh_matching_deps() {
        let lock = sample_lock();
        let deps: HashMap<String, DependencySpec> = HashMap::from([
            (
                "utils".to_owned(),
                detailed_dep(Some("../utils"), None, None),
            ),
            (
                "finance".to_owned(),
                detailed_dep(
                    None,
                    Some("https://github.com/example/finance.git"),
                    Some("deadbeef"),
                ),
            ),
        ]);

        assert!(lock.is_fresh(&deps), "Lock should be fresh when deps match");
    }

    #[test]
    fn test_is_fresh_missing_dep() {
        let lock = sample_lock();
        let deps: HashMap<String, DependencySpec> = HashMap::from([
            ("utils".to_owned(), version_dep("0.1.0")),
            ("finance".to_owned(), version_dep("0.2.0")),
            ("new-dep".to_owned(), version_dep("1.0.0")),
        ]);

        assert!(
            !lock.is_fresh(&deps),
            "Lock should be stale when a new dep is added"
        );
    }

    #[test]
    fn test_is_fresh_removed_dep() {
        let lock = sample_lock();
        let deps: HashMap<String, DependencySpec> =
            HashMap::from([("utils".to_owned(), version_dep("0.1.0"))]);

        assert!(
            !lock.is_fresh(&deps),
            "Lock should be stale when a dep is removed"
        );
    }

    #[test]
    fn test_hash_path_file() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("test.shape");
        std::fs::write(&file, "let x = 1").unwrap();

        let first = PackageLock::hash_path(&file).unwrap();
        let second = PackageLock::hash_path(&file).unwrap();
        assert_eq!(first, second, "Same content should produce same hash");
        assert!(!first.is_empty(), "Hash should not be empty");
    }

    #[test]
    fn test_hash_path_directory() {
        let dir = tempfile::tempdir().unwrap();
        let fixtures = [
            ("a.shape", "let a = 1"),
            ("b.shape", "let b = 2"),
            ("README.md", "not shape"),
        ];
        for (file_name, contents) in fixtures {
            std::fs::write(dir.path().join(file_name), contents).unwrap();
        }

        let hash = PackageLock::hash_path(dir.path()).unwrap();
        assert!(!hash.is_empty(), "Directory hash should not be empty");
    }

    #[test]
    fn test_artifact_external_requires_fingerprints() {
        let result = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External {
                fingerprints: BTreeMap::new(),
            },
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        );

        let err = result.unwrap_err();
        assert!(err.contains("requires at least one fingerprint"));
    }

    #[test]
    fn test_artifact_roundtrip_and_lookup() {
        let inputs = string_map(&[("source", "data.csv"), ("file_hash", "sha256:abc")]);
        let fingerprints = string_map(&[("file:data.csv", "sha256:abc")]);

        let payload = shape_wire::WireValue::Object(BTreeMap::from([(
            "ok".to_string(),
            shape_wire::WireValue::Bool(true),
        )]));
        let artifact = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External { fingerprints },
            inputs,
            payload.clone(),
        )
        .expect("artifact should build");

        let hash = artifact.inputs_hash.clone();
        let mut lock = PackageLock::new();
        lock.upsert_artifact(artifact).unwrap();

        let found = lock
            .artifact("schema.infer", "data.csv", &hash)
            .expect("artifact should be found");
        assert_eq!(found.payload().unwrap(), payload);
    }

    #[test]
    fn test_upsert_artifact_variant_keeps_multiple_fingerprints() {
        let artifact_a = LockedArtifact::new(
            "external.native.library",
            "duckdb@0.1.0::duckdb",
            "shape-runtime/native_resolution@v1",
            ArtifactDeterminism::External {
                fingerprints: string_map(&[(
                    "native:linux-x86_64-gnu:duckdb@0.1.0:duckdb:system",
                    "system-name:libduckdb.so:version:1.0.0",
                )]),
            },
            string_map(&[("target", "linux-x86_64-gnu")]),
            shape_wire::WireValue::String("linux".to_string()),
        )
        .expect("artifact should build");
        let hash_a = artifact_a.inputs_hash.clone();

        let artifact_b = LockedArtifact::new(
            "external.native.library",
            "duckdb@0.1.0::duckdb",
            "shape-runtime/native_resolution@v1",
            ArtifactDeterminism::External {
                fingerprints: string_map(&[(
                    "native:darwin-aarch64:duckdb@0.1.0:duckdb:system",
                    "system-name:libduckdb.dylib:version:1.0.0",
                )]),
            },
            string_map(&[("target", "darwin-aarch64")]),
            shape_wire::WireValue::String("darwin".to_string()),
        )
        .expect("artifact should build");
        let hash_b = artifact_b.inputs_hash.clone();

        let mut lock = PackageLock::new();
        lock.upsert_artifact_variant(artifact_a).unwrap();
        lock.upsert_artifact_variant(artifact_b).unwrap();

        assert!(
            lock.artifact("external.native.library", "duckdb@0.1.0::duckdb", &hash_a)
                .is_some()
        );
        assert!(
            lock.artifact("external.native.library", "duckdb@0.1.0::duckdb", &hash_b)
                .is_some()
        );
        assert_eq!(lock.artifacts.len(), 2);
    }

    #[test]
    fn test_schema_namespace_requires_external_determinism() {
        let artifact = LockedArtifact::new(
            "schema.infer",
            "data.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::Hermetic,
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        )
        .unwrap();

        let mut lock = PackageLock::new();
        let err = lock.upsert_artifact(artifact).unwrap_err();
        assert!(err.contains("must declare external determinism"));
    }

    #[test]
    fn test_external_namespace_prefix_requires_external_determinism() {
        let artifact = LockedArtifact::new(
            "external.datasource.schema",
            "orders.csv",
            "shape-ext/csv@v1",
            ArtifactDeterminism::Hermetic,
            BTreeMap::new(),
            shape_wire::WireValue::Null,
        )
        .unwrap();

        let mut lock = PackageLock::new();
        let err = lock.upsert_artifact(artifact).unwrap_err();
        assert!(err.contains("external.datasource.schema:orders.csv"));
    }

    #[test]
    fn test_artifacts_persist_through_lock_roundtrip() {
        let dir = tempfile::tempdir().unwrap();
        let lock_path = dir.path().join("shape.lock");

        let artifact = LockedArtifact::new(
            "schema.infer",
            "prices.csv",
            "shape-runtime/schema_inference@v1",
            ArtifactDeterminism::External {
                fingerprints: string_map(&[("file:prices.csv", "sha256:def")]),
            },
            string_map(&[("source", "prices.csv"), ("file_hash", "sha256:def")]),
            shape_wire::WireValue::String("cached".to_string()),
        )
        .expect("artifact should build");
        let hash = artifact.inputs_hash.clone();

        let mut lock = sample_lock();
        lock.upsert_artifact(artifact).unwrap();
        lock.write(&lock_path).unwrap();

        let loaded = PackageLock::read(&lock_path).expect("lockfile should parse");
        let cached = loaded
            .artifact("schema.infer", "prices.csv", &hash)
            .expect("artifact should roundtrip");
        assert_eq!(
            cached.payload().unwrap(),
            shape_wire::WireValue::String("cached".to_string())
        );
    }
}