Skip to main content

bv_core/
lockfile.rs

1use std::collections::BTreeMap;
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6use crate::error::{BvError, Result};
7
/// Format version written into new lockfiles by `Lockfile::new`.
///
/// `Lockfile::from_toml_str` rejects any document whose `version` is greater
/// than this value.
pub const LOCKFILE_FORMAT_VERSION: u32 = 1;

/// Routing table mapping a binary name to the id of the tool that provides it.
pub type BinaryIndex = BTreeMap<String, String>;
11
12// SpecKind
13
/// How a tool's image is packaged.
///
/// Serialized in `snake_case` (`legacy_image` / `factored_oci`). The default
/// variant is `LegacyImage`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum SpecKind {
    /// Single squashed image as pulled from biocontainers / a legacy registry.
    #[default]
    LegacyImage,
    /// Factored OCI image where each conda package is its own layer.
    FactoredOci,
}
23
24// CondaPackagePin
25
/// Exact conda package that a layer was built from.
///
/// Stored on a `LayerDescriptor` via its optional `conda_package` field.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CondaPackagePin {
    /// Conda package name (e.g. `openssl`).
    pub name: String,
    /// Package version (e.g. `3.2.1`).
    pub version: String,
    /// Conda build string (e.g. `h0_0`).
    pub build: String,
    /// Channel the package came from (e.g. `conda-forge`).
    pub channel: String,
    /// sha256 of the .conda / .tar.bz2 archive (hex, no prefix).
    pub sha256: String,
}
36
37// LayerDescriptor
38
/// One OCI layer entry in a lockfile tool record.
///
/// Only `digest` takes part in `LockfileEntry::is_equivalent`; the other
/// fields are carried along but not compared there.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LayerDescriptor {
    /// Content digest of the compressed layer blob (e.g. `sha256:abc...`).
    pub digest: String,
    /// Layer size. NOTE(review): presumably bytes of the compressed blob — confirm.
    pub size: u64,
    /// Layer media type string, e.g. `application/vnd.oci.image.layer.v1.tar+zstd`.
    pub media_type: String,
    /// Present only for `factored_oci` layers that correspond to a single conda package.
    /// Defaults to `None` when absent and is omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub conda_package: Option<CondaPackagePin>,
}
50
51impl LayerDescriptor {
52    pub fn new_zstd(digest: impl Into<String>, size: u64) -> Self {
53        Self {
54            digest: digest.into(),
55            size,
56            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
57            conda_package: None,
58        }
59    }
60
61    pub fn new_gzip(digest: impl Into<String>, size: u64) -> Self {
62        Self {
63            digest: digest.into(),
64            size,
65            media_type: "application/vnd.oci.image.layer.v1.tar+gzip".into(),
66            conda_package: None,
67        }
68    }
69}
70
71// ReferenceDataPin
72
/// Per-dataset pin stored inside a lockfile entry.
///
/// Held in `LockfileEntry::reference_data_pins`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceDataPin {
    /// Dataset identifier.
    pub id: String,
    /// Pinned dataset version.
    pub version: String,
    /// NOTE(review): presumably the sha256 of the dataset payload; what exactly
    /// is hashed (and whether a `sha256:` prefix is expected) is not visible here — confirm.
    pub sha256: String,
}
80
81// LockfileEntry
82
/// One resolved tool entry in `bv.lock`.
///
/// Stability fields used by `bv lock --check` to detect drift:
/// `tool_id`, `version`, `image_digest`, `manifest_sha256`,
/// and the `digest` of every layer for `factored_oci` entries.
/// Timestamps and sizes are informational only.
///
/// Unknown keys are rejected on deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LockfileEntry {
    /// Identifier of the tool this entry describes.
    pub tool_id: String,
    /// Version requirement as declared in `bv.toml` (e.g. `=2.14.0`, `^2`, or `*`).
    /// Omitted from the output when empty.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub declared_version_req: String,
    /// Resolved semver (e.g. `2.14.0`).
    pub version: String,
    /// How the image was built; drives the pull path and layer verification strategy.
    /// Omitted from the output for `legacy_image` entries.
    #[serde(default, skip_serializing_if = "SpecKind::is_legacy")]
    pub spec_kind: SpecKind,
    /// Canonical OCI reference from the manifest (e.g. `ncbi/blast:2.14.0`).
    pub image_reference: String,
    /// Content digest of the pulled image (e.g. `sha256:abc123...`).
    pub image_digest: String,
    /// SHA-256 of the manifest TOML at resolve time; used for drift detection.
    /// Omitted from the output when empty.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub manifest_sha256: String,
    /// Image size in bytes, when known. Informational only.
    pub image_size_bytes: Option<u64>,
    /// Per-layer descriptors (ordered as they appear in the OCI manifest).
    /// Empty for `legacy_image` entries.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub layers: Vec<LayerDescriptor>,
    /// When this entry was resolved (UTC). Informational only.
    pub resolved_at: DateTime<Utc>,
    /// Reference-data pins attached to this tool.
    /// NOTE(review): the meaning of the map key is not visible here — confirm against the writer.
    #[serde(default)]
    pub reference_data_pins: BTreeMap<String, ReferenceDataPin>,
    /// Binary names this tool contributes to the binary index.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub binaries: Vec<String>,
}
120
121impl SpecKind {
122    pub fn is_legacy(&self) -> bool {
123        matches!(self, SpecKind::LegacyImage)
124    }
125}
126
127impl LockfileEntry {
128    /// True when two entries represent the same resolved state.
129    /// Ignores timestamps, sizes, and declared_version_req.
130    /// For `factored_oci` entries, all layer digests must also match.
131    pub fn is_equivalent(&self, other: &Self) -> bool {
132        if self.tool_id != other.tool_id
133            || self.version != other.version
134            || self.image_digest != other.image_digest
135        {
136            return false;
137        }
138        if !self.manifest_sha256.is_empty()
139            && !other.manifest_sha256.is_empty()
140            && self.manifest_sha256 != other.manifest_sha256
141        {
142            return false;
143        }
144        if self.layers.len() != other.layers.len() {
145            return false;
146        }
147        self.layers
148            .iter()
149            .zip(other.layers.iter())
150            .all(|(a, b)| a.digest == b.digest)
151    }
152}
153
154// LockfileMetadata
155
/// Informational metadata written to `bv.lock` by `bv lock`.
///
/// None of these fields are consulted by `Lockfile::is_equivalent_to`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileMetadata {
    /// Version of bv that produced the file; `Default` fills in this crate's
    /// `CARGO_PKG_VERSION`.
    pub bv_version: String,
    /// When the metadata was generated (UTC); `Default` uses the current time.
    pub generated_at: DateTime<Utc>,
    /// Optional free-form hardware description; omitted from the output when `None`.
    /// NOTE(review): the expected content/format is not visible here — confirm with the writer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hardware_summary: Option<String>,
}
164
165impl Default for LockfileMetadata {
166    fn default() -> Self {
167        Self {
168            bv_version: env!("CARGO_PKG_VERSION").to_string(),
169            generated_at: Utc::now(),
170            hardware_summary: None,
171        }
172    }
173}
174
175// Lockfile
176
/// The full `bv.lock` file.
///
/// Format is stable: `bv lock --check` fails if the generated lockfile
/// would differ from the on-disk one on any stability field.
///
/// Unknown top-level keys are rejected on deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Lockfile {
    /// Lockfile format version; see `LOCKFILE_FORMAT_VERSION`.
    pub version: u32,
    /// Informational metadata; falls back to `LockfileMetadata::default()` when absent.
    #[serde(default)]
    pub metadata: LockfileMetadata,
    /// Resolved tool entries, kept in a `BTreeMap` so they serialize in sorted key order.
    #[serde(default)]
    pub tools: BTreeMap<String, LockfileEntry>,
    /// Derived routing table: binary name -> tool id.
    /// Rebuilt by `rebuild_binary_index` whenever tools change.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub binary_index: BinaryIndex,
}
194
195impl Lockfile {
196    pub fn new() -> Self {
197        Self {
198            version: LOCKFILE_FORMAT_VERSION,
199            metadata: LockfileMetadata::default(),
200            tools: BTreeMap::new(),
201            binary_index: BTreeMap::new(),
202        }
203    }
204
205    pub fn from_toml_str(s: &str) -> Result<Self> {
206        let lockfile: Self = toml::from_str(s).map_err(|e| BvError::LockfileParse(e.to_string()))?;
207        if lockfile.version > LOCKFILE_FORMAT_VERSION {
208            return Err(BvError::LockfileParse(format!(
209                "bv.lock uses format version {}, but this bv only supports up to version {}.\n\
210                Upgrade bv: curl -fsSL https://raw.githubusercontent.com/tejasprabhune/bv/main/install.sh | sh",
211                lockfile.version, LOCKFILE_FORMAT_VERSION
212            )));
213        }
214        Ok(lockfile)
215    }
216
217    pub fn to_toml_string(&self) -> Result<String> {
218        toml::to_string_pretty(self).map_err(|e| BvError::LockfileParse(e.to_string()))
219    }
220
221    /// Rebuild `binary_index` from each tool's `binaries` list.
222    ///
223    /// `overrides` maps binary name to the tool id that wins when two tools
224    /// expose the same name. Without an override, a collision returns `Err`.
225    pub fn rebuild_binary_index(
226        &mut self,
227        overrides: &BTreeMap<String, String>,
228    ) -> std::result::Result<(), String> {
229        let mut index: BinaryIndex = BTreeMap::new();
230        let mut collisions: Vec<String> = Vec::new();
231
232        let mut sorted: Vec<_> = self.tools.iter().collect();
233        sorted.sort_by_key(|(id, _)| id.as_str());
234
235        for (tool_id, entry) in &sorted {
236            for binary in &entry.binaries {
237                if let Some(winner) = overrides.get(binary) {
238                    index.insert(binary.clone(), winner.clone());
239                } else if let Some(existing) = index.insert(binary.clone(), tool_id.to_string())
240                    && existing != tool_id.as_str()
241                {
242                    collisions.push(format!(
243                        "'{binary}' exposed by both '{existing}' and '{tool_id}'"
244                    ));
245                    index.insert(binary.clone(), existing);
246                }
247            }
248        }
249
250        if !collisions.is_empty() {
251            return Err(collisions.join(", "));
252        }
253        self.binary_index = index;
254        Ok(())
255    }
256
257    /// True when both lockfiles describe the same set of tools at the same
258    /// resolved versions and digests.
259    pub fn is_equivalent_to(&self, other: &Self) -> bool {
260        if self.tools.len() != other.tools.len() {
261            return false;
262        }
263        for (id, entry) in &self.tools {
264            match other.tools.get(id) {
265                Some(other_entry) => {
266                    if !entry.is_equivalent(other_entry) {
267                        return false;
268                    }
269                }
270                None => return false,
271            }
272        }
273        true
274    }
275}
276
277impl Default for Lockfile {
278    fn default() -> Self {
279        Self::new()
280    }
281}
282
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a UTC timestamp from whole seconds since the Unix epoch.
    fn fixture_time(secs: i64) -> chrono::DateTime<chrono::Utc> {
        chrono::DateTime::<chrono::Utc>::from_timestamp(secs, 0).unwrap()
    }

    /// Legacy-image fixture exposing a single `<id>-bin` binary.
    fn entry(id: &str, version: &str, digest: &str) -> LockfileEntry {
        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: String::new(),
            version: version.to_owned(),
            spec_kind: SpecKind::LegacyImage,
            image_reference: format!("registry/{id}:{version}"),
            image_digest: digest.to_owned(),
            manifest_sha256: format!("sha256:m-{id}"),
            image_size_bytes: None,
            layers: Vec::new(),
            resolved_at: fixture_time(1_700_000_000),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![format!("{id}-bin")],
        }
    }

    /// Factored-OCI fixture with two zstd layers: a shared openssl layer that
    /// carries a conda pin, and a tool-specific layer without one.
    fn factored_entry(id: &str) -> LockfileEntry {
        let openssl_layer = LayerDescriptor {
            conda_package: Some(CondaPackagePin {
                name: "openssl".into(),
                version: "3.2.1".into(),
                build: "h0_0".into(),
                channel: "conda-forge".into(),
                sha256: "abcd".into(),
            }),
            ..LayerDescriptor::new_zstd("sha256:shared-openssl", 10_000_000)
        };
        let tool_layer = LayerDescriptor::new_zstd(format!("sha256:pkg-{id}"), 20_000_000);

        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: "=1.0.0".into(),
            version: "1.0.0".into(),
            spec_kind: SpecKind::FactoredOci,
            image_reference: format!("registry/{id}:1.0.0"),
            image_digest: format!("sha256:img-{id}"),
            manifest_sha256: format!("sha256:man-{id}"),
            image_size_bytes: None,
            layers: vec![openssl_layer, tool_layer],
            resolved_at: fixture_time(1_700_000_000),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![id.to_owned()],
        }
    }

    /// Regression: lockfile serialization must be byte-deterministic so
    /// `bv lock --check` can compare against the on-disk file.
    #[test]
    fn to_toml_string_is_deterministic() {
        let mut lock = Lockfile::new();
        // Deliberately inserted out of order.
        for id in ["zebra", "alpha", "mango", "beta", "tango"] {
            let digest = format!("sha256:d-{id}");
            lock.tools
                .insert(id.to_owned(), entry(id, "1.0.0", &digest));
            lock.binary_index
                .insert(format!("{id}-bin"), id.to_owned());
        }

        let baseline = lock.to_toml_string().unwrap();
        for _ in 0..32 {
            assert_eq!(
                baseline,
                lock.to_toml_string().unwrap(),
                "non-deterministic output"
            );
        }

        // Tools must appear in lexicographic order.
        let offsets: Vec<usize> = ["alpha", "beta", "mango", "tango", "zebra"]
            .iter()
            .map(|id| baseline.find(&format!("\"{id}\"")).unwrap())
            .collect();
        assert!(offsets.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// Legacy entries must not emit `spec_kind`, to keep backward compat.
    #[test]
    fn spec_kind_legacy_is_skipped_in_serialization() {
        let mut lock = Lockfile::new();
        lock.tools
            .insert("tool".into(), entry("tool", "1.0.0", "sha256:abc"));
        let s = lock.to_toml_string().unwrap();
        assert!(
            !s.contains("spec_kind"),
            "legacy entries must not emit spec_kind: {s}"
        );
    }

    /// A factored entry survives a TOML round trip with its layers and conda pin.
    #[test]
    fn factored_entry_round_trips() {
        let mut lock = Lockfile::new();
        lock.tools
            .insert("samtools".into(), factored_entry("samtools"));

        let text = lock.to_toml_string().unwrap();
        let restored = Lockfile::from_toml_str(&text).unwrap();

        let e = &restored.tools["samtools"];
        assert_eq!(e.spec_kind, SpecKind::FactoredOci);
        assert_eq!(e.layers.len(), 2);
        assert_eq!(e.layers[0].conda_package.as_ref().unwrap().name, "openssl");
    }

    /// Changing a single layer digest breaks equivalence.
    #[test]
    fn is_equivalent_checks_layer_digests() {
        let a = factored_entry("samtools");
        let mut b = a.clone();
        b.layers[0].digest = "sha256:different".into();
        assert!(!a.is_equivalent(&b));
    }

    /// A lockfile declaring a newer format version is rejected with a
    /// message that mentions the format version.
    #[test]
    fn rejects_future_format_version() {
        let toml = r#"
version = 99

[metadata]
bv_version = "0.0.0"
generated_at = "2024-01-01T00:00:00Z"
"#;
        let msg = Lockfile::from_toml_str(toml).unwrap_err().to_string();
        assert!(
            msg.contains("format version"),
            "expected 'format version' in error: {msg}"
        );
    }

    /// `resolved_at` is informational and does not affect equivalence.
    #[test]
    fn is_equivalent_ignores_timestamps() {
        let a = factored_entry("samtools");
        let mut b = a.clone();
        b.resolved_at = fixture_time(1_800_000_000);
        assert!(a.is_equivalent(&b));
    }
}
418
#[cfg(test)]
mod prop_tests {
    use proptest::prelude::*;

    use super::*;

    /// Tool ids: a lowercase letter followed by 1-15 of `[a-z0-9_-]`.
    fn arb_tool_id() -> impl Strategy<Value = String> {
        // A regex string is itself a `Strategy<Value = String>`; no mapping needed.
        "[a-z][a-z0-9_-]{1,15}"
    }

    /// `sha256:`-prefixed digests with 64 lowercase hex characters.
    fn arb_digest() -> impl Strategy<Value = String> {
        "[0-9a-f]{64}".prop_map(|hex| format!("sha256:{hex}"))
    }

    /// Three-part versions with each component in `0..20`.
    fn arb_version() -> impl Strategy<Value = String> {
        (0u32..20, 0u32..20, 0u32..20).prop_map(|(a, b, c)| format!("{a}.{b}.{c}"))
    }

    /// zstd layers with an arbitrary digest and size, and no conda pin.
    fn arb_layer() -> impl Strategy<Value = LayerDescriptor> {
        (arb_digest(), 0u64..10_000_000u64).prop_map(|(digest, size)| LayerDescriptor {
            digest,
            size,
            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
            conda_package: None,
        })
    }

    prop_compose! {
        /// An `(id, entry)` pair; entries with at least one layer are marked
        /// `FactoredOci`, the rest `LegacyImage`.
        fn arb_entry()(
            id in arb_tool_id(),
            version in arb_version(),
            digest in arb_digest(),
            manifest_sha256 in arb_digest(),
            size in proptest::option::of(0u64..10_000_000_000u64),
            layers in proptest::collection::vec(arb_layer(), 0..6),
        ) -> (String, LockfileEntry) {
            let spec_kind = if layers.is_empty() { SpecKind::LegacyImage } else { SpecKind::FactoredOci };
            let entry = LockfileEntry {
                tool_id: id.clone(),
                declared_version_req: format!("={version}"),
                version: version.clone(),
                spec_kind,
                image_reference: format!("registry/{id}:{version}"),
                image_digest: digest,
                manifest_sha256,
                image_size_bytes: size,
                layers,
                resolved_at: chrono::DateTime::<chrono::Utc>::from_timestamp(1700000000, 0).unwrap(),
                reference_data_pins: BTreeMap::new(),
                binaries: vec![id.clone()],
            };
            (id, entry)
        }
    }

    prop_compose! {
        /// A lockfile holding up to nine generated entries (duplicate ids collapse).
        fn arb_lockfile()(
            entries in proptest::collection::vec(arb_entry(), 0..10),
        ) -> Lockfile {
            let mut lock = Lockfile::new();
            for (id, entry) in entries {
                lock.tools.insert(id, entry);
            }
            lock
        }
    }

    proptest! {
        /// Round-trip through TOML must be lossless on all stability fields.
        #[test]
        fn round_trip_preserves_all_fields(lock in arb_lockfile()) {
            let serialized = lock.to_toml_string().expect("serialize");
            let deserialized = Lockfile::from_toml_str(&serialized).expect("deserialize");

            prop_assert_eq!(lock.version, deserialized.version);
            prop_assert_eq!(lock.tools.len(), deserialized.tools.len());

            for (id, orig) in &lock.tools {
                let restored = deserialized.tools.get(id).expect("tool present after round-trip");
                prop_assert_eq!(&orig.tool_id, &restored.tool_id);
                prop_assert_eq!(&orig.version, &restored.version);
                prop_assert_eq!(&orig.image_reference, &restored.image_reference);
                prop_assert_eq!(&orig.image_digest, &restored.image_digest);
                prop_assert_eq!(&orig.manifest_sha256, &restored.manifest_sha256);
                prop_assert_eq!(orig.image_size_bytes, restored.image_size_bytes);
                prop_assert_eq!(orig.layers.len(), restored.layers.len());
                for (la, lb) in orig.layers.iter().zip(restored.layers.iter()) {
                    prop_assert_eq!(&la.digest, &lb.digest);
                    prop_assert_eq!(la.size, lb.size);
                }
            }
        }

        /// Serialization is deterministic: calling to_toml_string twice gives identical bytes.
        #[test]
        fn serialization_is_deterministic(lock in arb_lockfile()) {
            let s1 = lock.to_toml_string().expect("first serialize");
            let s2 = lock.to_toml_string().expect("second serialize");
            prop_assert_eq!(s1, s2);
        }

        /// Tool map keys appear in sorted (BTreeMap) order in the output.
        #[test]
        fn tool_keys_are_sorted(lock in arb_lockfile()) {
            if lock.tools.len() < 2 { return Ok(()); }
            let s = lock.to_toml_string().expect("serialize");
            let keys: Vec<&str> = lock.tools.keys().map(|k| k.as_str()).collect();
            // First quoted occurrence of each id, taken in key order.
            let positions: Vec<usize> = keys
                .iter()
                .filter_map(|k| s.find(&format!("\"{k}\"")))
                .collect();
            prop_assert_eq!(positions.len(), lock.tools.len(), "all keys present");
            let mut sorted = positions.clone();
            sorted.sort_unstable();
            prop_assert_eq!(positions, sorted, "keys appear in sorted order");
        }

        /// No floating-point values appear in the serialized output.
        #[test]
        fn no_floats_in_output(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            let has_float = s.lines().any(|line| {
                let line = line.trim();
                // Skip quoted strings and anything containing `T` (timestamps).
                if line.contains('"') || line.contains('T') { return false; }
                if let Some(rhs) = line.split_once('=').map(|(_, v)| v.trim()) {
                    return rhs.starts_with(|c: char| c.is_ascii_digit()) && rhs.contains('.');
                }
                false
            });
            prop_assert!(!has_float, "float found in lockfile output:\n{s}");
        }

        /// Timestamps must be UTC ISO-8601 strings, not bare integers.
        #[test]
        fn timestamps_are_iso8601_utc(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            for key in ["resolved_at", "generated_at"] {
                // Check every occurrence: each tool entry has its own `resolved_at`.
                for line in s.lines().filter(|l| l.contains(key)) {
                    prop_assert!(
                        line.contains('Z') || line.contains("+00:00"),
                        "timestamp not UTC: {line}"
                    );
                }
            }
        }
    }
}