// bv_core/lockfile.rs

1use std::collections::BTreeMap;
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6use crate::error::{BvError, Result};
7
8pub const LOCKFILE_FORMAT_VERSION: u32 = 1;
9
10pub type BinaryIndex = BTreeMap<String, String>;
11
12// SpecKind
13
14#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
15#[serde(rename_all = "snake_case")]
16pub enum SpecKind {
17    /// Single squashed image as pulled from biocontainers / a legacy registry.
18    #[default]
19    LegacyImage,
20    /// Factored OCI image where each conda package is its own layer.
21    FactoredOci,
22}
23
24// CondaPackagePin
25
26/// Exact conda package that a layer was built from.
27#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
28pub struct CondaPackagePin {
29    pub name: String,
30    pub version: String,
31    pub build: String,
32    pub channel: String,
33    /// sha256 of the .conda / .tar.bz2 archive (hex, no prefix).
34    pub sha256: String,
35}
36
37// LayerDescriptor
38
39/// One OCI layer entry in a lockfile tool record.
40#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
41pub struct LayerDescriptor {
42    /// Content digest of the compressed layer blob (e.g. `sha256:abc...`).
43    pub digest: String,
44    pub size: u64,
45    pub media_type: String,
46    /// Present only for `factored_oci` layers that correspond to a single conda package.
47    #[serde(default, skip_serializing_if = "Option::is_none")]
48    pub conda_package: Option<CondaPackagePin>,
49}
50
51impl LayerDescriptor {
52    pub fn new_zstd(digest: impl Into<String>, size: u64) -> Self {
53        Self {
54            digest: digest.into(),
55            size,
56            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
57            conda_package: None,
58        }
59    }
60
61    pub fn new_gzip(digest: impl Into<String>, size: u64) -> Self {
62        Self {
63            digest: digest.into(),
64            size,
65            media_type: "application/vnd.oci.image.layer.v1.tar+gzip".into(),
66            conda_package: None,
67        }
68    }
69}
70
71// ReferenceDataPin
72
73/// Per-dataset pin stored inside a lockfile entry.
74#[derive(Debug, Clone, Serialize, Deserialize)]
75pub struct ReferenceDataPin {
76    pub id: String,
77    pub version: String,
78    pub sha256: String,
79}
80
81// LockfileEntry
82
83/// One resolved tool entry in `bv.lock`.
84///
85/// Stability fields used by `bv lock --check` to detect drift:
86/// `tool_id`, `version`, `image_digest`, `manifest_sha256`,
87/// and the `digest` of every layer for `factored_oci` entries.
88/// Timestamps and sizes are informational only.
89#[derive(Debug, Clone, Serialize, Deserialize)]
90#[serde(deny_unknown_fields)]
91pub struct LockfileEntry {
92    pub tool_id: String,
93    /// Version requirement as declared in `bv.toml` (e.g. `=2.14.0`, `^2`, or `*`).
94    #[serde(default, skip_serializing_if = "String::is_empty")]
95    pub declared_version_req: String,
96    /// Resolved semver (e.g. `2.14.0`).
97    pub version: String,
98    /// How the image was built; drives the pull path and layer verification strategy.
99    #[serde(default, skip_serializing_if = "SpecKind::is_legacy")]
100    pub spec_kind: SpecKind,
101    /// Canonical OCI reference from the manifest (e.g. `ncbi/blast:2.14.0`).
102    pub image_reference: String,
103    /// Content digest of the pulled image (e.g. `sha256:abc123...`).
104    pub image_digest: String,
105    /// SHA-256 of the manifest TOML at resolve time; used for drift detection.
106    #[serde(default, skip_serializing_if = "String::is_empty")]
107    pub manifest_sha256: String,
108    pub image_size_bytes: Option<u64>,
109    /// Per-layer descriptors (ordered as they appear in the OCI manifest).
110    /// Empty for `legacy_image` entries.
111    #[serde(default, skip_serializing_if = "Vec::is_empty")]
112    pub layers: Vec<LayerDescriptor>,
113    pub resolved_at: DateTime<Utc>,
114    #[serde(default)]
115    pub reference_data_pins: BTreeMap<String, ReferenceDataPin>,
116    /// Binary names this tool contributes to the binary index.
117    #[serde(default, skip_serializing_if = "Vec::is_empty")]
118    pub binaries: Vec<String>,
119}
120
121impl SpecKind {
122    pub fn is_legacy(&self) -> bool {
123        matches!(self, SpecKind::LegacyImage)
124    }
125}
126
127impl LockfileEntry {
128    /// True when two entries represent the same resolved state.
129    /// Ignores timestamps, sizes, and declared_version_req.
130    /// For `factored_oci` entries, all layer digests must also match.
131    pub fn is_equivalent(&self, other: &Self) -> bool {
132        if self.tool_id != other.tool_id
133            || self.version != other.version
134            || self.image_digest != other.image_digest
135        {
136            return false;
137        }
138        if !self.manifest_sha256.is_empty()
139            && !other.manifest_sha256.is_empty()
140            && self.manifest_sha256 != other.manifest_sha256
141        {
142            return false;
143        }
144        if self.layers.len() != other.layers.len() {
145            return false;
146        }
147        self.layers
148            .iter()
149            .zip(other.layers.iter())
150            .all(|(a, b)| a.digest == b.digest)
151    }
152}
153
154// LockfileMetadata
155
156/// Informational metadata written to `bv.lock` by `bv lock`.
157#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct LockfileMetadata {
159    pub bv_version: String,
160    pub generated_at: DateTime<Utc>,
161    #[serde(skip_serializing_if = "Option::is_none")]
162    pub hardware_summary: Option<String>,
163}
164
165impl Default for LockfileMetadata {
166    fn default() -> Self {
167        Self {
168            bv_version: env!("CARGO_PKG_VERSION").to_string(),
169            generated_at: Utc::now(),
170            hardware_summary: None,
171        }
172    }
173}
174
175// Lockfile
176
177/// The full `bv.lock` file.
178///
179/// Format is stable: `bv lock --check` fails if the generated lockfile
180/// would differ from the on-disk one on any stability field.
181#[derive(Debug, Clone, Serialize, Deserialize)]
182#[serde(deny_unknown_fields)]
183pub struct Lockfile {
184    pub version: u32,
185    #[serde(default)]
186    pub metadata: LockfileMetadata,
187    #[serde(default)]
188    pub tools: BTreeMap<String, LockfileEntry>,
189    /// Derived routing table: binary name -> tool id.
190    /// Rebuilt by `rebuild_binary_index` whenever tools change.
191    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
192    pub binary_index: BinaryIndex,
193}
194
195impl Lockfile {
196    pub fn new() -> Self {
197        Self {
198            version: LOCKFILE_FORMAT_VERSION,
199            metadata: LockfileMetadata::default(),
200            tools: BTreeMap::new(),
201            binary_index: BTreeMap::new(),
202        }
203    }
204
205    pub fn from_toml_str(s: &str) -> Result<Self> {
206        let lockfile: Self =
207            toml::from_str(s).map_err(|e| BvError::LockfileParse(e.to_string()))?;
208        if lockfile.version > LOCKFILE_FORMAT_VERSION {
209            return Err(BvError::LockfileParse(format!(
210                "bv.lock uses format version {}, but this bv only supports up to version {}.\n\
211                Upgrade bv: curl -fsSL https://raw.githubusercontent.com/tejasprabhune/bv/main/install.sh | sh",
212                lockfile.version, LOCKFILE_FORMAT_VERSION
213            )));
214        }
215        Ok(lockfile)
216    }
217
218    pub fn to_toml_string(&self) -> Result<String> {
219        toml::to_string_pretty(self).map_err(|e| BvError::LockfileParse(e.to_string()))
220    }
221
222    /// Rebuild `binary_index` from each tool's `binaries` list.
223    ///
224    /// `overrides` maps binary name to the tool id that wins when two tools
225    /// expose the same name. Without an override, a collision returns `Err`.
226    pub fn rebuild_binary_index(
227        &mut self,
228        overrides: &BTreeMap<String, String>,
229    ) -> std::result::Result<(), String> {
230        let mut index: BinaryIndex = BTreeMap::new();
231        let mut collisions: Vec<String> = Vec::new();
232
233        let mut sorted: Vec<_> = self.tools.iter().collect();
234        sorted.sort_by_key(|(id, _)| id.as_str());
235
236        for (tool_id, entry) in &sorted {
237            for binary in &entry.binaries {
238                if let Some(winner) = overrides.get(binary) {
239                    index.insert(binary.clone(), winner.clone());
240                } else if let Some(existing) = index.insert(binary.clone(), tool_id.to_string())
241                    && existing != tool_id.as_str()
242                {
243                    collisions.push(format!(
244                        "'{binary}' exposed by both '{existing}' and '{tool_id}'"
245                    ));
246                    index.insert(binary.clone(), existing);
247                }
248            }
249        }
250
251        if !collisions.is_empty() {
252            return Err(collisions.join(", "));
253        }
254        self.binary_index = index;
255        Ok(())
256    }
257
258    /// True when both lockfiles describe the same set of tools at the same
259    /// resolved versions and digests.
260    pub fn is_equivalent_to(&self, other: &Self) -> bool {
261        if self.tools.len() != other.tools.len() {
262            return false;
263        }
264        for (id, entry) in &self.tools {
265            match other.tools.get(id) {
266                Some(other_entry) => {
267                    if !entry.is_equivalent(other_entry) {
268                        return false;
269                    }
270                }
271                None => return false,
272            }
273        }
274        true
275    }
276}
277
278impl Default for Lockfile {
279    fn default() -> Self {
280        Self::new()
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a UTC timestamp from whole seconds since the Unix epoch.
    fn at(secs: i64) -> DateTime<Utc> {
        DateTime::<Utc>::from_timestamp(secs, 0).unwrap()
    }

    /// Legacy-image fixture exposing a single `<id>-bin` binary.
    fn entry(id: &str, version: &str, digest: &str) -> LockfileEntry {
        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: String::new(),
            version: version.to_owned(),
            spec_kind: SpecKind::LegacyImage,
            image_reference: format!("registry/{id}:{version}"),
            image_digest: digest.to_owned(),
            manifest_sha256: format!("sha256:m-{id}"),
            image_size_bytes: None,
            layers: Vec::new(),
            resolved_at: at(1_700_000_000),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![format!("{id}-bin")],
        }
    }

    /// Factored-OCI fixture with two zstd layers: a shared openssl layer that
    /// carries a conda pin, followed by a tool-specific layer without one.
    fn factored_entry(id: &str) -> LockfileEntry {
        let openssl_layer = LayerDescriptor {
            conda_package: Some(CondaPackagePin {
                name: "openssl".into(),
                version: "3.2.1".into(),
                build: "h0_0".into(),
                channel: "conda-forge".into(),
                sha256: "abcd".into(),
            }),
            ..LayerDescriptor::new_zstd("sha256:shared-openssl", 10_000_000)
        };
        let tool_layer = LayerDescriptor::new_zstd(format!("sha256:pkg-{id}"), 20_000_000);

        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: "=1.0.0".into(),
            version: "1.0.0".into(),
            spec_kind: SpecKind::FactoredOci,
            image_reference: format!("registry/{id}:1.0.0"),
            image_digest: format!("sha256:img-{id}"),
            manifest_sha256: format!("sha256:man-{id}"),
            image_size_bytes: None,
            layers: vec![openssl_layer, tool_layer],
            resolved_at: at(1_700_000_000),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![id.to_owned()],
        }
    }

    /// Regression: lockfile serialization must be byte-deterministic so
    /// `bv lock --check` can compare against the on-disk file.
    #[test]
    fn to_toml_string_is_deterministic() {
        let mut lock = Lockfile::new();
        // Insert out of order on purpose; the output must still be sorted.
        for id in ["zebra", "alpha", "mango", "beta", "tango"] {
            let digest = format!("sha256:d-{id}");
            lock.tools
                .insert(id.to_owned(), entry(id, "1.0.0", &digest));
            lock.binary_index
                .insert(format!("{id}-bin"), id.to_owned());
        }

        let first = lock.to_toml_string().unwrap();
        for _ in 0..32 {
            assert_eq!(first, lock.to_toml_string().unwrap(), "non-deterministic output");
        }

        // Tools must appear in lexicographic order.
        let offsets: Vec<usize> = ["alpha", "beta", "mango", "tango", "zebra"]
            .iter()
            .map(|id| first.find(&format!("\"{id}\"")).unwrap())
            .collect();
        assert!(offsets.windows(2).all(|pair| pair[0] < pair[1]));
    }

    #[test]
    fn spec_kind_legacy_is_skipped_in_serialization() {
        let mut lock = Lockfile::new();
        lock.tools
            .insert("tool".into(), entry("tool", "1.0.0", "sha256:abc"));

        let s = lock.to_toml_string().unwrap();

        // Legacy entries must not emit spec_kind to keep backward compat.
        assert!(
            !s.contains("spec_kind"),
            "legacy entries must not emit spec_kind: {s}"
        );
    }

    #[test]
    fn factored_entry_round_trips() {
        let mut lock = Lockfile::new();
        lock.tools
            .insert("samtools".into(), factored_entry("samtools"));

        let text = lock.to_toml_string().unwrap();
        let reparsed = Lockfile::from_toml_str(&text).unwrap();

        let restored = &reparsed.tools["samtools"];
        assert_eq!(restored.spec_kind, SpecKind::FactoredOci);
        assert_eq!(restored.layers.len(), 2);
        let pin = restored.layers[0].conda_package.as_ref().unwrap();
        assert_eq!(pin.name, "openssl");
    }

    #[test]
    fn is_equivalent_checks_layer_digests() {
        let original = factored_entry("samtools");
        let mut tampered = original.clone();
        tampered.layers[0].digest = "sha256:different".into();
        assert!(!original.is_equivalent(&tampered));
    }

    #[test]
    fn rejects_future_format_version() {
        let toml = r#"
version = 99

[metadata]
bv_version = "0.0.0"
generated_at = "2024-01-01T00:00:00Z"
"#;
        let msg = Lockfile::from_toml_str(toml).unwrap_err().to_string();
        assert!(
            msg.contains("format version"),
            "expected 'format version' in error: {msg}"
        );
    }

    #[test]
    fn is_equivalent_ignores_timestamps() {
        let original = factored_entry("samtools");
        let mut later = original.clone();
        later.resolved_at = at(1_800_000_000);
        assert!(original.is_equivalent(&later));
    }
}
431
#[cfg(test)]
mod prop_tests {
    use proptest::prelude::*;

    use super::*;

    /// Tool ids: one lowercase letter followed by 1 to 15 characters from
    /// `[a-z0-9_-]`.
    fn arb_tool_id() -> impl Strategy<Value = String> {
        // A regex string literal is itself a `Strategy<Value = String>`.
        "[a-z][a-z0-9_-]{1,15}"
    }

    /// Digests of the form `sha256:<64 lowercase hex chars>`.
    fn arb_digest() -> impl Strategy<Value = String> {
        "[0-9a-f]{64}".prop_map(|hex| format!("sha256:{hex}"))
    }

    /// Three-part versions with each component in `0..20`.
    fn arb_version() -> impl Strategy<Value = String> {
        (0u32..20, 0u32..20, 0u32..20).prop_map(|(a, b, c)| format!("{a}.{b}.{c}"))
    }

    /// zstd layers with an arbitrary digest and size, and no conda pin.
    fn arb_layer() -> impl Strategy<Value = LayerDescriptor> {
        (arb_digest(), 0u64..10_000_000u64).prop_map(|(digest, size)| LayerDescriptor {
            digest,
            size,
            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
            conda_package: None,
        })
    }

    prop_compose! {
        /// An `(id, entry)` pair; entries with no layers are legacy images,
        /// all others are factored OCI.
        fn arb_entry()(
            id in arb_tool_id(),
            version in arb_version(),
            digest in arb_digest(),
            manifest_sha256 in arb_digest(),
            size in proptest::option::of(0u64..10_000_000_000u64),
            layers in proptest::collection::vec(arb_layer(), 0..6),
        ) -> (String, LockfileEntry) {
            let spec_kind = if layers.is_empty() { SpecKind::LegacyImage } else { SpecKind::FactoredOci };
            let entry = LockfileEntry {
                tool_id: id.clone(),
                declared_version_req: format!("={version}"),
                version: version.clone(),
                spec_kind,
                image_reference: format!("registry/{id}:{version}"),
                image_digest: digest,
                manifest_sha256,
                image_size_bytes: size,
                layers,
                resolved_at: chrono::DateTime::<chrono::Utc>::from_timestamp(1700000000, 0).unwrap(),
                reference_data_pins: BTreeMap::new(),
                binaries: vec![id.clone()],
            };
            (id, entry)
        }
    }

    prop_compose! {
        /// A lockfile holding up to 9 generated entries; duplicate ids
        /// overwrite earlier ones.
        fn arb_lockfile()(
            entries in proptest::collection::vec(arb_entry(), 0..10),
        ) -> Lockfile {
            let mut lock = Lockfile::new();
            for (id, entry) in entries {
                lock.tools.insert(id, entry);
            }
            lock
        }
    }

    proptest! {
        /// Round-trip through TOML must be lossless on all stability fields.
        #[test]
        fn round_trip_preserves_all_fields(lock in arb_lockfile()) {
            let serialized = lock.to_toml_string().expect("serialize");
            let deserialized = Lockfile::from_toml_str(&serialized).expect("deserialize");

            prop_assert_eq!(lock.version, deserialized.version);
            prop_assert_eq!(lock.tools.len(), deserialized.tools.len());

            for (id, orig) in &lock.tools {
                let restored = deserialized.tools.get(id).expect("tool present after round-trip");
                prop_assert_eq!(&orig.tool_id, &restored.tool_id);
                prop_assert_eq!(&orig.version, &restored.version);
                prop_assert_eq!(&orig.image_reference, &restored.image_reference);
                prop_assert_eq!(&orig.image_digest, &restored.image_digest);
                prop_assert_eq!(&orig.manifest_sha256, &restored.manifest_sha256);
                prop_assert_eq!(orig.image_size_bytes, restored.image_size_bytes);
                prop_assert_eq!(orig.layers.len(), restored.layers.len());
                for (la, lb) in orig.layers.iter().zip(restored.layers.iter()) {
                    prop_assert_eq!(&la.digest, &lb.digest);
                    prop_assert_eq!(la.size, lb.size);
                }
            }
        }

        /// Serialization is deterministic: calling to_toml_string twice gives identical bytes.
        #[test]
        fn serialization_is_deterministic(lock in arb_lockfile()) {
            let s1 = lock.to_toml_string().expect("first serialize");
            let s2 = lock.to_toml_string().expect("second serialize");
            prop_assert_eq!(s1, s2);
        }

        /// Tool map keys appear in sorted (BTreeMap) order in the output.
        #[test]
        fn tool_keys_are_sorted(lock in arb_lockfile()) {
            if lock.tools.len() < 2 { return Ok(()); }
            let s = lock.to_toml_string().expect("serialize");
            // First quoted occurrence of each id, taken in map (sorted) order.
            let positions: Vec<usize> = lock
                .tools
                .keys()
                .filter_map(|k| s.find(&format!("\"{k}\"")))
                .collect();
            prop_assert_eq!(positions.len(), lock.tools.len(), "all keys present");
            let mut sorted = positions.clone();
            sorted.sort_unstable();
            prop_assert_eq!(positions, sorted, "keys appear in sorted order");
        }

        /// No floating-point values appear in the serialized output.
        #[test]
        fn no_floats_in_output(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            let has_float = s.lines().any(|line| {
                let line = line.trim();
                // Quoted strings and timestamps may legitimately contain dots.
                if line.contains('"') || line.contains('T') { return false; }
                if let Some(rhs) = line.split_once('=').map(|(_, v)| v.trim()) {
                    return rhs.starts_with(|c: char| c.is_ascii_digit()) && rhs.contains('.');
                }
                false
            });
            prop_assert!(!has_float, "float found in lockfile output:\n{s}");
        }

        /// Timestamps must be UTC ISO-8601 strings, not bare integers.
        ///
        /// Every `resolved_at = ...` / `generated_at = ...` assignment line is
        /// checked, not only the first line that mentions the key.
        #[test]
        fn timestamps_are_iso8601_utc(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            for key in ["resolved_at", "generated_at"] {
                // Match only `key = value` lines so that a table header or a
                // string value containing the key text is not mistaken for one.
                let assignments = s.lines().filter(|l| {
                    l.trim_start()
                        .strip_prefix(key)
                        .is_some_and(|rest| rest.trim_start().starts_with('='))
                });
                for line in assignments {
                    prop_assert!(
                        line.contains('Z') || line.contains("+00:00"),
                        "timestamp not UTC: {line}"
                    );
                }
            }
        }
    }
}
574                }
575            }
576        }
577    }
578}