// bv_core/lockfile.rs

1use std::collections::BTreeMap;
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5
6use crate::error::{BvError, Result};
7
8pub type BinaryIndex = BTreeMap<String, String>;
9
10// SpecKind
11
12#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
13#[serde(rename_all = "snake_case")]
14pub enum SpecKind {
15    /// Single squashed image as pulled from biocontainers / a legacy registry.
16    #[default]
17    LegacyImage,
18    /// Factored OCI image where each conda package is its own layer.
19    FactoredOci,
20}
21
22// CondaPackagePin
23
24/// Exact conda package that a layer was built from.
25#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
26pub struct CondaPackagePin {
27    pub name: String,
28    pub version: String,
29    pub build: String,
30    pub channel: String,
31    /// sha256 of the .conda / .tar.bz2 archive (hex, no prefix).
32    pub sha256: String,
33}
34
35// LayerDescriptor
36
37/// One OCI layer entry in a lockfile tool record.
38#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
39pub struct LayerDescriptor {
40    /// Content digest of the compressed layer blob (e.g. `sha256:abc...`).
41    pub digest: String,
42    pub size: u64,
43    pub media_type: String,
44    /// Present only for `factored_oci` layers that correspond to a single conda package.
45    #[serde(default, skip_serializing_if = "Option::is_none")]
46    pub conda_package: Option<CondaPackagePin>,
47}
48
49impl LayerDescriptor {
50    pub fn new_zstd(digest: impl Into<String>, size: u64) -> Self {
51        Self {
52            digest: digest.into(),
53            size,
54            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
55            conda_package: None,
56        }
57    }
58
59    pub fn new_gzip(digest: impl Into<String>, size: u64) -> Self {
60        Self {
61            digest: digest.into(),
62            size,
63            media_type: "application/vnd.oci.image.layer.v1.tar+gzip".into(),
64            conda_package: None,
65        }
66    }
67}
68
69// ReferenceDataPin
70
71/// Per-dataset pin stored inside a lockfile entry.
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct ReferenceDataPin {
74    pub id: String,
75    pub version: String,
76    pub sha256: String,
77}
78
79// LockfileEntry
80
81/// One resolved tool entry in `bv.lock`.
82///
83/// Stability fields used by `bv lock --check` to detect drift:
84/// `tool_id`, `version`, `image_digest`, `manifest_sha256`,
85/// and the `digest` of every layer for `factored_oci` entries.
86/// Timestamps and sizes are informational only.
87#[derive(Debug, Clone, Serialize, Deserialize)]
88#[serde(deny_unknown_fields)]
89pub struct LockfileEntry {
90    pub tool_id: String,
91    /// Version requirement as declared in `bv.toml` (e.g. `=2.14.0`, `^2`, or `*`).
92    #[serde(default, skip_serializing_if = "String::is_empty")]
93    pub declared_version_req: String,
94    /// Resolved semver (e.g. `2.14.0`).
95    pub version: String,
96    /// How the image was built; drives the pull path and layer verification strategy.
97    #[serde(default, skip_serializing_if = "SpecKind::is_legacy")]
98    pub spec_kind: SpecKind,
99    /// Canonical OCI reference from the manifest (e.g. `ncbi/blast:2.14.0`).
100    pub image_reference: String,
101    /// Content digest of the pulled image (e.g. `sha256:abc123...`).
102    pub image_digest: String,
103    /// SHA-256 of the manifest TOML at resolve time; used for drift detection.
104    #[serde(default, skip_serializing_if = "String::is_empty")]
105    pub manifest_sha256: String,
106    pub image_size_bytes: Option<u64>,
107    /// Per-layer descriptors (ordered as they appear in the OCI manifest).
108    /// Empty for `legacy_image` entries.
109    #[serde(default, skip_serializing_if = "Vec::is_empty")]
110    pub layers: Vec<LayerDescriptor>,
111    pub resolved_at: DateTime<Utc>,
112    #[serde(default)]
113    pub reference_data_pins: BTreeMap<String, ReferenceDataPin>,
114    /// Binary names this tool contributes to the binary index.
115    #[serde(default, skip_serializing_if = "Vec::is_empty")]
116    pub binaries: Vec<String>,
117}
118
119impl SpecKind {
120    pub fn is_legacy(&self) -> bool {
121        matches!(self, SpecKind::LegacyImage)
122    }
123}
124
125impl LockfileEntry {
126    /// True when two entries represent the same resolved state.
127    /// Ignores timestamps, sizes, and declared_version_req.
128    /// For `factored_oci` entries, all layer digests must also match.
129    pub fn is_equivalent(&self, other: &Self) -> bool {
130        if self.tool_id != other.tool_id
131            || self.version != other.version
132            || self.image_digest != other.image_digest
133        {
134            return false;
135        }
136        if !self.manifest_sha256.is_empty()
137            && !other.manifest_sha256.is_empty()
138            && self.manifest_sha256 != other.manifest_sha256
139        {
140            return false;
141        }
142        if self.layers.len() != other.layers.len() {
143            return false;
144        }
145        self.layers
146            .iter()
147            .zip(other.layers.iter())
148            .all(|(a, b)| a.digest == b.digest)
149    }
150}
151
152// LockfileMetadata
153
154/// Informational metadata written to `bv.lock` by `bv lock`.
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct LockfileMetadata {
157    pub bv_version: String,
158    pub generated_at: DateTime<Utc>,
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub hardware_summary: Option<String>,
161}
162
163impl Default for LockfileMetadata {
164    fn default() -> Self {
165        Self {
166            bv_version: env!("CARGO_PKG_VERSION").to_string(),
167            generated_at: Utc::now(),
168            hardware_summary: None,
169        }
170    }
171}
172
173// Lockfile
174
175/// The full `bv.lock` file.
176///
177/// Format is stable: `bv lock --check` fails if the generated lockfile
178/// would differ from the on-disk one on any stability field.
179#[derive(Debug, Clone, Serialize, Deserialize)]
180#[serde(deny_unknown_fields)]
181pub struct Lockfile {
182    pub version: u32,
183    #[serde(default)]
184    pub metadata: LockfileMetadata,
185    #[serde(default)]
186    pub tools: BTreeMap<String, LockfileEntry>,
187    /// Derived routing table: binary name -> tool id.
188    /// Rebuilt by `rebuild_binary_index` whenever tools change.
189    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
190    pub binary_index: BinaryIndex,
191}
192
193impl Lockfile {
194    pub fn new() -> Self {
195        Self {
196            version: 1,
197            metadata: LockfileMetadata::default(),
198            tools: BTreeMap::new(),
199            binary_index: BTreeMap::new(),
200        }
201    }
202
203    pub fn from_toml_str(s: &str) -> Result<Self> {
204        toml::from_str(s).map_err(|e| BvError::LockfileParse(e.to_string()))
205    }
206
207    pub fn to_toml_string(&self) -> Result<String> {
208        toml::to_string_pretty(self).map_err(|e| BvError::LockfileParse(e.to_string()))
209    }
210
211    /// Rebuild `binary_index` from each tool's `binaries` list.
212    ///
213    /// `overrides` maps binary name to the tool id that wins when two tools
214    /// expose the same name. Without an override, a collision returns `Err`.
215    pub fn rebuild_binary_index(
216        &mut self,
217        overrides: &BTreeMap<String, String>,
218    ) -> std::result::Result<(), String> {
219        let mut index: BinaryIndex = BTreeMap::new();
220        let mut collisions: Vec<String> = Vec::new();
221
222        let mut sorted: Vec<_> = self.tools.iter().collect();
223        sorted.sort_by_key(|(id, _)| id.as_str());
224
225        for (tool_id, entry) in &sorted {
226            for binary in &entry.binaries {
227                if let Some(winner) = overrides.get(binary) {
228                    index.insert(binary.clone(), winner.clone());
229                } else if let Some(existing) = index.insert(binary.clone(), tool_id.to_string())
230                    && existing != tool_id.as_str()
231                {
232                    collisions.push(format!(
233                        "'{binary}' exposed by both '{existing}' and '{tool_id}'"
234                    ));
235                    index.insert(binary.clone(), existing);
236                }
237            }
238        }
239
240        if !collisions.is_empty() {
241            return Err(collisions.join(", "));
242        }
243        self.binary_index = index;
244        Ok(())
245    }
246
247    /// True when both lockfiles describe the same set of tools at the same
248    /// resolved versions and digests.
249    pub fn is_equivalent_to(&self, other: &Self) -> bool {
250        if self.tools.len() != other.tools.len() {
251            return false;
252        }
253        for (id, entry) in &self.tools {
254            match other.tools.get(id) {
255                Some(other_entry) => {
256                    if !entry.is_equivalent(other_entry) {
257                        return false;
258                    }
259                }
260                None => return false,
261            }
262        }
263        true
264    }
265}
266
267impl Default for Lockfile {
268    fn default() -> Self {
269        Self::new()
270    }
271}
272
#[cfg(test)]
mod tests {
    use super::*;

    /// Media type used by every layer in the fixtures below.
    const ZSTD_LAYER: &str = "application/vnd.oci.image.layer.v1.tar+zstd";

    /// Fixed timestamp shared by all fixtures so output is reproducible.
    fn fixed_resolved_at() -> DateTime<Utc> {
        DateTime::<Utc>::from_timestamp(1700000000, 0).unwrap()
    }

    /// Legacy-image fixture exposing a single `<id>-bin` binary.
    fn entry(id: &str, version: &str, digest: &str) -> LockfileEntry {
        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: String::new(),
            version: version.to_owned(),
            spec_kind: SpecKind::LegacyImage,
            image_reference: format!("registry/{id}:{version}"),
            image_digest: digest.to_owned(),
            manifest_sha256: format!("sha256:m-{id}"),
            image_size_bytes: None,
            layers: Vec::new(),
            resolved_at: fixed_resolved_at(),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![format!("{id}-bin")],
        }
    }

    /// Factored-OCI fixture with a shared openssl layer and a tool-specific layer.
    fn factored_entry(id: &str) -> LockfileEntry {
        let openssl_layer = LayerDescriptor {
            digest: "sha256:shared-openssl".into(),
            size: 10_000_000,
            media_type: ZSTD_LAYER.into(),
            conda_package: Some(CondaPackagePin {
                name: "openssl".into(),
                version: "3.2.1".into(),
                build: "h0_0".into(),
                channel: "conda-forge".into(),
                sha256: "abcd".into(),
            }),
        };
        let tool_layer = LayerDescriptor {
            digest: format!("sha256:pkg-{id}"),
            size: 20_000_000,
            media_type: ZSTD_LAYER.into(),
            conda_package: None,
        };

        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: "=1.0.0".into(),
            version: "1.0.0".into(),
            spec_kind: SpecKind::FactoredOci,
            image_reference: format!("registry/{id}:1.0.0"),
            image_digest: format!("sha256:img-{id}"),
            manifest_sha256: format!("sha256:man-{id}"),
            image_size_bytes: None,
            layers: vec![openssl_layer, tool_layer],
            resolved_at: fixed_resolved_at(),
            reference_data_pins: BTreeMap::new(),
            binaries: vec![id.to_owned()],
        }
    }

    /// Regression: lockfile serialization must be byte-deterministic so
    /// `bv lock --check` can compare against the on-disk file.
    #[test]
    fn to_toml_string_is_deterministic() {
        let mut lock = Lockfile::new();
        for id in ["zebra", "alpha", "mango", "beta", "tango"] {
            let digest = format!("sha256:d-{id}");
            lock.tools
                .insert(id.to_owned(), entry(id, "1.0.0", &digest));
            lock.binary_index
                .insert(format!("{id}-bin"), id.to_owned());
        }

        let baseline = lock.to_toml_string().unwrap();
        for _ in 0..32 {
            assert_eq!(
                baseline,
                lock.to_toml_string().unwrap(),
                "non-deterministic output"
            );
        }

        // Tools must appear in lexicographic order: the first quoted
        // occurrence of each id has to come at a strictly increasing offset.
        let offsets: Vec<usize> = ["alpha", "beta", "mango", "tango", "zebra"]
            .iter()
            .map(|id| baseline.find(&format!("\"{id}\"")).unwrap())
            .collect();
        assert!(offsets.windows(2).all(|pair| pair[0] < pair[1]));
    }

    #[test]
    fn spec_kind_legacy_is_skipped_in_serialization() {
        let mut lock = Lockfile::new();
        let legacy = entry("tool", "1.0.0", "sha256:abc");
        lock.tools.insert("tool".into(), legacy);

        let s = lock.to_toml_string().unwrap();
        // Legacy entries must not emit spec_kind to keep backward compat.
        assert!(
            !s.contains("spec_kind"),
            "legacy entries must not emit spec_kind: {s}"
        );
    }

    #[test]
    fn factored_entry_round_trips() {
        let mut lock = Lockfile::new();
        lock.tools
            .insert("samtools".into(), factored_entry("samtools"));

        let text = lock.to_toml_string().unwrap();
        let restored = Lockfile::from_toml_str(&text).unwrap();

        let e = &restored.tools["samtools"];
        assert_eq!(e.spec_kind, SpecKind::FactoredOci);
        assert_eq!(e.layers.len(), 2);
        assert_eq!(e.layers[0].conda_package.as_ref().unwrap().name, "openssl");
    }

    #[test]
    fn is_equivalent_checks_layer_digests() {
        let original = factored_entry("samtools");
        let mut altered = original.clone();
        altered.layers[0].digest = "sha256:different".into();
        assert!(!original.is_equivalent(&altered));
    }

    #[test]
    fn is_equivalent_ignores_timestamps() {
        let original = factored_entry("samtools");
        let mut later = original.clone();
        later.resolved_at = DateTime::<Utc>::from_timestamp(1_800_000_000, 0).unwrap();
        assert!(original.is_equivalent(&later));
    }
}
394
#[cfg(test)]
mod prop_tests {
    use proptest::prelude::*;

    use super::*;

    /// Tool ids: one lowercase letter followed by 1-15 chars from `[a-z0-9_-]`.
    fn arb_tool_id() -> impl Strategy<Value = String> {
        // A regex string literal is already a `Strategy<Value = String>`;
        // no identity `prop_map` is needed.
        "[a-z][a-z0-9_-]{1,15}"
    }

    /// Digests of the form `sha256:<64 lowercase hex chars>`.
    fn arb_digest() -> impl Strategy<Value = String> {
        "[0-9a-f]{64}".prop_map(|hex| format!("sha256:{hex}"))
    }

    /// Three-part versions with each component in `0..20`.
    fn arb_version() -> impl Strategy<Value = String> {
        (0u32..20, 0u32..20, 0u32..20).prop_map(|(a, b, c)| format!("{a}.{b}.{c}"))
    }

    /// zstd layers with a random digest and size, and no conda package pin.
    fn arb_layer() -> impl Strategy<Value = LayerDescriptor> {
        (arb_digest(), 0u64..10_000_000u64).prop_map(|(digest, size)| LayerDescriptor {
            digest,
            size,
            media_type: "application/vnd.oci.image.layer.v1.tar+zstd".into(),
            conda_package: None,
        })
    }

    prop_compose! {
        /// A `(tool id, entry)` pair; entries with layers are `FactoredOci`,
        /// entries without are `LegacyImage`.
        fn arb_entry()(
            id in arb_tool_id(),
            version in arb_version(),
            digest in arb_digest(),
            manifest_sha256 in arb_digest(),
            size in proptest::option::of(0u64..10_000_000_000u64),
            layers in proptest::collection::vec(arb_layer(), 0..6),
        ) -> (String, LockfileEntry) {
            let spec_kind = if layers.is_empty() {
                SpecKind::LegacyImage
            } else {
                SpecKind::FactoredOci
            };
            let entry = LockfileEntry {
                tool_id: id.clone(),
                declared_version_req: format!("={version}"),
                version: version.clone(),
                spec_kind,
                image_reference: format!("registry/{id}:{version}"),
                image_digest: digest,
                manifest_sha256,
                image_size_bytes: size,
                layers,
                resolved_at: chrono::DateTime::<chrono::Utc>::from_timestamp(1700000000, 0).unwrap(),
                reference_data_pins: BTreeMap::new(),
                binaries: vec![id.clone()],
            };
            (id, entry)
        }
    }

    prop_compose! {
        /// A lockfile holding up to nine entries; duplicate ids overwrite
        /// earlier ones, so the final tool count may be smaller.
        fn arb_lockfile()(
            entries in proptest::collection::vec(arb_entry(), 0..10),
        ) -> Lockfile {
            let mut lock = Lockfile::new();
            for (id, entry) in entries {
                lock.tools.insert(id, entry);
            }
            lock
        }
    }

    proptest! {
        /// Round-trip through TOML must be lossless on all stability fields.
        #[test]
        fn round_trip_preserves_all_fields(lock in arb_lockfile()) {
            let serialized = lock.to_toml_string().expect("serialize");
            let deserialized = Lockfile::from_toml_str(&serialized).expect("deserialize");

            prop_assert_eq!(lock.version, deserialized.version);
            prop_assert_eq!(lock.tools.len(), deserialized.tools.len());

            for (id, orig) in &lock.tools {
                let restored = deserialized.tools.get(id).expect("tool present after round-trip");
                prop_assert_eq!(&orig.tool_id, &restored.tool_id);
                prop_assert_eq!(&orig.version, &restored.version);
                prop_assert_eq!(&orig.image_reference, &restored.image_reference);
                prop_assert_eq!(&orig.image_digest, &restored.image_digest);
                prop_assert_eq!(&orig.manifest_sha256, &restored.manifest_sha256);
                prop_assert_eq!(orig.image_size_bytes, restored.image_size_bytes);
                prop_assert_eq!(orig.layers.len(), restored.layers.len());
                for (la, lb) in orig.layers.iter().zip(restored.layers.iter()) {
                    prop_assert_eq!(&la.digest, &lb.digest);
                    prop_assert_eq!(la.size, lb.size);
                }
            }
        }

        /// Serialization is deterministic: calling to_toml_string twice gives identical bytes.
        #[test]
        fn serialization_is_deterministic(lock in arb_lockfile()) {
            let s1 = lock.to_toml_string().expect("first serialize");
            let s2 = lock.to_toml_string().expect("second serialize");
            prop_assert_eq!(s1, s2);
        }

        /// Tool map keys appear in sorted (BTreeMap) order in the output.
        #[test]
        fn tool_keys_are_sorted(lock in arb_lockfile()) {
            if lock.tools.len() < 2 { return Ok(()); }
            let s = lock.to_toml_string().expect("serialize");
            let keys: Vec<&str> = lock.tools.keys().map(|k| k.as_str()).collect();
            // Offset of the first quoted occurrence of each id, in key order.
            let positions: Vec<usize> = keys
                .iter()
                .filter_map(|k| s.find(&format!("\"{k}\"")))
                .collect();
            prop_assert_eq!(positions.len(), lock.tools.len(), "all keys present");
            let mut sorted = positions.clone();
            sorted.sort_unstable();
            prop_assert_eq!(positions, sorted, "keys appear in sorted order");
        }

        /// No floating-point values appear in the serialized output.
        #[test]
        fn no_floats_in_output(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            let has_float = s.lines().any(|line| {
                let line = line.trim();
                // Quoted strings and timestamp-looking lines are not numbers.
                if line.contains('"') || line.contains('T') { return false; }
                if let Some(rhs) = line.split_once('=').map(|(_, v)| v.trim()) {
                    return rhs.starts_with(|c: char| c.is_ascii_digit()) && rhs.contains('.');
                }
                false
            });
            prop_assert!(!has_float, "float found in lockfile output:\n{s}");
        }

        /// Timestamps must be UTC ISO-8601 strings, not bare integers.
        #[test]
        fn timestamps_are_iso8601_utc(lock in arb_lockfile()) {
            let s = lock.to_toml_string().expect("serialize");
            for key in ["resolved_at", "generated_at"] {
                // Check every matching line, not just the first, so each
                // tool's timestamp is verified.
                for line in s.lines().filter(|l| l.contains(key)) {
                    prop_assert!(
                        line.contains('Z') || line.contains("+00:00"),
                        "timestamp not UTC: {line}"
                    );
                }
            }
        }
    }
}