bv-core 0.1.17

Core types for biov: manifests, lockfile, cache layout, and error types
Documentation
use std::collections::BTreeMap;

use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};

use crate::error::{BvError, Result};

/// Routing table from binary name to the id of the tool that provides it.
///
/// A `BTreeMap` keeps iteration (and therefore serialization) order stable.
pub type BinaryIndex = BTreeMap<String, String>;

/// Per-dataset pin stored inside a lockfile entry.
///
/// Stored as the values of `LockfileEntry::reference_data_pins`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceDataPin {
    /// Dataset identifier.
    /// NOTE(review): presumably the same string used as the key in
    /// `reference_data_pins` -- confirm against the resolver.
    pub id: String,
    /// Pinned dataset version (free-form string; no format is enforced here).
    pub version: String,
    /// Pinned checksum of the dataset.
    /// NOTE(review): assumed to be a hex SHA-256 digest from the field name;
    /// nothing in this type validates it.
    pub sha256: String,
}

/// One resolved tool entry in `bv.lock`.
///
/// Stability fields used by `bv lock --check` to detect drift:
/// `tool_id`, `version`, `image_digest`, `manifest_sha256`.
/// Timestamps and sizes are informational only.
///
/// Unknown keys are rejected on deserialization (`deny_unknown_fields`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LockfileEntry {
    /// Identifier of the tool this entry pins.
    pub tool_id: String,
    /// Version requirement as declared in `bv.toml` (e.g. `=2.14.0`, `^2`, or `*`).
    /// Optional on read; omitted from the output when empty.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub declared_version_req: String,
    /// Resolved semver (e.g. `2.14.0`).
    pub version: String,
    /// Canonical OCI reference from the manifest (e.g. `ncbi/blast:2.14.0`).
    pub image_reference: String,
    /// Content digest of the pulled image (e.g. `sha256:abc123...`).
    pub image_digest: String,
    /// SHA-256 of the manifest TOML at resolve time; used for drift detection.
    /// Optional on read; omitted from the output when empty. An empty value is
    /// treated as "unknown" by `is_equivalent`.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub manifest_sha256: String,
    /// Size of the image in bytes, when known. Informational only.
    pub image_size_bytes: Option<u64>,
    /// When this entry was resolved (UTC). Informational only.
    pub resolved_at: DateTime<Utc>,
    /// Reference-data pins attached to this tool; defaults to empty when absent.
    /// NOTE(review): the map key is presumably the dataset id -- confirm.
    #[serde(default)]
    pub reference_data_pins: BTreeMap<String, ReferenceDataPin>,
    /// Binary names this tool contributes to the binary index.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub binaries: Vec<String>,
}

impl LockfileEntry {
    /// Report whether `self` and `other` pin the same resolved state.
    ///
    /// `tool_id`, `version` and `image_digest` must match exactly.
    /// `manifest_sha256` is only compared when both sides carry a non-empty
    /// value; an empty hash on either side is accepted as a match. All other
    /// fields (timestamps, sizes, `declared_version_req`, image reference,
    /// data pins, binaries) are not consulted.
    pub fn is_equivalent(&self, other: &Self) -> bool {
        let identity_differs = self.tool_id != other.tool_id
            || self.version != other.version
            || self.image_digest != other.image_digest;
        if identity_differs {
            return false;
        }

        // A missing manifest hash means "unknown", not "different".
        let ours = &self.manifest_sha256;
        let theirs = &other.manifest_sha256;
        ours.is_empty() || theirs.is_empty() || ours == theirs
    }
}

/// Informational metadata written to `bv.lock` by `bv lock`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockfileMetadata {
    /// Version string of the `bv` build that produced the lockfile.
    /// The `Default` impl fills this from `CARGO_PKG_VERSION`.
    pub bv_version: String,
    /// When the lockfile was generated (UTC). The `Default` impl uses the
    /// current time.
    pub generated_at: DateTime<Utc>,
    /// Optional free-form hardware description; left out of the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub hardware_summary: Option<String>,
}

impl Default for LockfileMetadata {
    /// Metadata stamped with this crate's version and the current UTC time,
    /// with no hardware summary.
    fn default() -> Self {
        // Resolved at compile time from the crate's Cargo package version.
        let bv_version = env!("CARGO_PKG_VERSION").to_string();
        let generated_at = Utc::now();
        Self {
            bv_version,
            generated_at,
            hardware_summary: None,
        }
    }
}

/// The full `bv.lock` file (schema version 1).
///
/// Format is stable: `bv lock --check` fails if the generated lockfile
/// would differ from the on-disk one on any stability field.
///
/// Unknown top-level keys are rejected on deserialization
/// (`deny_unknown_fields`). All maps are `BTreeMap`s so serialized output is
/// ordered by key.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Lockfile {
    /// Schema version; currently always `1`.
    pub version: u32,
    /// Informational metadata; falls back to `LockfileMetadata::default()`
    /// when the section is absent.
    #[serde(default)]
    pub metadata: LockfileMetadata,
    /// Resolved tools keyed by tool id; empty when the section is absent.
    #[serde(default)]
    pub tools: BTreeMap<String, LockfileEntry>,
    /// Derived routing table: binary name -> tool id.
    /// Rebuilt by `rebuild_binary_index` whenever tools change.
    /// Omitted from the serialized output when empty.
    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
    pub binary_index: BinaryIndex,
}

impl Lockfile {
    /// Create an empty lockfile: schema version `1`, default metadata, no
    /// tools and an empty binary index.
    pub fn new() -> Self {
        Self {
            version: 1,
            metadata: LockfileMetadata::default(),
            tools: BTreeMap::new(),
            binary_index: BTreeMap::new(),
        }
    }

    /// Parse a lockfile from TOML text.
    ///
    /// # Errors
    ///
    /// Returns `BvError::LockfileParse` carrying the TOML error message when
    /// `s` cannot be deserialized into a `Lockfile`.
    pub fn from_toml_str(s: &str) -> Result<Self> {
        toml::from_str(s).map_err(|e| BvError::LockfileParse(e.to_string()))
    }

    /// Serialize this lockfile to pretty-printed TOML.
    ///
    /// # Errors
    ///
    /// Serialization failures are also reported through
    /// `BvError::LockfileParse`, carrying the serializer's message.
    pub fn to_toml_string(&self) -> Result<String> {
        toml::to_string_pretty(self).map_err(|e| BvError::LockfileParse(e.to_string()))
    }

    /// Rebuild `binary_index` from each tool's `binaries` list.
    ///
    /// `overrides` maps binary name to the tool id that wins when two tools
    /// expose the same name. Without an override, a collision returns `Err`.
    ///
    /// Tools are visited in key order, so for a contested binary the first
    /// tool (lexicographically) is the one named as the existing owner in
    /// every collision message.
    ///
    /// # Errors
    ///
    /// Returns all collision descriptions joined with `", "`. In that case
    /// `self.binary_index` is left unchanged.
    ///
    /// NOTE(review): an override is applied as-is; nothing here checks that
    /// the winning tool id exists in `tools` or actually lists the binary.
    /// Overrides for binaries that no tool lists have no effect.
    pub fn rebuild_binary_index(
        &mut self,
        overrides: &BTreeMap<String, String>,
    ) -> std::result::Result<(), String> {
        let mut index: BinaryIndex = BTreeMap::new();
        let mut collisions: Vec<String> = Vec::new();

        // `BTreeMap` iteration is already ordered by key, so no explicit
        // sort is needed to get deterministic results.
        for (tool_id, entry) in &self.tools {
            for binary in &entry.binaries {
                if let Some(winner) = overrides.get(binary) {
                    index.insert(binary.clone(), winner.clone());
                    continue;
                }

                // Look before inserting so the first owner is never displaced.
                match index.get(binary) {
                    None => {
                        index.insert(binary.clone(), tool_id.clone());
                    }
                    Some(existing) if existing != tool_id => {
                        collisions.push(format!(
                            "'{binary}' exposed by both '{existing}' and '{tool_id}'"
                        ));
                    }
                    // Same tool listing the same binary twice: not a conflict.
                    Some(_) => {}
                }
            }
        }

        if !collisions.is_empty() {
            return Err(collisions.join(", "));
        }
        self.binary_index = index;
        Ok(())
    }

    /// True when both lockfiles describe the same set of tools at the same
    /// resolved versions and digests.
    ///
    /// Only `tools` is compared (per entry via `LockfileEntry::is_equivalent`);
    /// `version`, `metadata` and `binary_index` are not consulted.
    pub fn is_equivalent_to(&self, other: &Self) -> bool {
        // Equal sizes plus "every one of ours has an equivalent of theirs"
        // implies the key sets are identical.
        self.tools.len() == other.tools.len()
            && self.tools.iter().all(|(id, entry)| {
                other
                    .tools
                    .get(id)
                    .is_some_and(|other_entry| entry.is_equivalent(other_entry))
            })
    }
}

impl Default for Lockfile {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal entry for `id` with a fixed timestamp, so fixtures
    /// are identical from run to run.
    fn entry(id: &str, version: &str, digest: &str) -> LockfileEntry {
        let resolved_at =
            chrono::DateTime::<chrono::Utc>::from_timestamp(1700000000, 0).unwrap();
        LockfileEntry {
            tool_id: id.to_owned(),
            declared_version_req: String::new(),
            version: version.to_owned(),
            image_reference: format!("registry/{id}:{version}"),
            image_digest: digest.to_owned(),
            manifest_sha256: format!("sha256:m-{id}"),
            image_size_bytes: None,
            resolved_at,
            reference_data_pins: BTreeMap::new(),
            binaries: vec![format!("{id}-bin")],
        }
    }

    /// Regression: serializing the same lockfile must always yield the same
    /// bytes, otherwise `bv lock --check` cannot compare against the file on
    /// disk. This holds because the maps are `BTreeMap`s (ordered) rather
    /// than `HashMap`s (randomized iteration order).
    #[test]
    fn to_toml_string_is_deterministic() {
        let mut lock = Lockfile::new();
        // Insert deliberately out of order.
        for id in ["zebra", "alpha", "mango", "beta", "tango"] {
            let digest = format!("sha256:d-{id}");
            lock.tools
                .insert(id.to_string(), entry(id, "1.0.0", &digest));
            lock.binary_index
                .insert(format!("{id}-bin"), id.to_string());
        }

        let s1 = lock.to_toml_string().unwrap();
        for _ in 0..32 {
            assert_eq!(s1, lock.to_toml_string().unwrap(), "non-deterministic output");
        }

        // The first quoted occurrence of each id must follow lexicographic
        // order, i.e. tools are emitted sorted by key.
        let offsets: Vec<usize> = ["alpha", "beta", "mango", "tango", "zebra"]
            .iter()
            .map(|id| s1.find(&format!("\"{id}\"")).unwrap())
            .collect();
        assert!(offsets.windows(2).all(|pair| pair[0] < pair[1]));
    }
}