Skip to main content

bv_core/
manifest.rs

1use std::collections::HashMap;
2use std::fmt;
3use std::path::PathBuf;
4use std::str::FromStr;
5
6use serde::{Deserialize, Serialize};
7
8use bv_types::{Cardinality, TypeRef};
9
10use crate::error::{BvError, Result};
11
12/// Quality and governance tier for a tool in the registry.
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
14#[serde(rename_all = "lowercase")]
15pub enum Tier {
16    /// Typed I/O complete, conformance tests pass, from a recognized publisher, actively maintained.
17    Core,
18    /// Typed I/O present (may be partial), basic checks pass.
19    #[default]
20    Community,
21    /// Basic checks pass; may lack typed I/O. Hidden from default search results.
22    Experimental,
23}
24
25impl Tier {
26    pub fn as_str(&self) -> &'static str {
27        match self {
28            Tier::Core => "core",
29            Tier::Community => "community",
30            Tier::Experimental => "experimental",
31        }
32    }
33}
34
35impl fmt::Display for Tier {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        f.write_str(self.as_str())
38    }
39}
40
/// Structured CUDA version with ordering (`12.1 < 12.4 < 13.0`).
///
/// The derived ordering compares `major` first and `minor` second, so the
/// field order below is significant. `Hash` is derived alongside `Eq` so the
/// type can be used as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct CudaVersion {
    /// Major component (the `12` in `12.4`).
    pub major: u32,
    /// Minor component (the `4` in `12.4`).
    pub minor: u32,
}
47
48impl fmt::Display for CudaVersion {
49    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50        write!(f, "{}.{}", self.major, self.minor)
51    }
52}
53
54impl FromStr for CudaVersion {
55    type Err = String;
56
57    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
58        let (maj, min) = s
59            .split_once('.')
60            .ok_or_else(|| format!("expected 'major.minor', got '{s}'"))?;
61        Ok(CudaVersion {
62            major: maj
63                .parse()
64                .map_err(|_| format!("invalid major version '{maj}'"))?,
65            minor: min
66                .parse()
67                .map_err(|_| format!("invalid minor version '{min}'"))?,
68        })
69    }
70}
71
72impl TryFrom<String> for CudaVersion {
73    type Error = String;
74    fn try_from(s: String) -> std::result::Result<Self, Self::Error> {
75        s.parse()
76    }
77}
78
79impl From<CudaVersion> for String {
80    fn from(v: CudaVersion) -> String {
81        v.to_string()
82    }
83}
84
85impl Serialize for CudaVersion {
86    fn serialize<S: serde::Serializer>(&self, s: S) -> std::result::Result<S::Ok, S::Error> {
87        s.serialize_str(&self.to_string())
88    }
89}
90
91impl<'de> Deserialize<'de> for CudaVersion {
92    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
93        let s = String::deserialize(d)?;
94        s.parse().map_err(serde::de::Error::custom)
95    }
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct GpuSpec {
100    pub required: bool,
101    pub min_vram_gb: Option<u32>,
102    pub cuda_version: Option<CudaVersion>,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct HardwareSpec {
107    pub gpu: Option<GpuSpec>,
108    pub cpu_cores: Option<u32>,
109    pub ram_gb: Option<f64>,
110    pub disk_gb: Option<f64>,
111}
112
113impl HardwareSpec {
114    /// Check this manifest's requirements against the host's detected hardware.
115    /// Returns every requirement that is not satisfied.
116    pub fn check_against(
117        &self,
118        detected: &crate::hardware::DetectedHardware,
119    ) -> Vec<crate::hardware::HardwareMismatch> {
120        use crate::hardware::HardwareMismatch;
121        let mut out = Vec::new();
122
123        if let Some(gpu_req) = &self.gpu
124            && gpu_req.required
125        {
126            if detected.gpus.is_empty() {
127                out.push(HardwareMismatch::NoGpu);
128            } else {
129                if let Some(min_vram) = gpu_req.min_vram_gb {
130                    let best_vram_mb = detected.gpus.iter().map(|g| g.vram_mb).max().unwrap_or(0);
131                    let best_vram_gb = (best_vram_mb as f64 / 1024.0).floor() as u32;
132                    if best_vram_gb < min_vram {
133                        out.push(HardwareMismatch::InsufficientVram {
134                            required_gb: min_vram,
135                            available_gb: best_vram_gb,
136                        });
137                    }
138                }
139                if let Some(min_cuda) = &gpu_req.cuda_version {
140                    let best_cuda = detected
141                        .gpus
142                        .iter()
143                        .filter_map(|g| g.cuda_version.as_ref())
144                        .max();
145                    match best_cuda {
146                        None => out.push(HardwareMismatch::NoCuda {
147                            required: min_cuda.clone(),
148                        }),
149                        Some(avail) if avail < min_cuda => {
150                            out.push(HardwareMismatch::CudaTooOld {
151                                required: min_cuda.clone(),
152                                available: avail.clone(),
153                            });
154                        }
155                        _ => {}
156                    }
157                }
158            }
159        }
160
161        if let Some(min_ram) = self.ram_gb {
162            let avail = detected.ram_gb();
163            if avail < min_ram {
164                out.push(HardwareMismatch::InsufficientRam {
165                    required_gb: min_ram,
166                    available_gb: avail,
167                });
168            }
169        }
170
171        if let Some(min_disk) = self.disk_gb {
172            let avail = detected.disk_free_gb();
173            if avail < min_disk {
174                out.push(HardwareMismatch::InsufficientDisk {
175                    required_gb: min_disk,
176                    available_gb: avail,
177                });
178            }
179        }
180
181        out
182    }
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct ImageSpec {
187    /// Runtime backend, e.g. `"docker"` or `"apptainer"`.
188    pub backend: String,
189    /// Canonical OCI reference, e.g. `"biocontainers/bwa:0.7.17"`.
190    pub reference: String,
191    /// Optional pinned digest for reproducibility.
192    pub digest: Option<String>,
193}
194
195#[derive(Debug, Clone, Serialize, Deserialize)]
196pub struct ReferenceDataSpec {
197    pub id: String,
198    pub version: String,
199    pub required: bool,
200    /// Container path where the dataset directory is mounted read-only.
201    #[serde(default, skip_serializing_if = "Option::is_none")]
202    pub mount_path: Option<String>,
203    /// Approximate compressed size in bytes.
204    #[serde(default, skip_serializing_if = "Option::is_none")]
205    pub size_bytes: Option<u64>,
206}
207
208/// Typed I/O port declaration for a tool.
209#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct IoSpec {
211    pub name: String,
212    /// Type reference, e.g. `"fasta"` or `"fasta[protein]"`.
213    #[serde(rename = "type")]
214    pub r#type: TypeRef,
215    /// How many values this port accepts.
216    #[serde(default)]
217    pub cardinality: Cardinality,
218    /// Absolute path inside the container where this value is mounted.
219    #[serde(default, skip_serializing_if = "Option::is_none")]
220    pub mount: Option<PathBuf>,
221    #[serde(default, skip_serializing_if = "Option::is_none")]
222    pub description: Option<String>,
223    #[serde(default, skip_serializing_if = "Option::is_none")]
224    pub default: Option<String>,
225}
226
227#[derive(Debug, Clone, Serialize, Deserialize)]
228pub struct EntrypointSpec {
229    pub command: String,
230    pub args_template: Option<String>,
231    #[serde(default)]
232    pub env: HashMap<String, String>,
233}
234
235/// Binary names that the tool's container exposes on PATH.
236///
237/// Omitting this block defaults to `exposed = [entrypoint.command]` for
238/// single-binary tools that do not need to declare anything extra.
239#[derive(Debug, Clone, Serialize, Deserialize)]
240pub struct BinariesSpec {
241    pub exposed: Vec<String>,
242}
243
244/// Canonical inputs and expected outputs used by the conformance test runner.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct TestSpec {
247    /// Map of port name to a `test://` URI identifying the canonical input.
248    #[serde(default)]
249    pub inputs: std::collections::HashMap<String, String>,
250    /// Port names whose output files must exist and pass type-level checks.
251    #[serde(default)]
252    pub expected_outputs: Vec<String>,
253    /// Additional CLI args appended to the entrypoint during test runs.
254    #[serde(default)]
255    pub extra_args: Vec<String>,
256    /// Seconds before the conformance run is killed.
257    #[serde(default = "default_timeout")]
258    pub timeout_seconds: u64,
259    /// When true, skipped in fast CI and run only on a separate slow schedule.
260    #[serde(default)]
261    pub slow: bool,
262}
263
/// Serde default for `TestSpec::timeout_seconds`: 60 seconds.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT_SECONDS: u64 = 60;
    DEFAULT_TIMEOUT_SECONDS
}
267
268/// Optional Sigstore/cosign signature metadata.
269#[derive(Debug, Clone, Serialize, Deserialize)]
270pub struct SignatureSpec {
271    /// `"sigstore"` to verify the OCI image signature with cosign.
272    #[serde(default, skip_serializing_if = "Option::is_none")]
273    pub image: Option<String>,
274    /// `"sigstore"` to verify the manifest's commit signature.
275    #[serde(default, skip_serializing_if = "Option::is_none")]
276    pub manifest: Option<String>,
277}
278
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct ToolManifest {
281    pub id: String,
282    pub version: String,
283    pub description: Option<String>,
284    pub homepage: Option<String>,
285    pub license: Option<String>,
286    /// Governance tier. Defaults to `community` for new submissions.
287    #[serde(default)]
288    pub tier: Tier,
289    /// GitHub handles of maintainers, e.g. `"github:alice"`.
290    #[serde(default, skip_serializing_if = "Vec::is_empty")]
291    pub maintainers: Vec<String>,
292    /// Set to `true` when a tool is superseded or no longer maintained.
293    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
294    pub deprecated: bool,
295    pub image: ImageSpec,
296    pub hardware: HardwareSpec,
297    #[serde(default)]
298    pub reference_data: HashMap<String, ReferenceDataSpec>,
299    /// Typed inputs. Optional; manifests without this section parse unchanged.
300    #[serde(default)]
301    pub inputs: Vec<IoSpec>,
302    /// Typed outputs. Optional; manifests without this section parse unchanged.
303    #[serde(default)]
304    pub outputs: Vec<IoSpec>,
305    pub entrypoint: EntrypointSpec,
306    /// Container paths the tool writes to during normal execution and that
307    /// should therefore be bound to writable host directories. Critical on
308    /// apptainer (read-only SIF root), nice-to-have on docker (lets caches
309    /// outlive `docker rm`). Tool authors declare these; users can override
310    /// the host side via `[[cache]]` in `bv.toml`.
311    #[serde(default, skip_serializing_if = "Vec::is_empty")]
312    pub cache_paths: Vec<String>,
313    /// Binary names this tool exposes on PATH inside its container.
314    /// Omit for single-binary tools; defaults to `[entrypoint.command]`.
315    #[serde(default, skip_serializing_if = "Option::is_none")]
316    pub binaries: Option<BinariesSpec>,
317    /// Conformance test block; used by `bv conformance check`.
318    #[serde(default, skip_serializing_if = "Option::is_none")]
319    pub test: Option<TestSpec>,
320    /// Sigstore / cosign signature declarations.
321    #[serde(default, skip_serializing_if = "Option::is_none")]
322    pub signatures: Option<SignatureSpec>,
323}
324
325impl ToolManifest {
326    pub fn has_typed_io(&self) -> bool {
327        !self.inputs.is_empty() || !self.outputs.is_empty()
328    }
329
330    /// Returns the effective list of binary names this tool exposes.
331    ///
332    /// When no `[tool.binaries]` block is present, defaults to
333    /// `[entrypoint.command]` (the basename component for path-style commands).
334    pub fn effective_binaries(&self) -> Vec<&str> {
335        match &self.binaries {
336            Some(b) => b.exposed.iter().map(|s| s.as_str()).collect(),
337            None => {
338                let cmd = &self.entrypoint.command;
339                let name = cmd
340                    .rfind('/')
341                    .map(|i| &cmd[i + 1..])
342                    .unwrap_or(cmd.as_str());
343                vec![name]
344            }
345        }
346    }
347}
348
349/// Top-level manifest, corresponding to a single `.toml` file in the registry.
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub struct Manifest {
352    pub tool: ToolManifest,
353}
354
/// A single problem found by `Manifest::validate`.
///
/// Implements [`std::error::Error`] so it can be boxed, wrapped, or passed
/// through `?` like any other error value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Dotted path of the offending field, e.g. `tool.image.reference`.
    pub field: String,
    /// Human-readable explanation of what is wrong with the field.
    pub message: String,
}

/// Renders the error as `field: message`.
impl fmt::Display for ValidationError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}: {}", self.field, self.message)
    }
}

impl std::error::Error for ValidationError {}
366
367impl Manifest {
368    pub fn from_toml_str(s: &str) -> Result<Self> {
369        let m: Manifest = toml::from_str(s).map_err(|e| BvError::ManifestParse(e.to_string()))?;
370        m.validate_types()?;
371        Ok(m)
372    }
373
374    pub fn to_toml_string(&self) -> Result<String> {
375        toml::to_string_pretty(self).map_err(|e| BvError::ManifestParse(e.to_string()))
376    }
377
378    /// Validates that all TypeRefs in inputs/outputs exist in the bv-types vocabulary.
379    fn validate_types(&self) -> Result<()> {
380        let t = &self.tool;
381        for (side, specs) in [("inputs", &t.inputs), ("outputs", &t.outputs)] {
382            for spec in specs {
383                let id = spec.r#type.base_id();
384                if bv_types::lookup(id).is_none() {
385                    let suggestion = bv_types::suggest(id)
386                        .map(|s| format!(", did you mean `{s}`?"))
387                        .unwrap_or_default();
388                    return Err(BvError::ManifestParse(format!(
389                        "tool.{side}[{}]: unknown type `{id}`{suggestion}",
390                        spec.name
391                    )));
392                }
393            }
394        }
395        Ok(())
396    }
397
398    /// Returns a list of validation errors, or `Ok(())` if the manifest is valid.
399    pub fn validate(&self) -> std::result::Result<(), Vec<ValidationError>> {
400        let mut errors = Vec::new();
401        let t = &self.tool;
402
403        if t.id.is_empty() {
404            errors.push(ValidationError {
405                field: "tool.id".into(),
406                message: "must not be empty".into(),
407            });
408        }
409        if t.version.is_empty() {
410            errors.push(ValidationError {
411                field: "tool.version".into(),
412                message: "must not be empty".into(),
413            });
414        }
415        if t.image.backend.is_empty() {
416            errors.push(ValidationError {
417                field: "tool.image.backend".into(),
418                message: "must not be empty".into(),
419            });
420        }
421        if t.image.reference.is_empty() {
422            errors.push(ValidationError {
423                field: "tool.image.reference".into(),
424                message: "must not be empty".into(),
425            });
426        }
427        if t.entrypoint.command.is_empty() {
428            errors.push(ValidationError {
429                field: "tool.entrypoint.command".into(),
430                message: "must not be empty".into(),
431            });
432        }
433
434        for spec in &t.inputs {
435            if let Some(mount) = &spec.mount
436                && !mount.is_absolute()
437            {
438                errors.push(ValidationError {
439                    field: format!("tool.inputs[{}].mount", spec.name),
440                    message: "must be an absolute path".into(),
441                });
442            }
443        }
444        for spec in &t.outputs {
445            if let Some(mount) = &spec.mount
446                && !mount.is_absolute()
447            {
448                errors.push(ValidationError {
449                    field: format!("tool.outputs[{}].mount", spec.name),
450                    message: "must be an absolute path".into(),
451                });
452            }
453        }
454
455        if let Some(binaries) = &t.binaries {
456            let mut seen = std::collections::HashSet::new();
457            for name in &binaries.exposed {
458                if !seen.insert(name.as_str()) {
459                    errors.push(ValidationError {
460                        field: "tool.binaries.exposed".into(),
461                        message: format!("duplicate binary name '{name}'"),
462                    });
463                }
464            }
465            if !binaries.exposed.is_empty() {
466                let cmd = &t.entrypoint.command;
467                let basename = cmd.rfind('/').map(|i| &cmd[i + 1..]).unwrap_or(cmd);
468                if !binaries.exposed.iter().any(|b| b == basename) {
469                    errors.push(ValidationError {
470                        field: "tool.binaries.exposed".into(),
471                        message: format!(
472                            "entrypoint command '{basename}' must be listed in exposed"
473                        ),
474                    });
475                }
476            }
477        }
478
479        if errors.is_empty() {
480            Ok(())
481        } else {
482            Err(errors)
483        }
484    }
485}
486
#[cfg(test)]
mod tests {
    use super::*;

    /// Fully populated manifest: typed I/O, hardware block, entrypoint env.
    const SAMPLE: &str = r#"
[tool]
id = "bwa"
version = "0.7.17"
description = "BWA short-read aligner"
homepage = "http://bio-bwa.sourceforge.net/"
license = "GPL-3.0"

[tool.image]
backend = "docker"
reference = "biocontainers/bwa:0.7.17--h5bf99c6_8"
digest = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890ab"

[tool.hardware]
cpu_cores = 8
ram_gb = 32.0
disk_gb = 50.0

[tool.hardware.gpu]
required = false

[[tool.inputs]]
name = "reads_r1"
type = "fastq"
cardinality = "one"
description = "Forward reads"

[[tool.inputs]]
name = "reads_r2"
type = "fastq"
cardinality = "optional"
description = "Reverse reads (paired-end)"

[[tool.outputs]]
name = "alignment"
type = "bam"
description = "Aligned reads"

[tool.entrypoint]
command = "bwa"
args_template = "mem -t {cpu_cores} {reference} {reads_r1} {reads_r2}"

[tool.entrypoint.env]
MALLOC_ARENA_MAX = "4"
"#;

    /// Minimal manifest without any typed inputs or outputs.
    const SAMPLE_NO_IO: &str = r#"
[tool]
id = "mytool"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/mytool:1.0.0"

[tool.hardware]

[tool.entrypoint]
command = "mytool"
"#;

    /// Parsing, serializing and re-parsing keeps the identifying fields.
    #[test]
    fn round_trip() {
        let original = Manifest::from_toml_str(SAMPLE).expect("parse failed");
        let tool = &original.tool;
        assert_eq!(tool.id, "bwa");
        assert_eq!(tool.version, "0.7.17");
        assert_eq!(tool.image.backend, "docker");
        assert_eq!(tool.inputs.len(), 2);
        assert_eq!(tool.outputs.len(), 1);
        assert_eq!(tool.inputs[0].cardinality, Cardinality::One);
        assert_eq!(tool.inputs[1].cardinality, Cardinality::Optional);

        let toml_text = original.to_toml_string().expect("serialise failed");
        let restored = Manifest::from_toml_str(&toml_text).expect("reparse failed");
        assert_eq!(restored.tool.id, tool.id);
        assert_eq!(restored.tool.version, tool.version);
    }

    /// A manifest with no I/O sections still parses and reports no typed I/O.
    #[test]
    fn no_io_parses_unchanged() {
        let parsed = Manifest::from_toml_str(SAMPLE_NO_IO).expect("parse failed");
        assert!(parsed.tool.inputs.is_empty());
        assert!(parsed.tool.outputs.is_empty());
        assert!(!parsed.tool.has_typed_io());
    }

    /// Bracketed type parameters end up in `TypeRef::params`.
    #[test]
    fn typeref_params_parsed() {
        let text = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "fasta[protein]"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let parsed = Manifest::from_toml_str(text).unwrap();
        assert_eq!(parsed.tool.inputs[0].r#type.params, vec!["protein"]);
    }

    /// A misspelled type name is rejected at parse time.
    #[test]
    fn unknown_type_error() {
        let text = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "protien_fasta"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let msg = Manifest::from_toml_str(text).unwrap_err().to_string();
        assert!(msg.contains("unknown type"), "got: {msg}");
    }

    /// Versions order by major first, then minor.
    #[test]
    fn cuda_version_ordering() {
        let parse = |text: &str| text.parse::<CudaVersion>().unwrap();
        let (v12_1, v12_4, v13_0) = (parse("12.1"), parse("12.4"), parse("13.0"));
        assert!(v12_1 < v12_4);
        assert!(v12_4 < v13_0);
        assert_eq!(v12_1, parse("12.1"));
    }

    /// `validate` reports an empty tool id.
    #[test]
    fn validate_catches_empty_id() {
        let mut manifest = Manifest::from_toml_str(SAMPLE).unwrap();
        manifest.tool.id.clear();
        let problems = manifest.validate().unwrap_err();
        assert!(problems.iter().any(|p| p.field == "tool.id"));
    }

    /// Every `.toml` under the sibling registry checkout must parse.
    /// Returns without checking anything when that directory cannot be read.
    #[test]
    fn registry_manifests_parse() {
        let registry = concat!(env!("CARGO_MANIFEST_DIR"), "/../../bv-registry/tools");
        let Ok(tool_dirs) = std::fs::read_dir(registry) else {
            return;
        };
        let tool_dirs = tool_dirs
            .map(|entry| entry.unwrap().path())
            .filter(|path| path.is_dir());
        for tool_dir in tool_dirs {
            let files = std::fs::read_dir(&tool_dir)
                .unwrap()
                .map(|entry| entry.unwrap().path());
            for manifest_path in files {
                if !manifest_path.extension().is_some_and(|ext| ext == "toml") {
                    continue;
                }
                let text = std::fs::read_to_string(&manifest_path)
                    .unwrap_or_else(|_| panic!("failed to read {}", manifest_path.display()));
                Manifest::from_toml_str(&text)
                    .unwrap_or_else(|e| panic!("{}: {e}", manifest_path.display()));
            }
        }
    }
}
664                }
665            }
666        }
667    }
668}