// bv_core/manifest.rs

1use std::collections::HashMap;
2use std::fmt;
3use std::path::PathBuf;
4use std::str::FromStr;
5
6use serde::{Deserialize, Serialize};
7
8use bv_types::{Cardinality, TypeRef};
9
10use crate::error::{BvError, Result};
11
/// Quality and governance tier for a tool in the registry.
///
/// Serialized with lowercase variant names (`core`, `community`,
/// `experimental`). `Tier::Community` is the `Default` value.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum Tier {
    /// Typed I/O complete, conformance tests pass, from a recognized publisher, actively maintained.
    Core,
    /// Typed I/O present (may be partial), basic checks pass.
    #[default]
    Community,
    /// Basic checks pass; may lack typed I/O. Hidden from default search results.
    Experimental,
}
24
25impl Tier {
26    pub fn as_str(&self) -> &'static str {
27        match self {
28            Tier::Core => "core",
29            Tier::Community => "community",
30            Tier::Experimental => "experimental",
31        }
32    }
33}
34
35impl fmt::Display for Tier {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        f.write_str(self.as_str())
38    }
39}
40
/// Structured CUDA version with ordering (`12.1 < 12.4 < 13.0`).
///
/// The derived ordering compares `major` first and `minor` second, following
/// the field declaration order.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct CudaVersion {
    /// Major component, e.g. `12` in `12.4`.
    pub major: u32,
    /// Minor component, e.g. `4` in `12.4`.
    pub minor: u32,
}
47
48impl fmt::Display for CudaVersion {
49    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50        write!(f, "{}.{}", self.major, self.minor)
51    }
52}
53
54impl FromStr for CudaVersion {
55    type Err = String;
56
57    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
58        let (maj, min) = s
59            .split_once('.')
60            .ok_or_else(|| format!("expected 'major.minor', got '{s}'"))?;
61        Ok(CudaVersion {
62            major: maj
63                .parse()
64                .map_err(|_| format!("invalid major version '{maj}'"))?,
65            minor: min
66                .parse()
67                .map_err(|_| format!("invalid minor version '{min}'"))?,
68        })
69    }
70}
71
72impl TryFrom<String> for CudaVersion {
73    type Error = String;
74    fn try_from(s: String) -> std::result::Result<Self, Self::Error> {
75        s.parse()
76    }
77}
78
79impl From<CudaVersion> for String {
80    fn from(v: CudaVersion) -> String {
81        v.to_string()
82    }
83}
84
85impl Serialize for CudaVersion {
86    fn serialize<S: serde::Serializer>(&self, s: S) -> std::result::Result<S::Ok, S::Error> {
87        s.serialize_str(&self.to_string())
88    }
89}
90
91impl<'de> Deserialize<'de> for CudaVersion {
92    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
93        let s = String::deserialize(d)?;
94        s.parse().map_err(serde::de::Error::custom)
95    }
96}
97
/// GPU requirements declared in a tool's hardware section.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GpuSpec {
    /// Whether a GPU is required. `HardwareSpec::check_against` evaluates the
    /// VRAM and CUDA constraints below only when this is `true`.
    pub required: bool,
    /// Minimum VRAM in whole GB; compared against the largest VRAM among
    /// the detected GPUs.
    pub min_vram_gb: Option<u32>,
    /// Minimum CUDA version; compared against the highest CUDA version
    /// reported by any detected GPU.
    pub cuda_version: Option<CudaVersion>,
}
104
/// Hardware requirements a tool declares in its manifest.
///
/// Every field is optional; `check_against` skips any field that is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareSpec {
    /// GPU requirements, if any.
    pub gpu: Option<GpuSpec>,
    /// Declared CPU core count.
    ///
    /// NOTE(review): `check_against` in this file never reads this field;
    /// confirm whether that is intentional.
    pub cpu_cores: Option<u32>,
    /// Minimum RAM in GB, compared with `DetectedHardware::ram_gb()`.
    pub ram_gb: Option<f64>,
    /// Minimum free disk space in GB, compared with
    /// `DetectedHardware::disk_free_gb()`.
    pub disk_gb: Option<f64>,
}
112
113impl HardwareSpec {
114    /// Check this manifest's requirements against the host's detected hardware.
115    /// Returns every requirement that is not satisfied.
116    pub fn check_against(
117        &self,
118        detected: &crate::hardware::DetectedHardware,
119    ) -> Vec<crate::hardware::HardwareMismatch> {
120        use crate::hardware::HardwareMismatch;
121        let mut out = Vec::new();
122
123        if let Some(gpu_req) = &self.gpu
124            && gpu_req.required
125        {
126            if detected.gpus.is_empty() {
127                out.push(HardwareMismatch::NoGpu);
128            } else {
129                if let Some(min_vram) = gpu_req.min_vram_gb {
130                    let best_vram_mb = detected.gpus.iter().map(|g| g.vram_mb).max().unwrap_or(0);
131                    let best_vram_gb = (best_vram_mb as f64 / 1024.0).floor() as u32;
132                    if best_vram_gb < min_vram {
133                        out.push(HardwareMismatch::InsufficientVram {
134                            required_gb: min_vram,
135                            available_gb: best_vram_gb,
136                        });
137                    }
138                }
139                if let Some(min_cuda) = &gpu_req.cuda_version {
140                    let best_cuda = detected
141                        .gpus
142                        .iter()
143                        .filter_map(|g| g.cuda_version.as_ref())
144                        .max();
145                    match best_cuda {
146                        None => out.push(HardwareMismatch::NoCuda {
147                            required: min_cuda.clone(),
148                        }),
149                        Some(avail) if avail < min_cuda => {
150                            out.push(HardwareMismatch::CudaTooOld {
151                                required: min_cuda.clone(),
152                                available: avail.clone(),
153                            });
154                        }
155                        _ => {}
156                    }
157                }
158            }
159        }
160
161        if let Some(min_ram) = self.ram_gb {
162            let avail = detected.ram_gb();
163            if avail < min_ram {
164                out.push(HardwareMismatch::InsufficientRam {
165                    required_gb: min_ram,
166                    available_gb: avail,
167                });
168            }
169        }
170
171        if let Some(min_disk) = self.disk_gb {
172            let avail = detected.disk_free_gb();
173            if avail < min_disk {
174                out.push(HardwareMismatch::InsufficientDisk {
175                    required_gb: min_disk,
176                    available_gb: avail,
177                });
178            }
179        }
180
181        out
182    }
183}
184
/// Container image a tool runs in.
///
/// `Manifest::validate` rejects an empty `backend` or `reference`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageSpec {
    /// Runtime backend, e.g. `"docker"` or `"apptainer"`.
    pub backend: String,
    /// Canonical OCI reference, e.g. `"biocontainers/bwa:0.7.17"`.
    pub reference: String,
    /// Optional pinned digest for reproducibility.
    pub digest: Option<String>,
}
194
/// A reference dataset declared by a tool.
///
/// `mount_path` and `size_bytes` default to `None` when absent and are
/// omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceDataSpec {
    /// Dataset identifier.
    pub id: String,
    /// Dataset version string.
    pub version: String,
    /// NOTE(review): presumably whether the tool cannot run without this
    /// dataset; nothing in this file reads the flag, so confirm against callers.
    pub required: bool,
    /// Container path where the dataset directory is mounted read-only.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mount_path: Option<String>,
    /// Approximate compressed size in bytes.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub size_bytes: Option<u64>,
}
207
/// Typed I/O port declaration for a tool.
///
/// Used for both `inputs` and `outputs` entries of a tool manifest.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IoSpec {
    /// Port name; appears in validation messages as `tool.inputs[<name>]`
    /// or `tool.outputs[<name>]`.
    pub name: String,
    /// Type reference, e.g. `"fasta"` or `"fasta[protein]"`.
    ///
    /// Stored under the key `type` in the manifest. The base id must exist
    /// in the `bv_types` vocabulary or `Manifest::from_toml_str` fails.
    #[serde(rename = "type")]
    pub r#type: TypeRef,
    /// How many values this port accepts. Falls back to
    /// `Cardinality::default()` when omitted.
    #[serde(default)]
    pub cardinality: Cardinality,
    /// Absolute path inside the container where this value is mounted.
    /// `Manifest::validate` reports a non-absolute path as an error.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mount: Option<PathBuf>,
    /// Optional human-readable description of the port.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Optional default value, stored as a string.
    /// NOTE(review): how the default is interpreted is not visible in this file.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default: Option<String>,
}
226
/// How a tool is invoked inside its container.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntrypointSpec {
    /// Command to run. `Manifest::validate` rejects an empty value, and the
    /// text after the last `/` is used as the default exposed binary name.
    pub command: String,
    /// Optional argument template, e.g.
    /// `"mem -t {cpu_cores} {reference} {reads_r1} {reads_r2}"`.
    /// NOTE(review): placeholder expansion happens outside this file.
    pub args_template: Option<String>,
    /// Environment variables for the entrypoint; empty when omitted.
    #[serde(default)]
    pub env: HashMap<String, String>,
}
234
/// Binary names that the tool's container exposes on PATH.
///
/// Omitting this block defaults to `exposed = [entrypoint.command]` for
/// single-binary tools that do not need to declare anything extra.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinariesSpec {
    /// Exposed binary names. `Manifest::validate` reports duplicate names
    /// and, when the list is non-empty, requires the basename of the
    /// entrypoint command to be present.
    pub exposed: Vec<String>,
}
243
244/// Per-tool overrides for `bv conformance`'s smoke check.
245///
246/// The smoke check tries a small set of probe args (`--version`, `-version`,
247/// `--help`, `-h`, `-v`, `version`) against every binary the tool exposes,
248/// and counts a binary as alive if any probe produces output or exits 0.
249/// Most tools don't need a `[tool.smoke]` block at all; this is the escape
250/// hatch for the unusual cases.
251#[derive(Debug, Clone, Default, Serialize, Deserialize)]
252pub struct SmokeSpec {
253    /// Override probe args for specific binaries, e.g. `{ "blastn" = "-version" }`.
254    /// Each value is a single command-line argument (or empty string for "run
255    /// the binary with no args"). When set, only this probe is tried for that
256    /// binary; the default list is bypassed.
257    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
258    pub probes: std::collections::HashMap<String, String>,
259    /// Binaries to skip entirely (daemons, "no non-destructive invocation"
260    /// tools, etc.). Listed binaries still appear in `[tool.binaries]` and
261    /// get shims; conformance just doesn't probe them.
262    #[serde(default, skip_serializing_if = "Vec::is_empty")]
263    pub skip: Vec<String>,
264}
265
/// Returns the fallback timeout value, `60`.
///
/// NOTE(review): nothing in this file calls this function, which is why the
/// `dead_code` lint is silenced. Presumably it was a serde default for a
/// field that no longer exists; confirm and remove if so.
#[allow(dead_code)]
fn default_timeout() -> u64 {
    const FALLBACK: u64 = 60;
    FALLBACK
}
270
/// Optional Sigstore/cosign signature metadata.
///
/// Both fields default to `None` when absent and are omitted from
/// serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SignatureSpec {
    /// `"sigstore"` to verify the OCI image signature with cosign.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image: Option<String>,
    /// `"sigstore"` to verify the manifest's commit signature.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub manifest: Option<String>,
}
281
/// Contents of a manifest's `[tool]` table: identity, image, hardware
/// requirements, typed I/O, entrypoint and optional metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolManifest {
    /// Tool identifier; `Manifest::validate` rejects an empty value.
    pub id: String,
    /// Tool version string; `Manifest::validate` rejects an empty value.
    pub version: String,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Optional homepage URL.
    pub homepage: Option<String>,
    /// Optional license identifier, e.g. `"GPL-3.0"`.
    pub license: Option<String>,
    /// Governance tier. Defaults to `community` for new submissions.
    #[serde(default)]
    pub tier: Tier,
    /// GitHub handles of maintainers, e.g. `"github:alice"`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub maintainers: Vec<String>,
    /// Set to `true` when a tool is superseded or no longer maintained.
    /// Omitted from serialized output when `false`.
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub deprecated: bool,
    /// Container image the tool runs in.
    pub image: ImageSpec,
    /// Hardware requirements. The `[tool.hardware]` table must be present,
    /// though it may be empty.
    pub hardware: HardwareSpec,
    /// Reference datasets used by the tool; empty when omitted.
    /// NOTE(review): the meaning of the map key is not visible in this file.
    #[serde(default)]
    pub reference_data: HashMap<String, ReferenceDataSpec>,
    /// Typed inputs. Optional; manifests without this section parse unchanged.
    #[serde(default)]
    pub inputs: Vec<IoSpec>,
    /// Typed outputs. Optional; manifests without this section parse unchanged.
    #[serde(default)]
    pub outputs: Vec<IoSpec>,
    /// How the tool is invoked inside its container.
    pub entrypoint: EntrypointSpec,
    /// Container paths the tool writes to during normal execution and that
    /// should therefore be bound to writable host directories. Critical on
    /// apptainer (read-only SIF root), nice-to-have on docker (lets caches
    /// outlive `docker rm`). Tool authors declare these; users can override
    /// the host side via `[[cache]]` in `bv.toml`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub cache_paths: Vec<String>,
    /// Binary names this tool exposes on PATH inside its container.
    /// Omit for single-binary tools; defaults to `[entrypoint.command]`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub binaries: Option<BinariesSpec>,
    /// Smoke-check overrides; consulted by `bv conformance` for unusual binaries.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub smoke: Option<SmokeSpec>,
    /// Sigstore / cosign signature declarations.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signatures: Option<SignatureSpec>,
}
327
328impl ToolManifest {
329    pub fn has_typed_io(&self) -> bool {
330        !self.inputs.is_empty() || !self.outputs.is_empty()
331    }
332
333    /// Returns the effective list of binary names this tool exposes.
334    ///
335    /// When no `[tool.binaries]` block is present, defaults to
336    /// `[entrypoint.command]` (the basename component for path-style commands).
337    pub fn effective_binaries(&self) -> Vec<&str> {
338        match &self.binaries {
339            Some(b) => b.exposed.iter().map(|s| s.as_str()).collect(),
340            None => {
341                let cmd = &self.entrypoint.command;
342                let name = cmd
343                    .rfind('/')
344                    .map(|i| &cmd[i + 1..])
345                    .unwrap_or(cmd.as_str());
346                vec![name]
347            }
348        }
349    }
350}
351
/// Top-level manifest, corresponding to a single `.toml` file in the registry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
    /// Contents of the `[tool]` table.
    pub tool: ToolManifest,
}
357
/// A single problem reported by `Manifest::validate`.
///
/// Displayed as `<field>: <message>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ValidationError {
    /// Dotted path of the offending field, e.g. `tool.id` or
    /// `tool.inputs[reads_r1].mount`.
    pub field: String,
    /// Human-readable explanation, e.g. `must not be empty`.
    pub message: String,
}

impl fmt::Display for ValidationError {
    /// Writes `<field>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}: {}", self.field, self.message)
    }
}

// Implementing the standard error trait lets callers box this type as
// `Box<dyn std::error::Error>` or hand it to error-reporting code that
// expects `std::error::Error`.
impl std::error::Error for ValidationError {}
369
370impl Manifest {
371    pub fn from_toml_str(s: &str) -> Result<Self> {
372        let m: Manifest = toml::from_str(s).map_err(|e| BvError::ManifestParse(e.to_string()))?;
373        m.validate_types()?;
374        Ok(m)
375    }
376
377    pub fn to_toml_string(&self) -> Result<String> {
378        toml::to_string_pretty(self).map_err(|e| BvError::ManifestParse(e.to_string()))
379    }
380
381    /// Validates that all TypeRefs in inputs/outputs exist in the bv-types vocabulary.
382    fn validate_types(&self) -> Result<()> {
383        let t = &self.tool;
384        for (side, specs) in [("inputs", &t.inputs), ("outputs", &t.outputs)] {
385            for spec in specs {
386                let id = spec.r#type.base_id();
387                if bv_types::lookup(id).is_none() {
388                    let suggestion = bv_types::suggest(id)
389                        .map(|s| format!(", did you mean `{s}`?"))
390                        .unwrap_or_default();
391                    return Err(BvError::ManifestParse(format!(
392                        "tool.{side}[{}]: unknown type `{id}`{suggestion}",
393                        spec.name
394                    )));
395                }
396            }
397        }
398        Ok(())
399    }
400
401    /// Returns a list of validation errors, or `Ok(())` if the manifest is valid.
402    pub fn validate(&self) -> std::result::Result<(), Vec<ValidationError>> {
403        let mut errors = Vec::new();
404        let t = &self.tool;
405
406        if t.id.is_empty() {
407            errors.push(ValidationError {
408                field: "tool.id".into(),
409                message: "must not be empty".into(),
410            });
411        }
412        if t.version.is_empty() {
413            errors.push(ValidationError {
414                field: "tool.version".into(),
415                message: "must not be empty".into(),
416            });
417        }
418        if t.image.backend.is_empty() {
419            errors.push(ValidationError {
420                field: "tool.image.backend".into(),
421                message: "must not be empty".into(),
422            });
423        }
424        if t.image.reference.is_empty() {
425            errors.push(ValidationError {
426                field: "tool.image.reference".into(),
427                message: "must not be empty".into(),
428            });
429        }
430        if t.entrypoint.command.is_empty() {
431            errors.push(ValidationError {
432                field: "tool.entrypoint.command".into(),
433                message: "must not be empty".into(),
434            });
435        }
436
437        for spec in &t.inputs {
438            if let Some(mount) = &spec.mount
439                && !mount.is_absolute()
440            {
441                errors.push(ValidationError {
442                    field: format!("tool.inputs[{}].mount", spec.name),
443                    message: "must be an absolute path".into(),
444                });
445            }
446        }
447        for spec in &t.outputs {
448            if let Some(mount) = &spec.mount
449                && !mount.is_absolute()
450            {
451                errors.push(ValidationError {
452                    field: format!("tool.outputs[{}].mount", spec.name),
453                    message: "must be an absolute path".into(),
454                });
455            }
456        }
457
458        if let Some(binaries) = &t.binaries {
459            let mut seen = std::collections::HashSet::new();
460            for name in &binaries.exposed {
461                if !seen.insert(name.as_str()) {
462                    errors.push(ValidationError {
463                        field: "tool.binaries.exposed".into(),
464                        message: format!("duplicate binary name '{name}'"),
465                    });
466                }
467            }
468            if !binaries.exposed.is_empty() {
469                let cmd = &t.entrypoint.command;
470                let basename = cmd.rfind('/').map(|i| &cmd[i + 1..]).unwrap_or(cmd);
471                if !binaries.exposed.iter().any(|b| b == basename) {
472                    errors.push(ValidationError {
473                        field: "tool.binaries.exposed".into(),
474                        message: format!(
475                            "entrypoint command '{basename}' must be listed in exposed"
476                        ),
477                    });
478                }
479            }
480        }
481
482        if errors.is_empty() {
483            Ok(())
484        } else {
485            Err(errors)
486        }
487    }
488}
489
#[cfg(test)]
mod tests {
    use super::*;

    /// Fully populated manifest: pinned image digest, hardware section,
    /// typed inputs/outputs and entrypoint environment.
    const SAMPLE: &str = r#"
[tool]
id = "bwa"
version = "0.7.17"
description = "BWA short-read aligner"
homepage = "http://bio-bwa.sourceforge.net/"
license = "GPL-3.0"

[tool.image]
backend = "docker"
reference = "biocontainers/bwa:0.7.17--h5bf99c6_8"
digest = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890ab"

[tool.hardware]
cpu_cores = 8
ram_gb = 32.0
disk_gb = 50.0

[tool.hardware.gpu]
required = false

[[tool.inputs]]
name = "reads_r1"
type = "fastq"
cardinality = "one"
description = "Forward reads"

[[tool.inputs]]
name = "reads_r2"
type = "fastq"
cardinality = "optional"
description = "Reverse reads (paired-end)"

[[tool.outputs]]
name = "alignment"
type = "bam"
description = "Aligned reads"

[tool.entrypoint]
command = "bwa"
args_template = "mem -t {cpu_cores} {reference} {reads_r1} {reads_r2}"

[tool.entrypoint.env]
MALLOC_ARENA_MAX = "4"
"#;

    /// Minimal manifest without any `inputs` / `outputs` sections.
    const SAMPLE_NO_IO: &str = r#"
[tool]
id = "mytool"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/mytool:1.0.0"

[tool.hardware]

[tool.entrypoint]
command = "mytool"
"#;

    /// Parse, serialise and re-parse; the tool identity must survive.
    #[test]
    fn round_trip() {
        let original = Manifest::from_toml_str(SAMPLE).expect("parse failed");
        let tool = &original.tool;
        assert_eq!(tool.id, "bwa");
        assert_eq!(tool.version, "0.7.17");
        assert_eq!(tool.image.backend, "docker");
        assert_eq!(tool.inputs.len(), 2);
        assert_eq!(tool.outputs.len(), 1);
        assert_eq!(tool.inputs[0].cardinality, Cardinality::One);
        assert_eq!(tool.inputs[1].cardinality, Cardinality::Optional);

        let toml_text = original.to_toml_string().expect("serialise failed");
        let restored = Manifest::from_toml_str(&toml_text).expect("reparse failed");
        assert_eq!(restored.tool.id, tool.id);
        assert_eq!(restored.tool.version, tool.version);
    }

    /// A manifest with no typed I/O still parses and reports none.
    #[test]
    fn no_io_parses_unchanged() {
        let manifest = Manifest::from_toml_str(SAMPLE_NO_IO).expect("parse failed");
        let tool = &manifest.tool;
        assert!(tool.inputs.is_empty());
        assert!(tool.outputs.is_empty());
        assert!(!tool.has_typed_io());
    }

    /// Bracketed type parameters end up in `TypeRef::params`.
    #[test]
    fn typeref_params_parsed() {
        let toml_text = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "fasta[protein]"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let manifest = Manifest::from_toml_str(toml_text).unwrap();
        let first_input = &manifest.tool.inputs[0];
        assert_eq!(first_input.r#type.params, vec!["protein"]);
    }

    /// An unrecognised type id is rejected at parse time.
    #[test]
    fn unknown_type_error() {
        let toml_text = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "protien_fasta"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let msg = Manifest::from_toml_str(toml_text).unwrap_err().to_string();
        assert!(msg.contains("unknown type"), "got: {msg}");
    }

    /// Versions order by major, then minor; equal strings parse equal.
    #[test]
    fn cuda_version_ordering() {
        let parse = |text: &str| text.parse::<CudaVersion>().unwrap();
        let (v12_1, v12_4, v13_0) = (parse("12.1"), parse("12.4"), parse("13.0"));
        assert!(v12_1 < v12_4);
        assert!(v12_4 < v13_0);
        assert_eq!(v12_1, parse("12.1"));
    }

    /// Clearing the id after parsing makes `validate` flag `tool.id`.
    #[test]
    fn validate_catches_empty_id() {
        let mut manifest = Manifest::from_toml_str(SAMPLE).unwrap();
        manifest.tool.id.clear();
        let errors = manifest.validate().unwrap_err();
        assert!(errors.iter().any(|err| err.field == "tool.id"));
    }

    /// Every `.toml` file one level below the registry's `tools` directory
    /// must parse. The test passes silently when that directory cannot be read.
    #[test]
    fn registry_manifests_parse() {
        let registry_dir = concat!(env!("CARGO_MANIFEST_DIR"), "/../../bv-registry/tools");
        let tool_dirs = match std::fs::read_dir(registry_dir) {
            Ok(dirs) => dirs,
            Err(_) => return,
        };

        for tool_entry in tool_dirs {
            let tool_dir = tool_entry.unwrap().path();
            if !tool_dir.is_dir() {
                continue;
            }
            for manifest_entry in std::fs::read_dir(&tool_dir).unwrap() {
                let path = manifest_entry.unwrap().path();
                let is_toml = path.extension().is_some_and(|ext| ext == "toml");
                if !is_toml {
                    continue;
                }
                let text = match std::fs::read_to_string(&path) {
                    Ok(text) => text,
                    Err(_) => panic!("failed to read {}", path.display()),
                };
                if let Err(err) = Manifest::from_toml_str(&text) {
                    panic!("{}: {err}", path.display());
                }
            }
        }
    }
}