// bv_core/manifest.rs

1use std::collections::BTreeMap;
2use std::fmt;
3use std::path::PathBuf;
4use std::str::FromStr;
5
6use serde::{Deserialize, Serialize};
7
8use bv_types::{Cardinality, TypeRef};
9
10use crate::error::{BvError, Result};
11
12/// Quality and governance tier for a tool in the registry.
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
14#[serde(rename_all = "lowercase")]
15pub enum Tier {
16    /// Typed I/O complete, conformance tests pass, from a recognized publisher, actively maintained.
17    Core,
18    /// Typed I/O present (may be partial), basic checks pass.
19    #[default]
20    Community,
21    /// Basic checks pass; may lack typed I/O. Hidden from default search results.
22    Experimental,
23}
24
25impl Tier {
26    pub fn as_str(&self) -> &'static str {
27        match self {
28            Tier::Core => "core",
29            Tier::Community => "community",
30            Tier::Experimental => "experimental",
31        }
32    }
33}
34
35impl fmt::Display for Tier {
36    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
37        f.write_str(self.as_str())
38    }
39}
40
/// CUDA version as a `major.minor` pair.
///
/// Ordering is derived field by field (`major` first, then `minor`), which
/// gives `12.1 < 12.4 < 13.0`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CudaVersion {
    pub major: u32,
    pub minor: u32,
}

impl fmt::Display for CudaVersion {
    /// Renders the version as `major.minor`, e.g. `12.4`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { major, minor } = self;
        write!(f, "{major}.{minor}")
    }
}

impl FromStr for CudaVersion {
    type Err = String;

    /// Parses a `"major.minor"` string.
    ///
    /// The input is split at the first `.`; both halves must parse as `u32`.
    /// On failure the error is a message naming the offending part.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let Some((major_text, minor_text)) = s.split_once('.') else {
            return Err(format!("expected 'major.minor', got '{s}'"));
        };
        let major = major_text
            .parse::<u32>()
            .map_err(|_| format!("invalid major version '{major_text}'"))?;
        let minor = minor_text
            .parse::<u32>()
            .map_err(|_| format!("invalid minor version '{minor_text}'"))?;
        Ok(Self { major, minor })
    }
}

impl TryFrom<String> for CudaVersion {
    type Error = String;

    /// Owned-string conversion; delegates to the [`FromStr`] implementation.
    fn try_from(s: String) -> std::result::Result<Self, Self::Error> {
        Self::from_str(&s)
    }
}

impl From<CudaVersion> for String {
    /// Produces the same text as the `Display` implementation.
    fn from(version: CudaVersion) -> String {
        version.to_string()
    }
}
84
85impl Serialize for CudaVersion {
86    fn serialize<S: serde::Serializer>(&self, s: S) -> std::result::Result<S::Ok, S::Error> {
87        s.serialize_str(&self.to_string())
88    }
89}
90
91impl<'de> Deserialize<'de> for CudaVersion {
92    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
93        let s = String::deserialize(d)?;
94        s.parse().map_err(serde::de::Error::custom)
95    }
96}
97
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct GpuSpec {
100    pub required: bool,
101    pub min_vram_gb: Option<u32>,
102    pub cuda_version: Option<CudaVersion>,
103}
104
105#[derive(Debug, Clone, Serialize, Deserialize)]
106pub struct HardwareSpec {
107    pub gpu: Option<GpuSpec>,
108    pub cpu_cores: Option<u32>,
109    pub ram_gb: Option<f64>,
110    pub disk_gb: Option<f64>,
111}
112
113impl HardwareSpec {
114    /// Check this manifest's requirements against the host's detected hardware.
115    /// Returns every requirement that is not satisfied.
116    pub fn check_against(
117        &self,
118        detected: &crate::hardware::DetectedHardware,
119    ) -> Vec<crate::hardware::HardwareMismatch> {
120        use crate::hardware::HardwareMismatch;
121        let mut out = Vec::new();
122
123        if let Some(gpu_req) = &self.gpu
124            && gpu_req.required
125        {
126            if detected.gpus.is_empty() {
127                out.push(HardwareMismatch::NoGpu);
128            } else {
129                if let Some(min_vram) = gpu_req.min_vram_gb {
130                    let best_vram_mb = detected.gpus.iter().map(|g| g.vram_mb).max().unwrap_or(0);
131                    // Round to nearest GiB instead of floor: nvidia-smi
132                    // typically reports just-under marketing capacity (e.g.
133                    // 24268 MiB on a "24 GB" RTX 3090). flooring made
134                    // min_vram_gb=24 spuriously fail on real hardware.
135                    let best_vram_gb = ((best_vram_mb as f64) / 1024.0).round() as u32;
136                    if best_vram_gb < min_vram {
137                        out.push(HardwareMismatch::InsufficientVram {
138                            required_gb: min_vram,
139                            available_gb: best_vram_gb,
140                        });
141                    }
142                }
143                if let Some(min_cuda) = &gpu_req.cuda_version {
144                    let best_cuda = detected
145                        .gpus
146                        .iter()
147                        .filter_map(|g| g.cuda_version.as_ref())
148                        .max();
149                    match best_cuda {
150                        None => out.push(HardwareMismatch::NoCuda {
151                            required: min_cuda.clone(),
152                        }),
153                        Some(avail) if avail < min_cuda => {
154                            out.push(HardwareMismatch::CudaTooOld {
155                                required: min_cuda.clone(),
156                                available: avail.clone(),
157                            });
158                        }
159                        _ => {}
160                    }
161                }
162            }
163        }
164
165        if let Some(min_ram) = self.ram_gb {
166            let avail = detected.ram_gb();
167            if avail < min_ram {
168                out.push(HardwareMismatch::InsufficientRam {
169                    required_gb: min_ram,
170                    available_gb: avail,
171                });
172            }
173        }
174
175        if let Some(min_disk) = self.disk_gb {
176            let avail = detected.disk_free_gb();
177            if avail < min_disk {
178                out.push(HardwareMismatch::InsufficientDisk {
179                    required_gb: min_disk,
180                    available_gb: avail,
181                });
182            }
183        }
184
185        out
186    }
187}
188
189#[derive(Debug, Clone, Serialize, Deserialize)]
190pub struct ImageSpec {
191    /// Runtime backend, e.g. `"docker"` or `"apptainer"`.
192    pub backend: String,
193    /// Canonical OCI reference, e.g. `"biocontainers/bwa:0.7.17"`.
194    pub reference: String,
195    /// Optional pinned digest for reproducibility.
196    pub digest: Option<String>,
197}
198
199/// Layer descriptor embedded in a factored manifest.
200/// Mirrors `lockfile::LayerDescriptor` but lives in the registry TOML.
201#[derive(Debug, Clone, Serialize, Deserialize)]
202pub struct FactoredLayerSpec {
203    pub digest: String,
204    pub size: u64,
205    pub media_type: String,
206    #[serde(default, skip_serializing_if = "Option::is_none")]
207    pub conda_package: Option<FactoredCondaPin>,
208}
209
210#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct FactoredCondaPin {
212    pub name: String,
213    pub version: String,
214    pub build: String,
215    pub channel: String,
216    pub sha256: String,
217}
218
219/// Factored OCI build metadata embedded in the registry manifest.
220///
221/// When `[tool.factored]` is present, clients that support factored images
222/// can pull at layer granularity instead of pulling the monolithic squashed
223/// image in `[tool.image]`. The `[tool.image]` section remains required as
224/// the fallback path for older clients.
225#[derive(Debug, Clone, Serialize, Deserialize)]
226pub struct FactoredSpec {
227    /// Path to the bv-builder spec YAML relative to the registry root.
228    pub spec_path: String,
229    /// Canonical OCI reference for the factored image.
230    pub image_reference: String,
231    /// Pinned digest of the factored image manifest.
232    pub image_digest: String,
233    /// OCI referrer digest of the repodata snapshot artifact.
234    #[serde(default, skip_serializing_if = "Option::is_none")]
235    pub repodata_snapshot_digest: Option<String>,
236    /// Pre-computed per-layer descriptors, in manifest order.
237    #[serde(default, skip_serializing_if = "Vec::is_empty")]
238    pub layers: Vec<FactoredLayerSpec>,
239}
240
241#[derive(Debug, Clone, Serialize, Deserialize)]
242pub struct ReferenceDataSpec {
243    pub id: String,
244    pub version: String,
245    pub required: bool,
246    /// Container path where the dataset directory is mounted read-only.
247    #[serde(default, skip_serializing_if = "Option::is_none")]
248    pub mount_path: Option<String>,
249    /// Approximate compressed size in bytes.
250    #[serde(default, skip_serializing_if = "Option::is_none")]
251    pub size_bytes: Option<u64>,
252}
253
254/// Typed I/O port declaration for a tool.
255#[derive(Debug, Clone, Serialize, Deserialize)]
256pub struct IoSpec {
257    pub name: String,
258    /// Type reference, e.g. `"fasta"` or `"fasta[protein]"`.
259    #[serde(rename = "type")]
260    pub r#type: TypeRef,
261    /// How many values this port accepts.
262    #[serde(default)]
263    pub cardinality: Cardinality,
264    #[serde(default = "default_required")]
265    pub required: bool,
266    /// Absolute path inside the container where this value is mounted.
267    #[serde(default, skip_serializing_if = "Option::is_none")]
268    pub mount: Option<PathBuf>,
269    #[serde(default, skip_serializing_if = "Option::is_none")]
270    pub description: Option<String>,
271    #[serde(default, skip_serializing_if = "Option::is_none")]
272    pub default: Option<String>,
273}
274
/// Serde default for `IoSpec::required`: a port is not required unless the
/// manifest says so.
///
/// NOTE(review): this yields the same value as a bare `#[serde(default)]`
/// would for `bool`; confirm `false` is the intended default.
fn default_required() -> bool {
    const REQUIRED_WHEN_OMITTED: bool = false;
    REQUIRED_WHEN_OMITTED
}
278
279#[derive(Debug, Clone, Serialize, Deserialize)]
280pub struct EntrypointSpec {
281    pub command: String,
282    pub args_template: Option<String>,
283    #[serde(default)]
284    pub env: BTreeMap<String, String>,
285}
286
287/// Binary names that the tool's container exposes on PATH.
288///
289/// Omitting this block defaults to `exposed = [entrypoint.command]` for
290/// single-binary tools that do not need to declare anything extra.
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct BinariesSpec {
293    pub exposed: Vec<String>,
294}
295
296/// Per-tool overrides for `bv conformance`'s smoke check.
297///
298/// The smoke check tries a small set of probe args (`--version`, `-version`,
299/// `--help`, `-h`, `-v`, `version`) against every binary the tool exposes,
300/// and counts a binary as alive if any probe produces output or exits 0.
301/// Most tools don't need a `[tool.smoke]` block at all; this is the escape
302/// hatch for the unusual cases.
303#[derive(Debug, Clone, Default, Serialize, Deserialize)]
304pub struct SmokeSpec {
305    /// Override probe args for specific binaries, e.g. `{ "blastn" = "-version" }`.
306    /// Each value is a single command-line argument (or empty string for "run
307    /// the binary with no args"). When set, only this probe is tried for that
308    /// binary; the default list is bypassed.
309    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
310    pub probes: std::collections::BTreeMap<String, String>,
311    /// Binaries to skip entirely (daemons, "no non-destructive invocation"
312    /// tools, etc.). Listed binaries still appear in `[tool.binaries]` and
313    /// get shims; conformance just doesn't probe them.
314    #[serde(default, skip_serializing_if = "Vec::is_empty")]
315    pub skip: Vec<String>,
316}
317
/// Returns the default timeout value, `60`.
///
/// NOTE(review): nothing in this file references this function, hence the
/// `dead_code` allowance. The unit of the value is not stated here.
#[allow(dead_code)]
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 60;
    DEFAULT_TIMEOUT
}
322
323/// Optional Sigstore/cosign signature metadata.
324#[derive(Debug, Clone, Serialize, Deserialize)]
325pub struct SignatureSpec {
326    /// `"sigstore"` to verify the OCI image signature with cosign.
327    #[serde(default, skip_serializing_if = "Option::is_none")]
328    pub image: Option<String>,
329    /// `"sigstore"` to verify the manifest's commit signature.
330    #[serde(default, skip_serializing_if = "Option::is_none")]
331    pub manifest: Option<String>,
332}
333
334#[derive(Debug, Clone, Serialize, Deserialize)]
335pub struct ToolManifest {
336    pub id: String,
337    pub version: String,
338    pub description: Option<String>,
339    pub homepage: Option<String>,
340    pub license: Option<String>,
341    /// Governance tier. Defaults to `community` for new submissions.
342    #[serde(default)]
343    pub tier: Tier,
344    /// GitHub handles of maintainers, e.g. `"github:alice"`.
345    #[serde(default, skip_serializing_if = "Vec::is_empty")]
346    pub maintainers: Vec<String>,
347    /// Set to `true` when a tool is superseded or no longer maintained.
348    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
349    pub deprecated: bool,
350    pub image: ImageSpec,
351    pub hardware: HardwareSpec,
352    #[serde(default)]
353    pub reference_data: BTreeMap<String, ReferenceDataSpec>,
354    /// Typed inputs. Optional; manifests without this section parse unchanged.
355    #[serde(default)]
356    pub inputs: Vec<IoSpec>,
357    /// Typed outputs. Optional; manifests without this section parse unchanged.
358    #[serde(default)]
359    pub outputs: Vec<IoSpec>,
360    /// Default invocation. Required unless `[tool.subcommands]` is non-empty;
361    /// see `validate()`. Multi-script tools may omit this entirely.
362    #[serde(default, skip_serializing_if = "Option::is_none")]
363    pub entrypoint: Option<EntrypointSpec>,
364    /// Tool-namespaced launchers. Reachable as `bv run <toolid> <name> ...args`.
365    /// Each value is the literal argv prefix; user args are appended verbatim.
366    /// Unlike `[tool.binaries]`, names are not exposed on PATH or in the global
367    /// binary index, so generic names (`train`, `eval`) are safe.
368    #[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
369    pub subcommands: BTreeMap<String, Vec<String>>,
370    /// Container paths the tool writes to during normal execution and that
371    /// should therefore be bound to writable host directories. Critical on
372    /// apptainer (read-only SIF root), nice-to-have on docker (lets caches
373    /// outlive `docker rm`). Tool authors declare these; users can override
374    /// the host side via `[[cache]]` in `bv.toml`.
375    #[serde(default, skip_serializing_if = "Vec::is_empty")]
376    pub cache_paths: Vec<String>,
377    /// Binary names this tool exposes on PATH inside its container.
378    /// Omit for single-binary tools; defaults to `[entrypoint.command]`.
379    #[serde(default, skip_serializing_if = "Option::is_none")]
380    pub binaries: Option<BinariesSpec>,
381    /// Smoke-check overrides; consulted by `bv conformance` for unusual binaries.
382    #[serde(default, skip_serializing_if = "Option::is_none")]
383    pub smoke: Option<SmokeSpec>,
384    /// Sigstore / cosign signature declarations.
385    #[serde(default, skip_serializing_if = "Option::is_none")]
386    pub signatures: Option<SignatureSpec>,
387    /// Factored OCI build metadata. Present when the tool has been rebuilt
388    /// by `bv-builder`. Clients that understand factored images use this for
389    /// layer-granularity pulls; older clients fall back to `[tool.image]`.
390    #[serde(default, skip_serializing_if = "Option::is_none")]
391    pub factored: Option<FactoredSpec>,
392}
393
394impl ToolManifest {
395    pub fn has_typed_io(&self) -> bool {
396        !self.inputs.is_empty() || !self.outputs.is_empty()
397    }
398
399    /// Returns the effective list of binary names this tool exposes.
400    ///
401    /// When `[tool.binaries]` is absent, defaults to the entrypoint command's
402    /// basename. Multi-script tools without an entrypoint expose no binaries
403    /// (their subcommands stay namespaced under the tool id).
404    pub fn effective_binaries(&self) -> Vec<&str> {
405        if let Some(b) = &self.binaries {
406            return b.exposed.iter().map(|s| s.as_str()).collect();
407        }
408        let Some(ep) = &self.entrypoint else {
409            return vec![];
410        };
411        let cmd = &ep.command;
412        let name = cmd
413            .rfind('/')
414            .map(|i| &cmd[i + 1..])
415            .unwrap_or(cmd.as_str());
416        vec![name]
417    }
418}
419
420/// Top-level manifest, corresponding to a single `.toml` file in the registry.
421#[derive(Debug, Clone, Serialize, Deserialize)]
422pub struct Manifest {
423    pub tool: ToolManifest,
424}
425
/// A single problem reported by manifest validation.
#[derive(Debug)]
pub struct ValidationError {
    /// Dotted path of the offending field, e.g. `tool.id`.
    pub field: String,
    /// Description of what is wrong with that field.
    pub message: String,
}

impl fmt::Display for ValidationError {
    /// Formats the error as `<field>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { field, message } = self;
        write!(f, "{field}: {message}")
    }
}
437
438impl Manifest {
439    pub fn from_toml_str(s: &str) -> Result<Self> {
440        let m: Manifest = toml::from_str(s).map_err(|e| BvError::ManifestParse(e.to_string()))?;
441        m.validate_types()?;
442        if let Err(errs) = m.validate() {
443            let combined = errs
444                .iter()
445                .map(|e| e.to_string())
446                .collect::<Vec<_>>()
447                .join("; ");
448            return Err(BvError::ManifestParse(format!(
449                "manifest validation failed: {combined}"
450            )));
451        }
452        Ok(m)
453    }
454
455    pub fn to_toml_string(&self) -> Result<String> {
456        toml::to_string_pretty(self).map_err(|e| BvError::ManifestParse(e.to_string()))
457    }
458
459    /// Validates that all TypeRefs in inputs/outputs exist in the bv-types vocabulary.
460    fn validate_types(&self) -> Result<()> {
461        let t = &self.tool;
462        for (side, specs) in [("inputs", &t.inputs), ("outputs", &t.outputs)] {
463            for spec in specs {
464                let id = spec.r#type.base_id();
465                if bv_types::lookup(id).is_none() {
466                    let suggestion = bv_types::suggest(id)
467                        .map(|s| format!(", did you mean `{s}`?"))
468                        .unwrap_or_default();
469                    return Err(BvError::ManifestParse(format!(
470                        "tool.{side}[{}]: unknown type `{id}`{suggestion}",
471                        spec.name
472                    )));
473                }
474            }
475        }
476        Ok(())
477    }
478
479    /// Returns a list of validation errors, or `Ok(())` if the manifest is valid.
480    pub fn validate(&self) -> std::result::Result<(), Vec<ValidationError>> {
481        let mut errors = Vec::new();
482        let t = &self.tool;
483
484        if t.id.is_empty() {
485            errors.push(ValidationError {
486                field: "tool.id".into(),
487                message: "must not be empty".into(),
488            });
489        }
490        if t.version.is_empty() {
491            errors.push(ValidationError {
492                field: "tool.version".into(),
493                message: "must not be empty".into(),
494            });
495        }
496        if t.image.backend.is_empty() {
497            errors.push(ValidationError {
498                field: "tool.image.backend".into(),
499                message: "must not be empty".into(),
500            });
501        }
502        if t.image.reference.is_empty() {
503            errors.push(ValidationError {
504                field: "tool.image.reference".into(),
505                message: "must not be empty".into(),
506            });
507        }
508        match (&t.entrypoint, t.subcommands.is_empty()) {
509            (None, true) => errors.push(ValidationError {
510                field: "tool.entrypoint".into(),
511                message: "must declare either [tool.entrypoint] or [tool.subcommands]".into(),
512            }),
513            (Some(ep), _) if ep.command.is_empty() => errors.push(ValidationError {
514                field: "tool.entrypoint.command".into(),
515                message: "must not be empty".into(),
516            }),
517            _ => {}
518        }
519
520        for (name, cmd) in &t.subcommands {
521            if name.is_empty() {
522                errors.push(ValidationError {
523                    field: "tool.subcommands".into(),
524                    message: "subcommand name must not be empty".into(),
525                });
526                continue;
527            }
528            if name.starts_with('-') {
529                errors.push(ValidationError {
530                    field: format!("tool.subcommands.{name}"),
531                    message: "subcommand name must not start with '-'".into(),
532                });
533            }
534            if cmd.is_empty() {
535                errors.push(ValidationError {
536                    field: format!("tool.subcommands.{name}"),
537                    message: "command vector must not be empty".into(),
538                });
539            }
540        }
541
542        for spec in &t.inputs {
543            if let Some(mount) = &spec.mount
544                && !mount.is_absolute()
545            {
546                errors.push(ValidationError {
547                    field: format!("tool.inputs[{}].mount", spec.name),
548                    message: "must be an absolute path".into(),
549                });
550            }
551        }
552        for spec in &t.outputs {
553            if let Some(mount) = &spec.mount
554                && !mount.is_absolute()
555            {
556                errors.push(ValidationError {
557                    field: format!("tool.outputs[{}].mount", spec.name),
558                    message: "must be an absolute path".into(),
559                });
560            }
561        }
562
563        if let Some(binaries) = &t.binaries {
564            let mut seen = std::collections::HashSet::new();
565            for name in &binaries.exposed {
566                if !seen.insert(name.as_str()) {
567                    errors.push(ValidationError {
568                        field: "tool.binaries.exposed".into(),
569                        message: format!("duplicate binary name '{name}'"),
570                    });
571                }
572            }
573            if !binaries.exposed.is_empty()
574                && let Some(ep) = &t.entrypoint
575            {
576                let cmd = &ep.command;
577                let basename = cmd.rfind('/').map(|i| &cmd[i + 1..]).unwrap_or(cmd);
578                if !binaries.exposed.iter().any(|b| b == basename) {
579                    errors.push(ValidationError {
580                        field: "tool.binaries.exposed".into(),
581                        message: format!(
582                            "entrypoint command '{basename}' must be listed in exposed"
583                        ),
584                    });
585                }
586            }
587        }
588
589        if errors.is_empty() {
590            Ok(())
591        } else {
592            Err(errors)
593        }
594    }
595}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// Fully populated manifest: hardware, typed I/O, entrypoint and env.
    const SAMPLE: &str = r#"
[tool]
id = "bwa"
version = "0.7.17"
description = "BWA short-read aligner"
homepage = "http://bio-bwa.sourceforge.net/"
license = "GPL-3.0"

[tool.image]
backend = "docker"
reference = "biocontainers/bwa:0.7.17--h5bf99c6_8"
digest = "sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890ab"

[tool.hardware]
cpu_cores = 8
ram_gb = 32.0
disk_gb = 50.0

[tool.hardware.gpu]
required = false

[[tool.inputs]]
name = "reads_r1"
type = "fastq"
cardinality = "one"
description = "Forward reads"

[[tool.inputs]]
name = "reads_r2"
type = "fastq"
cardinality = "optional"
description = "Reverse reads (paired-end)"

[[tool.outputs]]
name = "alignment"
type = "bam"
description = "Aligned reads"

[tool.entrypoint]
command = "bwa"
args_template = "mem -t {cpu_cores} {reference} {reads_r1} {reads_r2}"

[tool.entrypoint.env]
MALLOC_ARENA_MAX = "4"
"#;

    /// Minimal manifest with no typed inputs or outputs.
    const SAMPLE_NO_IO: &str = r#"
[tool]
id = "mytool"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/mytool:1.0.0"

[tool.hardware]

[tool.entrypoint]
command = "mytool"
"#;

    /// Parse, inspect, serialize and re-parse the full sample.
    #[test]
    fn round_trip() {
        let original = Manifest::from_toml_str(SAMPLE).expect("parse failed");
        let tool = &original.tool;
        assert_eq!(tool.id, "bwa");
        assert_eq!(tool.version, "0.7.17");
        assert_eq!(tool.image.backend, "docker");
        assert_eq!(tool.inputs.len(), 2);
        assert_eq!(tool.outputs.len(), 1);
        assert_eq!(tool.inputs[0].cardinality, Cardinality::One);
        assert_eq!(tool.inputs[1].cardinality, Cardinality::Optional);

        let serialised = original.to_toml_string().expect("serialise failed");
        let reparsed = Manifest::from_toml_str(&serialised).expect("reparse failed");
        assert_eq!(reparsed.tool.id, tool.id);
        assert_eq!(reparsed.tool.version, tool.version);
    }

    /// Regression test: fields backed by a HashMap used to serialize in a
    /// non-deterministic order, which broke lockfile drift detection.
    /// Serializing the same manifest repeatedly must give identical bytes.
    #[test]
    fn to_toml_string_is_deterministic_with_subcommands() {
        let toml_src = r#"
[tool]
id = "multi"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/multi:1.0.0"

[tool.hardware]

[tool.entrypoint]
command = "main"

[tool.subcommands]
zebra = ["script_z.py"]
alpha = ["script_a.py"]
mango = ["python", "-m", "scripts.mango"]
beta = ["script_b.py"]
"#;
        let manifest = Manifest::from_toml_str(toml_src).expect("parse");
        let first = manifest.to_toml_string().unwrap();
        // Repeat often enough that a lucky iteration order is implausible.
        for _ in 0..32 {
            assert_eq!(
                first,
                manifest.to_toml_string().unwrap(),
                "non-deterministic output"
            );
        }
        // BTreeMap ordering: keys must come out lexicographically sorted.
        let offset_of = |key: &str| first.find(key).unwrap();
        let offsets = [
            offset_of("alpha = "),
            offset_of("beta = "),
            offset_of("mango = "),
            offset_of("zebra = "),
        ];
        assert!(offsets.windows(2).all(|pair| pair[0] < pair[1]));
    }

    /// A manifest without typed I/O sections parses with empty port lists.
    #[test]
    fn no_io_parses_unchanged() {
        let manifest = Manifest::from_toml_str(SAMPLE_NO_IO).expect("parse failed");
        let tool = &manifest.tool;
        assert!(tool.inputs.is_empty());
        assert!(tool.outputs.is_empty());
        assert!(!tool.has_typed_io());
    }

    /// Bracketed type parameters end up in `TypeRef::params`.
    #[test]
    fn typeref_params_parsed() {
        let toml_src = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "fasta[protein]"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let manifest = Manifest::from_toml_str(toml_src).unwrap();
        let first_input = &manifest.tool.inputs[0];
        assert_eq!(first_input.r#type.params, vec!["protein"]);
    }

    /// A type name outside the vocabulary is rejected at parse time.
    #[test]
    fn unknown_type_error() {
        let toml_src = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[[tool.inputs]]
name = "seqs"
type = "protien_fasta"
cardinality = "one"

[tool.entrypoint]
command = "t"
"#;
        let msg = Manifest::from_toml_str(toml_src).unwrap_err().to_string();
        assert!(msg.contains("unknown type"), "got: {msg}");
    }

    /// Versions order by major, then minor; equal inputs compare equal.
    #[test]
    fn cuda_version_ordering() {
        let parse = |text: &str| text.parse::<CudaVersion>().unwrap();
        let v12_1 = parse("12.1");
        let v12_4 = parse("12.4");
        let v13_0 = parse("13.0");
        assert!(v12_1 < v12_4);
        assert!(v12_4 < v13_0);
        assert_eq!(v12_1, "12.1".parse::<CudaVersion>().unwrap());
    }

    /// A manifest may declare subcommands and no entrypoint at all.
    #[test]
    fn subcommands_only_parses() {
        let toml_src = r#"
[tool]
id = "genie2"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "ghcr.io/example/genie2:1.0.0"

[tool.hardware]

[tool.subcommands]
train                = ["python", "genie/train.py"]
sample_unconditional = ["python", "genie/sample_unconditional.py"]
"#;
        let manifest = Manifest::from_toml_str(toml_src).unwrap();
        let tool = &manifest.tool;
        assert!(tool.entrypoint.is_none());
        assert_eq!(tool.subcommands.len(), 2);
        assert_eq!(
            tool.subcommands.get("train").unwrap(),
            &vec!["python".to_string(), "genie/train.py".to_string()]
        );
        manifest
            .validate()
            .expect("subcommand-only manifest is valid");
        // Neither an entrypoint nor [tool.binaries]: nothing is exposed.
        assert!(tool.effective_binaries().is_empty());
    }

    /// Declaring neither an entrypoint nor subcommands is an error.
    #[test]
    fn validate_requires_entrypoint_or_subcommands() {
        let toml_src = r#"
[tool]
id = "broken"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/broken:1.0.0"

[tool.hardware]
"#;
        // from_toml_str runs validate(), so the manifest is already rejected
        // while parsing.
        let err = Manifest::from_toml_str(toml_src).unwrap_err();
        assert!(
            err.to_string().contains("tool.entrypoint"),
            "expected entrypoint-or-subcommands error, got: {err}"
        );
    }

    /// Subcommand names that look like flags are refused.
    #[test]
    fn validate_rejects_dash_prefixed_subcommand() {
        let toml_src = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[tool.subcommands]
"-bad" = ["python", "x.py"]
"#;
        let err = Manifest::from_toml_str(toml_src).unwrap_err();
        assert!(err.to_string().contains("-bad"), "got: {err}");
    }

    /// Subcommands survive a serialize/parse cycle.
    #[test]
    fn subcommands_round_trip() {
        let toml_src = r#"
[tool]
id = "t"
version = "1.0.0"

[tool.image]
backend = "docker"
reference = "example/t:1.0.0"

[tool.hardware]

[tool.subcommands]
go = ["python", "main.py"]
"#;
        let manifest = Manifest::from_toml_str(toml_src).unwrap();
        let serialised = manifest.to_toml_string().unwrap();
        let reparsed = Manifest::from_toml_str(&serialised).unwrap();
        assert_eq!(reparsed.tool.subcommands.len(), 1);
    }

    /// Blanking the id after parsing makes `validate()` flag `tool.id`.
    #[test]
    fn validate_catches_empty_id() {
        let mut manifest = Manifest::from_toml_str(SAMPLE).unwrap();
        manifest.tool.id = String::new();
        let problems = manifest.validate().unwrap_err();
        assert!(problems.iter().any(|e| e.field == "tool.id"));
    }

    /// Every `.toml` under the sibling registry checkout must parse. The test
    /// is a no-op when that directory cannot be read.
    #[test]
    fn registry_manifests_parse() {
        let registry = concat!(env!("CARGO_MANIFEST_DIR"), "/../../bv-registry/tools");
        let Ok(tool_dirs) = std::fs::read_dir(registry) else {
            return;
        };
        for tool_entry in tool_dirs {
            let tool_dir = tool_entry.unwrap().path();
            if !tool_dir.is_dir() {
                continue;
            }
            for version_entry in std::fs::read_dir(&tool_dir).unwrap() {
                let path = version_entry.unwrap().path();
                let is_toml = path.extension().is_some_and(|e| e == "toml");
                if !is_toml {
                    continue;
                }
                let text = std::fs::read_to_string(&path)
                    .unwrap_or_else(|_| panic!("failed to read {}", path.display()));
                Manifest::from_toml_str(&text)
                    .unwrap_or_else(|e| panic!("{}: {e}", path.display()));
            }
        }
    }
}
// bv_core/manifest.rs