use anyhow::Result;
use clap::{Parser, Subcommand};
use tracing_subscriber::{fmt, prelude::*, EnvFilter};
use biovault::cli;
use cli::commands;
/// Value parser for the `--example` option of `bv project create`.
///
/// Looks `s` up in the list returned by `cli::examples::list_examples()`.
///
/// # Errors
/// Returns a message naming the rejected value and listing every available
/// example (comma-separated) when `s` is not in that list.
fn validate_example_name(s: &str) -> Result<String, String> {
    let known = cli::examples::list_examples();

    // Guard clause: a recognised name is returned as an owned String.
    if known.iter().any(|candidate| candidate.as_str() == s) {
        return Ok(s.to_owned());
    }

    let available = known.join(", ");
    Err(format!(
        "Unknown example '{}'. Available examples: {}",
        s, available
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A listed example name must be accepted, and a made-up one must be rejected
    /// with the "Unknown example" message.
    #[test]
    fn validate_example_name_accepts_known_and_rejects_unknown() {
        // The positive check only runs when at least one example is available.
        let available = cli::examples::list_examples();
        for known in available.iter().take(1) {
            assert!(validate_example_name(known).is_ok());
        }

        let rejection = validate_example_name("__definitely_not_real__");
        assert!(rejection.unwrap_err().contains("Unknown example"));
    }
}
// Top-level command-line definition for the `bv` binary, parsed in `main` via `Cli::parse()`.
#[derive(Parser)]
#[command(
name = "bv",
version,
about = "BioVault - A bioinformatics data management CLI",
long_about = None
)]
struct Cli {
// The subcommand selected by the user; `main` matches on it to pick a handler.
#[command(subcommand)]
command: Commands,
// Global verbosity switch. `main` uses it to choose "debug" instead of "info" as the
// fallback log filter when no filter can be built from the environment.
#[arg(short, long, global = true, help = "Increase verbosity")]
verbose: bool,
// Global `--config` option.
// NOTE(review): `main` in this file never reads this field — confirm whether it is
// consumed elsewhere or is currently a no-op.
#[arg(long, global = true, help = "Path to config file")]
config: Option<String>,
}
#[derive(Subcommand)]
enum Commands {
#[command(about = "Check for updates and install the latest version")]
Update,
#[command(about = "Initialize a new BioVault repository")]
Init {
#[arg(
help = "Email address for the vault configuration (optional, will detect from SYFTBOX_EMAIL)"
)]
email: Option<String>,
#[arg(short, long, help = "Automatically accept defaults (for testing)")]
quiet: bool,
},
#[command(about = "Show system information")]
Info,
#[command(about = "Check for required dependencies")]
Check,
#[command(about = "Setup environment for known systems (e.g., Google Colab)")]
Setup,
#[command(about = "Project management commands")]
Project {
#[command(subcommand)]
command: ProjectCommands,
},
#[command(about = "Run a project workflow with Nextflow")]
Run {
#[arg(help = "Path to project directory")]
project_folder: String,
#[arg(
help = "Participant source: local file path, Syft URL, or HTTP URL (with optional #fragment)"
)]
participant_source: String,
#[arg(long, help = "Use mock data if available")]
test: bool,
#[arg(long, help = "Auto-confirm file downloads")]
download: bool,
#[arg(long, help = "Show commands without executing")]
dry_run: bool,
#[arg(long, default_value = "true", help = "Run with Docker")]
with_docker: bool,
#[arg(long, help = "Nextflow work directory")]
work_dir: Option<String>,
#[arg(long, help = "Resume from previous run")]
resume: bool,
#[arg(long, help = "Template to use (default, snp, etc.)")]
template: Option<String>,
#[arg(long, help = "Custom results directory name")]
results_dir: Option<String>,
},
#[command(name = "sample-data", about = "Manage sample data")]
SampleData {
#[command(subcommand)]
command: SampleDataCommands,
},
#[command(about = "Manage participants")]
Participant {
#[command(subcommand)]
command: ParticipantCommands,
},
#[command(about = "Manage biobank data publishing")]
Biobank {
#[command(subcommand)]
command: BiobankCommands,
},
#[command(about = "Manage BioVault configuration")]
Config {
#[command(subcommand)]
command: Option<ConfigCommands>,
},
#[command(about = "FASTQ file operations")]
Fastq {
#[command(subcommand)]
command: FastqCommands,
},
#[command(about = "Submit a project to another biobank via SyftBox")]
Submit {
#[arg(help = "Path to project directory (use '.' for current directory)")]
project_path: String,
#[arg(
help = "Destination: either a datasite email (e.g., user@domain.com) or full Syft URL (e.g., syft://user@domain.com/public/biovault/participants.yaml#participants.ID)"
)]
destination: String,
#[arg(long, help = "Skip interactive prompts, use defaults")]
non_interactive: bool,
#[arg(
long,
help = "Force resubmission even if project was already submitted"
)]
force: bool,
},
#[command(about = "Clean up stale database locks")]
Cleanup {
#[arg(long, help = "Clean all locks in all virtualenvs")]
all: bool,
},
#[command(about = "View and manage inbox messages")]
Inbox {
#[arg(short = 'i', long, help = "Interactive mode (default)")]
interactive: bool,
#[arg(long, help = "Plain, non-interactive list output")]
plain: bool,
#[arg(short = 's', long, help = "Show sent messages")]
sent: bool,
#[arg(short = 'a', long, help = "Show all messages (including deleted)")]
all: bool,
#[arg(short = 'u', long, help = "Show only unread messages")]
unread: bool,
#[arg(short = 'p', long, help = "Show project submissions")]
projects: bool,
#[arg(
short = 't',
long,
help = "Filter by message type (text/project/request)"
)]
message_type: Option<String>,
#[arg(short = 'f', long, help = "Filter by sender")]
from: Option<String>,
#[arg(long, help = "Search messages by content")]
search: Option<String>,
},
#[command(about = "Manage messages via SyftBox RPC")]
Message {
#[command(subcommand)]
command: MessageCommands,
},
#[command(about = "Sample sheet operations")]
Samplesheet {
#[command(subcommand)]
command: SamplesheetCommands,
},
#[command(about = "Manage the BioVault daemon for automatic message processing")]
Daemon {
#[command(subcommand)]
command: DaemonCommands,
},
#[command(
name = "hard-reset",
about = "Delete all BioVault data and configuration (DESTRUCTIVE)"
)]
HardReset {
#[arg(long, help = "Skip confirmation prompts (use with caution)")]
ignore_warning: bool,
},
}
// Subcommands of `bv daemon`. `main` forwards each one to a function in
// `commands::daemon`, passing the loaded configuration.
#[derive(Subcommand)]
enum DaemonCommands {
#[command(about = "Start the BioVault daemon")]
Start {
// Passed through unchanged to `commands::daemon::start`.
#[arg(long, help = "Run daemon in foreground (no background)")]
foreground: bool,
},
#[command(about = "Stop the running daemon")]
Stop,
// `main` implements restart as: stop (result ignored), wait one second, then start.
#[command(about = "Restart the daemon (stop if running, then start)")]
Restart {
#[arg(long, help = "Run daemon in foreground after restart")]
foreground: bool,
},
// Dispatched to `commands::daemon::service_status`.
#[command(about = "Check daemon status")]
Status,
#[command(about = "View daemon logs")]
Logs {
#[arg(short, long, help = "Follow log output (tail -f)")]
follow: bool,
// No default is declared here, so this is `None` when the option is omitted.
// NOTE(review): the "default: 50" in the help text is presumably applied inside
// `commands::daemon::logs` — confirm.
#[arg(short, long, help = "Number of lines to show (default: 50)")]
lines: Option<usize>,
},
// Dispatched to `commands::daemon::install_service`.
#[command(about = "Install daemon as a systemd service (Linux only)")]
Install,
// Dispatched to `commands::daemon::uninstall_service`.
#[command(about = "Uninstall daemon systemd service (Linux only)")]
Uninstall,
}
// Subcommands of `bv project`.
#[derive(Subcommand)]
enum ProjectCommands {
// All three options are optional and are handed to `commands::project::create` as-is.
#[command(about = "Create a new project")]
Create {
#[arg(long, help = "Project name")]
name: Option<String>,
#[arg(long, help = "Folder path (defaults to ./{name})")]
folder: Option<String>,
// Checked at parse time by `validate_example_name`, so an unknown example name is
// rejected before any command handler runs.
#[arg(long, value_parser = validate_example_name, help = "Use example template (use 'bv project examples' to list available)")]
example: Option<String>,
},
// Dispatched to `commands::project::list_examples`.
#[command(about = "List available example templates")]
Examples,
}
// Subcommands of `bv sample-data`.
#[derive(Subcommand)]
enum SampleDataCommands {
// `main` calls `commands::sample_data::fetch(participant_ids, all, false)`; the third
// argument is hard-coded to `false` there.
#[command(about = "Fetch sample data")]
Fetch {
// Optional positional argument (no `long`/`short`), split on commas.
#[arg(
value_delimiter = ',',
help = "Participant IDs to fetch (comma-separated)"
)]
participant_ids: Option<Vec<String>>,
#[arg(long, help = "Fetch all available sample data")]
all: bool,
},
// Dispatched to `commands::sample_data::list`.
#[command(about = "List available sample data")]
List,
}
// Subcommands of `bv participant`; each is forwarded by `main` to a function in
// `commands::participant`.
#[derive(Subcommand)]
enum ParticipantCommands {
// Every field is passed positionally, in declaration order, to
// `commands::participant::add`.
#[command(about = "Add a new participant")]
Add {
#[arg(long, help = "Participant ID")]
id: Option<String>,
#[arg(long, help = "Aligned file path (.cram, .bam, or .sam)")]
aligned: Option<String>,
// NOTE(review): a `default_value` is declared on an `Option<String>`, so after
// parsing this is presumably always `Some(..)` — confirm the handler does not rely
// on `None` to detect "not specified".
#[arg(
long,
help = "Template type (default or snp)",
default_value = "default"
)]
template: Option<String>,
#[arg(long, help = "SNP file path (for SNP template)")]
snp: Option<String>,
#[arg(long, help = "Reference genome file path (.fa or .fasta)")]
reference: Option<String>,
#[arg(long, help = "Reference version (GRCh38 or GRCh37)")]
ref_version: Option<String>,
#[arg(long, help = "Skip interactive prompts, use defaults")]
non_interactive: bool,
},
#[command(about = "List all participants")]
List,
#[command(about = "Delete a participant")]
Delete {
// Required positional argument.
#[arg(help = "Participant ID to delete")]
id: String,
},
#[command(about = "Validate participant files")]
Validate {
// Optional positional argument.
#[arg(help = "Participant ID to validate (validates all if not specified)")]
id: Option<String>,
},
}
// Subcommands of `bv biobank`; each is forwarded by `main` to a function in
// `commands::biobank`.
#[derive(Subcommand)]
enum BiobankCommands {
// `main` calls `commands::biobank::list(None)` for this variant.
#[command(about = "List biobanks in SyftBox")]
List,
#[command(about = "Publish participants to SyftBox")]
Publish {
#[arg(long, help = "Participant ID to publish")]
participant_id: Option<String>,
#[arg(long, help = "Publish all participants")]
all: bool,
// Comma-separated list; `None` when the option is omitted.
#[arg(
long,
help = "HTTP relay servers (defaults to syftbox.net)",
value_delimiter = ','
)]
http_relay_servers: Option<Vec<String>>,
},
#[command(about = "Unpublish participants from SyftBox")]
Unpublish {
#[arg(long, help = "Participant ID to unpublish")]
participant_id: Option<String>,
#[arg(long, help = "Unpublish all participants")]
all: bool,
},
}
// Subcommands of `bv config`. The subcommand itself is optional in `Commands::Config`;
// when none is given, `main` calls `commands::config_cmd::show()`.
#[derive(Subcommand)]
enum ConfigCommands {
// Dispatched to `commands::config_cmd::set_email`.
#[command(about = "Set email address")]
Email {
#[arg(help = "Email address")]
email: String,
},
// Dispatched to `commands::config_cmd::set_syftbox`.
#[command(about = "Set SyftBox config path")]
Syftbox {
// Optional positional argument.
#[arg(help = "Path to SyftBox config.json (omit to use default)")]
path: Option<String>,
},
}
// Subcommands of `bv fastq`.
#[derive(Subcommand)]
enum FastqCommands {
// Dispatched to `commands::fastq::combine`.
#[command(about = "Combine multiple FASTQ files into one")]
Combine {
#[arg(help = "Input folder containing FASTQ files")]
input_folder: String,
#[arg(help = "Output file path")]
output_file: String,
// `main` combines `validate` and `no_prompt` into the single optional validation
// argument it passes to `combine`.
#[arg(long, help = "Validate files before combining")]
validate: bool,
#[arg(long, help = "Skip validation prompt and use default")]
no_prompt: bool,
// Defaults to "tsv"; `main` always passes it on wrapped in `Some(..)`.
#[arg(
long,
default_value = "tsv",
help = "Stats output format (tsv, yaml, json)"
)]
stats_format: String,
},
}
// Subcommands of `bv samplesheet`.
#[derive(Subcommand)]
enum SamplesheetCommands {
    // Dispatched to `commands::samplesheet::create` with the fields in declaration order.
    #[command(about = "Create a sample sheet CSV from a folder of files")]
    Create {
        #[arg(help = "Input directory containing files")]
        input_dir: String,
        #[arg(help = "Output CSV file path")]
        output_file: String,
        // The underscore spelling is kept as the primary name for backward
        // compatibility. The kebab-case alias matches the convention of the other
        // multi-word flags in this CLI (e.g. `--dry-run`, `--work-dir`).
        #[arg(
            long = "file_filter",
            alias = "file-filter",
            help = "File pattern filter (e.g., *.txt, default: all files)"
        )]
        file_filter: Option<String>,
        // Same as above: `--extract_cols` remains valid, `--extract-cols` is accepted too.
        #[arg(
            long = "extract_cols",
            alias = "extract-cols",
            help = "Pattern for extracting fields from filenames (e.g., {participant_id}_X_X_GSAv3-DTC_GRCh38-{date}.txt)"
        )]
        extract_cols: Option<String>,
        #[arg(
            long = "ignore",
            help = "Add files even if they don't match the extraction pattern"
        )]
        ignore: bool,
    },
}
// Subcommands of `bv message`. For every variant `main` first loads the BioVault
// configuration and then calls the matching function in `commands::messages`.
#[derive(Subcommand)]
enum MessageCommands {
// Dispatched to `commands::messages::send_message`.
#[command(about = "Send a message to another datasite")]
Send {
#[arg(help = "Recipient email address")]
recipient: String,
#[arg(help = "Message content")]
message: String,
#[arg(short = 's', long = "subject", help = "Optional message subject")]
subject: Option<String>,
},
// Dispatched to `commands::messages::reply_message`.
#[command(about = "Reply to a message")]
Reply {
#[arg(help = "Message ID to reply to")]
message_id: String,
#[arg(help = "Reply content")]
body: String,
},
// Dispatched to `commands::messages::read_message` (awaited in `main`).
#[command(about = "Read a specific message")]
Read {
#[arg(help = "Message ID to read")]
message_id: String,
},
// Dispatched to `commands::messages::delete_message`.
#[command(about = "Delete a message")]
Delete {
#[arg(help = "Message ID to delete")]
message_id: String,
},
// Dispatched to `commands::messages::list_messages(&config, unread, sent, projects)`.
#[command(about = "List messages")]
List {
#[arg(short = 'u', long = "unread", help = "Show only unread messages")]
unread: bool,
#[arg(short = 's', long = "sent", help = "Show sent messages")]
sent: bool,
#[arg(short = 'p', long = "projects", help = "Show only project messages")]
projects: bool,
},
// Dispatched to `commands::messages::view_thread`.
#[command(about = "View a message thread")]
Thread {
#[arg(help = "Thread ID to view")]
thread_id: String,
},
// Dispatched to `commands::messages::sync_messages`.
#[command(about = "Sync messages (check for new and update ACKs)")]
Sync,
// Dispatched to `commands::messages::process_project_message` (awaited in `main`).
#[command(about = "Process a project message (run test/real)")]
Process {
#[arg(help = "Message ID of the project to process")]
message_id: String,
// `--test` and `--real` are declared as conflicting with each other.
#[arg(long, help = "Run with test data", conflicts_with = "real")]
test: bool,
#[arg(long, help = "Run with real data", conflicts_with = "test")]
real: bool,
#[arg(long, help = "Participant to use (defaults to first available)")]
participant: Option<String>,
#[arg(long, help = "Approve after successful run")]
approve: bool,
#[arg(long, help = "Non-interactive mode (skip prompts)")]
non_interactive: bool,
},
// Dispatched to `commands::messages::archive_message`.
#[command(about = "Archive a project message (revoke write permissions)")]
Archive {
#[arg(help = "Message ID to archive")]
message_id: String,
},
}
/// Entry point of the `bv` binary.
///
/// Parses the command line, installs the tracing subscriber, runs the best-effort
/// update and upgrade checks, and then dispatches the selected subcommand to its
/// handler under `commands::`.
///
/// # Errors
/// Propagates any error returned by the selected handler, by
/// `biovault::config::Config::load`, or by parsing `BV_DAEMON_CONFIG` as JSON.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();

    // A filter built from the default environment variable wins when one can be
    // constructed; `--verbose` only selects the fallback level.
    let filter_level = if cli.verbose { "debug" } else { "info" };
    tracing_subscriber::registry()
        .with(fmt::layer())
        .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter_level)))
        .init();

    // Both checks are deliberately best-effort: their results are discarded so a
    // failure here never prevents the requested command from running.
    let _ = commands::update::check_and_notify_random().await;
    let _ = cli::upgrade::check_and_upgrade();

    // NOTE(review): `cli.config` (the global `--config` option) is not consulted
    // anywhere below; configuration is always obtained via `Config::load()`.
    match cli.command {
        Commands::Update => {
            commands::update::execute().await?;
        }
        Commands::Init { email, quiet } => {
            commands::init::execute(email.as_deref(), quiet).await?;
        }
        Commands::Info => {
            commands::info::execute().await?;
        }
        Commands::Check => {
            commands::check::execute().await?;
        }
        Commands::Setup => {
            commands::setup::execute().await?;
        }
        Commands::Project { command } => match command {
            ProjectCommands::Create {
                name,
                folder,
                example,
            } => {
                commands::project::create(name, folder, example).await?;
            }
            ProjectCommands::Examples => {
                commands::project::list_examples()?;
            }
        },
        Commands::Run {
            project_folder,
            participant_source,
            test,
            download,
            dry_run,
            with_docker,
            work_dir,
            resume,
            template,
            results_dir,
        } => {
            commands::run::execute(commands::run::RunParams {
                project_folder,
                participant_source,
                test,
                download,
                dry_run,
                with_docker,
                work_dir,
                resume,
                template,
                results_dir,
            })
            .await?;
        }
        Commands::SampleData { command } => match command {
            SampleDataCommands::Fetch {
                participant_ids,
                all,
            } => {
                commands::sample_data::fetch(participant_ids, all, false).await?;
            }
            SampleDataCommands::List => {
                commands::sample_data::list().await?;
            }
        },
        Commands::Participant { command } => match command {
            ParticipantCommands::Add {
                id,
                aligned,
                template,
                snp,
                reference,
                ref_version,
                non_interactive,
            } => {
                commands::participant::add(
                    id,
                    aligned,
                    template,
                    snp,
                    reference,
                    ref_version,
                    non_interactive,
                )
                .await?;
            }
            ParticipantCommands::List => {
                commands::participant::list().await?;
            }
            ParticipantCommands::Delete { id } => {
                commands::participant::delete(id).await?;
            }
            ParticipantCommands::Validate { id } => {
                commands::participant::validate(id).await?;
            }
        },
        Commands::Biobank { command } => match command {
            BiobankCommands::List => {
                commands::biobank::list(None).await?;
            }
            BiobankCommands::Publish {
                participant_id,
                all,
                http_relay_servers,
            } => {
                commands::biobank::publish(participant_id, all, http_relay_servers).await?;
            }
            BiobankCommands::Unpublish {
                participant_id,
                all,
            } => {
                commands::biobank::unpublish(participant_id, all).await?;
            }
        },
        // With no nested subcommand, `bv config` shows the current configuration.
        Commands::Config { command } => match command {
            Some(ConfigCommands::Email { email }) => {
                commands::config_cmd::set_email(email).await?;
            }
            Some(ConfigCommands::Syftbox { path }) => {
                commands::config_cmd::set_syftbox(path).await?;
            }
            None => {
                commands::config_cmd::show().await?;
            }
        },
        Commands::Fastq { command } => match command {
            FastqCommands::Combine {
                input_folder,
                output_file,
                validate,
                no_prompt,
                stats_format,
            } => {
                // An explicit `--validate` is honoured on its own (previously it was
                // dropped unless `--no-prompt` was also given). `--no-prompt` without
                // `--validate` resolves to "do not validate". With neither flag the
                // decision is left to `combine` by passing `None`.
                let should_validate = if validate || no_prompt {
                    Some(validate)
                } else {
                    None
                };
                commands::fastq::combine(
                    input_folder,
                    output_file,
                    should_validate,
                    Some(stats_format),
                )
                .await?;
            }
        },
        Commands::Submit {
            project_path,
            destination,
            non_interactive,
            force,
        } => {
            commands::submit::submit(project_path, destination, non_interactive, force).await?;
        }
        Commands::Cleanup { all } => {
            let config = biovault::config::Config::load()?;
            commands::messages::cleanup_locks(&config, all)?;
        }
        Commands::Inbox {
            interactive,
            plain,
            sent,
            all,
            unread,
            projects,
            message_type,
            from,
            search,
        } => {
            let config = biovault::config::Config::load()?;
            // The plain listing is used only when `--plain` is given without
            // `--interactive`; in every other case the interactive inbox starts and
            // the filter flags are not forwarded.
            if plain && !interactive {
                let filters = commands::inbox::ListFilters {
                    sent,
                    all,
                    unread,
                    projects,
                    message_type,
                    from,
                    search,
                };
                commands::inbox::list(&config, filters)?;
            } else {
                commands::inbox::interactive(&config, None).await?;
            }
        }
        Commands::Message { command } => match command {
            MessageCommands::Send {
                recipient,
                message,
                subject,
            } => {
                let config = biovault::config::Config::load()?;
                commands::messages::send_message(
                    &config,
                    &recipient,
                    &message,
                    subject.as_deref(),
                )?;
            }
            MessageCommands::Reply { message_id, body } => {
                let config = biovault::config::Config::load()?;
                commands::messages::reply_message(&config, &message_id, &body)?;
            }
            MessageCommands::Read { message_id } => {
                let config = biovault::config::Config::load()?;
                commands::messages::read_message(&config, &message_id).await?;
            }
            MessageCommands::Delete { message_id } => {
                let config = biovault::config::Config::load()?;
                commands::messages::delete_message(&config, &message_id)?;
            }
            MessageCommands::List {
                unread,
                sent,
                projects,
            } => {
                let config = biovault::config::Config::load()?;
                commands::messages::list_messages(&config, unread, sent, projects)?;
            }
            MessageCommands::Thread { thread_id } => {
                let config = biovault::config::Config::load()?;
                commands::messages::view_thread(&config, &thread_id)?;
            }
            MessageCommands::Sync => {
                let config = biovault::config::Config::load()?;
                commands::messages::sync_messages(&config)?;
            }
            MessageCommands::Process {
                message_id,
                test,
                real,
                participant,
                approve,
                non_interactive,
            } => {
                let config = biovault::config::Config::load()?;
                commands::messages::process_project_message(
                    &config,
                    &message_id,
                    test,
                    real,
                    participant,
                    approve,
                    non_interactive,
                )
                .await?;
            }
            MessageCommands::Archive { message_id } => {
                let config = biovault::config::Config::load()?;
                commands::messages::archive_message(&config, &message_id)?;
            }
        },
        Commands::Samplesheet { command } => match command {
            SamplesheetCommands::Create {
                input_dir,
                output_file,
                file_filter,
                extract_cols,
                ignore,
            } => {
                commands::samplesheet::create(
                    input_dir,
                    output_file,
                    file_filter,
                    extract_cols,
                    ignore,
                )
                .await?;
            }
        },
        Commands::Daemon { command } => {
            // A JSON configuration supplied through BV_DAEMON_CONFIG takes precedence
            // over the configuration loaded by `Config::load()`.
            let config = if let Ok(config_json) = std::env::var("BV_DAEMON_CONFIG") {
                serde_json::from_str(&config_json)?
            } else {
                biovault::config::Config::load()?
            };
            match command {
                DaemonCommands::Start { foreground } => {
                    commands::daemon::start(&config, foreground).await?;
                }
                DaemonCommands::Stop => {
                    commands::daemon::stop(&config).await?;
                }
                DaemonCommands::Restart { foreground } => {
                    // Stopping is best-effort: its result is ignored so that restart
                    // still proceeds to start the daemon.
                    let _ = commands::daemon::stop(&config).await;
                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                    commands::daemon::start(&config, foreground).await?;
                }
                DaemonCommands::Status => {
                    commands::daemon::service_status(&config).await?;
                }
                DaemonCommands::Logs { follow, lines } => {
                    commands::daemon::logs(&config, follow, lines).await?;
                }
                DaemonCommands::Install => {
                    commands::daemon::install_service(&config).await?;
                }
                DaemonCommands::Uninstall => {
                    commands::daemon::uninstall_service(&config).await?;
                }
            }
        }
        Commands::HardReset { ignore_warning } => {
            commands::hard_reset::execute(ignore_warning).await?;
        }
    }
    Ok(())
}