biors 0.40.0

Command-line tools for bio-rs biological AI model input workflows.
use super::PaddingArg;
use crate::errors::CliError;
use crate::input::open_fasta_input;
use crate::output::print_success;
use biors_core::{
    fasta::parse_fasta_records_reader,
    model_input::{validate_model_input_policy, ModelInputPolicy},
    workflow::{
        prepare_protein_model_input_workflow_with_invocation, SequenceWorkflowInvocation,
        SequenceWorkflowOutput,
    },
};
use std::path::{Path, PathBuf};

/// Entry point for the `biors workflow` command.
///
/// Builds the workflow output for the input at `path` via [`workflow_output`]
/// and hands it to `print_success` together with a copy of the input hash
/// taken from the output's provenance.
///
/// # Errors
///
/// Returns any [`CliError`] produced while building the workflow output or by
/// `print_success`.
pub(crate) fn run_workflow(
    max_length: usize,
    pad_token_id: u8,
    padding: PaddingArg,
    path: PathBuf,
) -> Result<(), CliError> {
    let prepared = workflow_output("biors workflow", max_length, pad_token_id, padding, path);
    prepared.and_then(|output| {
        // The hash is cloned because `output` itself is moved into `print_success`.
        let input_hash = Some(output.provenance.input_hash.clone());
        print_success(input_hash, output)
    })
}

/// Produces the sequence workflow output for the FASTA input at `path`.
///
/// The steps run in this order:
/// 1. assemble a [`ModelInputPolicy`] from the arguments and validate it,
/// 2. open the input with `open_fasta_input` and parse it with
///    `parse_fasta_records_reader`,
/// 3. record the invocation (`command` plus its arguments),
/// 4. call `prepare_protein_model_input_workflow_with_invocation` with the
///    parsed input hash, the parsed records, the policy and the invocation.
///
/// # Errors
///
/// Returns a [`CliError`] if policy validation, opening the input, parsing the
/// FASTA records, or preparing the workflow fails. Parse failures are wrapped
/// with the offending path through `CliError::from_fasta_read`.
pub(crate) fn workflow_output(
    command: &'static str,
    max_length: usize,
    pad_token_id: u8,
    padding: PaddingArg,
    path: PathBuf,
) -> Result<SequenceWorkflowOutput, CliError> {
    let policy = ModelInputPolicy {
        max_length,
        pad_token_id,
        padding: padding.into(),
    };
    validate_model_input_policy(&policy)?;

    let fasta_reader = open_fasta_input(&path)?;
    // `path` is cloned only on the error path; it is still borrowed below.
    let parsed = parse_fasta_records_reader(fasta_reader)
        .map_err(|read_error| CliError::from_fasta_read(path.clone(), read_error))?;

    let prepared = prepare_protein_model_input_workflow_with_invocation(
        parsed.input_hash,
        &parsed.records,
        policy,
        workflow_invocation(command, max_length, pad_token_id, padding, &path),
    );
    Ok(prepared?)
}

/// Builds the invocation record for a workflow run.
///
/// The argument list mirrors the command line: each flag followed by its
/// value (`--max-length`, `--pad-token-id`, `--padding`), then the input path
/// as the final positional argument. The path is converted lossily, so any
/// non-UTF-8 portion is replaced rather than causing a failure.
fn workflow_invocation(
    command: &'static str,
    max_length: usize,
    pad_token_id: u8,
    padding: PaddingArg,
    path: &Path,
) -> SequenceWorkflowInvocation {
    let input_path = path.to_string_lossy().into_owned();
    let arguments = vec![
        String::from("--max-length"),
        max_length.to_string(),
        String::from("--pad-token-id"),
        pad_token_id.to_string(),
        String::from("--padding"),
        String::from(padding_arg_value(padding)),
        input_path,
    ];

    SequenceWorkflowInvocation {
        command: String::from(command),
        arguments,
    }
}

/// Returns the string recorded as the `--padding` value in the invocation's
/// argument list for the given [`PaddingArg`].
fn padding_arg_value(padding: PaddingArg) -> &'static str {
    const FIXED_LENGTH: &str = "fixed_length";
    const NO_PADDING: &str = "no_padding";

    // Kept exhaustive on purpose: a new `PaddingArg` variant must fail to
    // compile here until it is given a value.
    match padding {
        PaddingArg::NoPadding => NO_PADDING,
        PaddingArg::FixedLength => FIXED_LENGTH,
    }
}