// seqrepo 0.10.3
//
// Port of (read-only) functionality of biocommons/seqrepo to Rust
// Documentation
//! Command line interface to the `seqrepo` crate.

use clap::{arg, command, Args, Parser, Subcommand, ValueEnum};
use clap_verbosity_flag::{InfoLevel, Verbosity};
use textwrap::wrap;
use tracing::debug;

use seqrepo::{self, AliasDbRecord, Error, Interface, Query, SeqRepo};

/// Commonly used command line arguments.
// NOTE: clap's derive macros turn the `///` doc comments on the fields below into help
// text, so they are user-facing. Maintainer notes are therefore written as `//` comments.
#[derive(Parser, Debug)]
pub struct CommonArgs {
    /// Verbosity of the program
    // Flattened verbosity flags from `clap_verbosity_flag`; `InfoLevel` selects the level
    // that applies when no verbosity flag is given.
    #[clap(flatten)]
    pub verbose: Verbosity<InfoLevel>,

    /// Root directory
    // Can be supplied on the command line or through the `SEQREPO_ROOT_DIR` environment
    // variable; the value is passed unchanged to `SeqRepo::new` by `main_export`.
    // NOTE(review): the default starts with `~` and nothing in this file expands it --
    // presumably `SeqRepo::new` or the user is expected to resolve it; confirm.
    #[arg(
        short,
        long,
        env = "SEQREPO_ROOT_DIR",
        default_value = "~/hgvs-rs-data/seqrepo-data"
    )]
    pub root_directory: String,
}

/// CLI parser based on clap.
// `author` and `version` are given without values, so clap's derive takes them from the
// crate metadata; `about` and `long_about` are the literal strings below.
#[derive(Debug, Parser)]
#[command(
    author,
    version,
    about = "SeqRepo access written in Rust",
    long_about = "(Read-only) access to SeqRepo data from Rust"
)]
struct Cli {
    /// Commonly used arguments
    // Flattened, i.e. the fields of `CommonArgs` appear as top-level options.
    #[command(flatten)]
    common: CommonArgs,

    /// The sub command to run
    // Dispatched on in `main`.
    #[command(subcommand)]
    command: Commands,
}

/// Enum supporting the parsing of top-level commands.
// NOTE(review): the `large_enum_variant` allowance has no stated rationale; with a single
// variant it looks unnecessary -- confirm before removing.
// The `///` comment on each variant is user-facing: clap uses it as the sub command's help.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Subcommand)]
enum Commands {
    /// "export" sub command
    Export(ExportArgs),
}

/// Enum for selecting the namespace on the command line.
// Converted into `seqrepo::Namespace` by the `From` impl in this file. Variant notes are
// `//` comments on purpose: `///` comments on `ValueEnum` variants would show up in the
// generated help output.
#[derive(ValueEnum, Debug, PartialEq, Eq, Clone, Copy)]
pub enum Namespace {
    // Converted to the seqrepo namespace "NCBI".
    Refseq,
    // Converted to the seqrepo namespace "Ensembl".
    Ensembl,
    // Converted to the seqrepo namespace "Lrg".
    Lrg,
    // Converted to the empty seqrepo namespace; aliases get a "GS_" prefix.
    Sha512t24u,
    // Converted to the empty seqrepo namespace; the first three bytes of the alias are
    // replaced by "GS_".
    Ga4gh,
}

/// Conversion of the command line namespace selector into the library's namespace type.
impl From<Namespace> for seqrepo::Namespace {
    /// Map each CLI variant to the namespace name handed to `seqrepo::Namespace::new`.
    ///
    /// Both digest-style variants (`Sha512t24u` and `Ga4gh`) map to the empty name.
    fn from(value: Namespace) -> Self {
        let name = match value {
            Namespace::Refseq => "NCBI",
            Namespace::Ensembl => "Ensembl",
            Namespace::Lrg => "Lrg",
            Namespace::Sha512t24u | Namespace::Ga4gh => "",
        };
        seqrepo::Namespace::new(name)
    }
}

/// A pair of namespace and alias as read from the command line.
///
/// Converted into `seqrepo::NamespacedAlias` by the `From` impl in this file, which also
/// rewrites the alias for the `Sha512t24u` and `Ga4gh` namespaces.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct NamespacedAlias {
    /// Namespace selected on the command line.
    pub namespace: Namespace,
    /// Alias text exactly as given by the user.
    pub alias: String,
}

/// Conversion of a command line namespace/alias pair into the library's representation.
impl From<NamespacedAlias> for seqrepo::NamespacedAlias {
    /// Convert the namespace and rewrite the alias where the namespace requires it.
    ///
    /// * `Refseq`, `Ensembl`, `Lrg`: the alias is passed through unchanged.
    /// * `Sha512t24u`: the alias is prefixed with `GS_`.
    /// * `Ga4gh`: the first three bytes of the alias are replaced by `GS_`.
    ///
    /// The alias comes from the command line, so it may be shorter than three bytes or
    /// have a multi-byte character spanning byte 3. Such input yields a bare `GS_`
    /// alias (which simply will not match anything) instead of a slicing panic.
    fn from(value: NamespacedAlias) -> Self {
        let alias = match value.namespace {
            Namespace::Refseq | Namespace::Ensembl | Namespace::Lrg => value.alias,
            Namespace::Sha512t24u => format!("GS_{}", value.alias),
            // Checked slicing: `get` returns `None` rather than panicking when the range
            // is out of bounds or not on a character boundary.
            Namespace::Ga4gh => format!("GS_{}", value.alias.get(3..).unwrap_or("")),
        };

        seqrepo::NamespacedAlias {
            alias,
            namespace: value.namespace.into(),
        }
    }
}

/// Parsing of "export" subcommand
// NOTE: the `///` comments on the fields are used by clap as help text; maintainer notes
// are kept in `//` comments so the help output stays unchanged.
#[derive(Debug, Args)]
struct ExportArgs {
    /// The namespace to use.
    // Optional; when absent `main_export` leaves `Query::namespace` as `None`.
    #[arg(short, long, value_enum, rename_all = "lower")]
    pub namespace: Option<Namespace>,
    /// The instance name to use.
    // Passed to `SeqRepo::new` together with the root directory.
    #[arg(short, long, default_value = "latest")]
    pub instance_name: String,
    /// The sequence aliases to query for.
    // Positional; when empty `main_export` runs a single query without an alias,
    // otherwise one query per alias.
    #[arg()]
    pub aliases: Vec<String>,
}

/// Implementation of "export" command.
fn main_export(common_args: &CommonArgs, args: &ExportArgs) -> Result<(), Error> {
    debug!("common_args = {:?}", &common_args);
    debug!("args = {:?}", &args);

    let seq_repo = SeqRepo::new(&common_args.root_directory, &args.instance_name)?;
    let alias_db = seq_repo.alias_db();

    let mut query = Query {
        namespace: args.namespace.as_ref().map(|namespace| (*namespace).into()),
        ..Default::default()
    };

    let mut group: Vec<AliasDbRecord> = Vec::new();

    fn print_and_clear_group(seq_repo: &SeqRepo, group: &mut Vec<AliasDbRecord>) {
        if !group.is_empty() {
            let seq = seq_repo
                .fetch_sequence(&seqrepo::AliasOrSeqId::SeqId(group[0].seqid.clone()))
                .unwrap();
            group.sort_by(|a, b| {
                let (a, b) = (&a.namespace, &b.namespace);
                a.value.partial_cmp(&b.value).unwrap()
            });
            let metas = group
                .iter()
                .map(|record| format!("{}:{}", *record.namespace, record.alias))
                .collect::<Vec<_>>();

            println!(">{}", metas.join(" "));
            for line in wrap(&seq, 100) {
                println!("{line}");
            }

            group.clear();
        }
    }

    let mut handle_record = |record: Result<AliasDbRecord, Error>| {
        let record = record.unwrap();
        if !group.is_empty() && group[0].seqid != record.seqid {
            print_and_clear_group(&seq_repo, &mut group);
        }
        group.push(record);
    };

    if args.aliases.is_empty() {
        alias_db.find(&query, &mut handle_record)?;
    } else {
        for alias in &args.aliases {
            query.alias = Some(alias.clone());
            alias_db.find(&query, &mut handle_record)?;
        }
    }

    print_and_clear_group(&seq_repo, &mut group);

    Ok(())
}

pub fn main() -> Result<(), Error> {
    let cli = Cli::parse();

    // Build a tracing subscriber according to the configuration in `cli.common`.
    let collector = tracing_subscriber::fmt()
        .with_target(false)
        .with_max_level(match cli.common.verbose.log_level() {
            Some(level) => match level {
                log::Level::Error => tracing::Level::ERROR,
                log::Level::Warn => tracing::Level::WARN,
                log::Level::Info => tracing::Level::INFO,
                log::Level::Debug => tracing::Level::DEBUG,
                log::Level::Trace => tracing::Level::TRACE,
            },
            None => tracing::Level::INFO,
        })
        .compact()
        .finish();

    tracing::subscriber::with_default(collector, || {
        match &cli.command {
            Commands::Export(args) => {
                main_export(&cli.common, args)?;
            }
        }

        Ok::<(), Error>(())
    })?;

    debug!("All done! Have a nice day.");

    Ok(())
}

#[cfg(test)]
mod test {
    use clap_verbosity_flag::Verbosity;
    // `Error` is named in the test signature below and must be imported into this module.
    use seqrepo::Error;

    use super::main_export;
    use crate::{CommonArgs, ExportArgs};

    /// Smoke test: export a single alias from the repository below `tests/data/seqrepo`
    /// and expect the command to finish without error.
    #[test]
    fn run_cmd() -> Result<(), Error> {
        main_export(
            &CommonArgs {
                verbose: Verbosity::new(0, 0),
                root_directory: "tests/data/seqrepo".to_string(),
            },
            &ExportArgs {
                namespace: None,
                instance_name: "latest".to_string(),
                aliases: vec!["XR_001757199.1".to_string()],
            },
        )
    }
}

// <LICENSE>
// Copyright 2023 seqrepo-rs Contributors
// Copyright 2016 biocommons.seqrepo Contributors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// </LICENSE>