gfatk/
extract.rs

1use std::path::PathBuf;
2
3use crate::gfa::gfa::GFAtk;
4use crate::load::{load_gfa, load_gfa_stdin};
5use crate::utils;
6use anyhow::{bail, Context, Result};
7use petgraph::graph::NodeIndex;
8
9/// Supply a sequence/segment ID from the GFA, and extract the GFA with all nodes connected to the input node.
10///
11/// For example:
12/// ```bash
13/// gfatk extract in.gfa -s 1 > out.gfa
14/// ```
15pub fn extract(matches: &clap::ArgMatches) -> Result<()> {
16    // read in path and parse gfa
17    let gfa_file = matches.get_one::<PathBuf>("GFA");
18    let sequence_ids = matches
19        .get_many::<usize>("sequence-ids")
20        .expect("errored by clap")
21        .copied()
22        .collect::<Vec<_>>();
23    let iterations = *matches
24        .get_one::<i32>("iterations")
25        .expect("defaulted by clap");
26
27    let gfa: GFAtk = match gfa_file {
28        Some(f) => {
29            let ext = f.extension();
30            match ext {
31                Some(e) => {
32                    if e == "gfa" {
33                        GFAtk(load_gfa(f)?)
34                    } else {
35                        bail!("Input is not a GFA.")
36                    }
37                }
38                None => bail!("Could not read file."),
39            }
40        }
41        None => match utils::is_stdin() {
42            true => GFAtk(load_gfa_stdin(std::io::stdin().lock())?),
43            false => bail!("No input from STDIN. Run `gfatk extract -h` for help."),
44        },
45    };
46
47    let (graph_indices, gfa_graph) = gfa.into_ungraph()?;
48
49    // get the node index of the target sequence ID.
50    let target_indices = sequence_ids
51        .iter()
52        .map(|e| graph_indices.seg_id_to_node_index(*e))
53        .collect::<Result<Vec<NodeIndex>>>();
54
55    let target_indices =
56        target_indices.context("One of your input segment ID's does not exist in the graph.")?;
57
58    let sequences_to_keep =
59        gfa_graph.recursive_search(sequence_ids, iterations, target_indices, graph_indices)?;
60
61    gfa.print_extract(sequences_to_keep);
62
63    Ok(())
64}