1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
use std::{fs, path::PathBuf};
use anyhow::{bail, Context, Result};
use assembly_theory::{
assembly::{depth, index_search, ParallelMode},
bounds::Bound,
canonize::CanonizeMode,
kernels::KernelMode,
loader::parse_molfile_str,
memoize::MemoizeMode,
};
use clap::{Args, Parser};
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Cli {
/// Path to .mol file to compute the assembly index for.
molpath: PathBuf,
/// Print molecule graph information, skipping assembly index calculation.
#[arg(long)]
molinfo: bool,
/// Calculate and print the molecule's assembly depth.
#[arg(long)]
depth: bool,
/// Print the assembly index, assembly depth, number of edge-disjoint
/// isomorphic subgraph pairs, and size of the search space. Note that the
/// search space size is nondeterministic owing to some `HashMap` details.
#[arg(long)]
verbose: bool,
/// Timeout duration in milliseconds after which search is stopped and the
/// best assembly index found so far is returned, or `None` if search is
/// run until the true assembly index is found.
#[arg(long)]
timeout: Option<u64>,
/// Algorithm for graph canonization.
#[arg(long, value_enum, default_value_t = CanonizeMode::TreeNauty)]
canonize: CanonizeMode,
/// Parallelization strategy for the search phase.
#[arg(long, value_enum, default_value_t = ParallelMode::DepthOne)]
parallel: ParallelMode,
/// Strategy for memoizing assembly states in the search phase.
#[arg(long, value_enum, default_value_t = MemoizeMode::CanonIndex)]
memoize: MemoizeMode,
/// Bounding strategies to apply in the search phase.
#[command(flatten)]
boundsgroup: Option<BoundsGroup>,
/// Strategy for performing graph kernelization during the search phase.
#[arg(long, value_enum, default_value_t = KernelMode::None)]
kernel: KernelMode,
}
#[derive(Args, Debug)]
#[group(required = false, multiple = false)]
struct BoundsGroup {
/// Do not use any bounding strategy during the search phase.
#[arg(long)]
no_bounds: bool,
/// Apply the specified bounding strategies during the search phase.
#[arg(long, num_args = 1..)]
bounds: Vec<Bound>,
}
fn main() -> Result<()> {
// Parse command line arguments.
let cli = Cli::parse();
// Load the .mol file as a molecule::Molecule.
let molfile = fs::read_to_string(&cli.molpath).context("Cannot read input file.")?;
let mol = parse_molfile_str(&molfile).context("Cannot parse molfile.")?;
if mol.is_malformed() {
bail!("Bad input! Molecule has self-loops or multi-edges.")
}
// If --molinfo is set, print molecule graph and exit.
if cli.molinfo {
println!("{}", mol.info());
return Ok(());
}
// If --depth is set, calculate and print assembly depth and exit.
if cli.depth {
println!("{}", depth(&mol));
return Ok(());
}
// Handle bounding strategy CLI arguments.
let boundlist: &[Bound] = match cli.boundsgroup {
// By default, use a combination of the integer and vector bounds.
None => &[Bound::Int, Bound::MatchableEdges],
// If --no-bounds is set, do not use any bounds.
Some(BoundsGroup {
no_bounds: true, ..
}) => &[],
// Otherwise, use the bounds that were specified.
Some(BoundsGroup {
no_bounds: false,
bounds,
}) => &bounds.clone(),
};
// Call index calculation with all the various options.
let (index, num_matches, states_searched) = index_search(
&mol,
cli.timeout,
cli.canonize,
cli.parallel,
cli.memoize,
cli.kernel,
boundlist,
);
// Print final output, depending on --verbose.
match (cli.verbose, states_searched) {
// Found the exact assembly index.
(true, Some(states_searched)) => {
println!("Assembly Index: {index}");
println!("Matches: {num_matches}");
println!("States Searched: {states_searched}");
}
(false, Some(_)) => {
println!("{index}");
}
// Search timed out and returned an upper bound.
(true, None) => {
println!("Assembly Index: <= {index} (timed out)");
println!("Matches: {num_matches}");
println!("States Searched: not computed on timeout");
}
(false, None) => {
println!("<= {index} (timed out)");
}
}
Ok(())
}