gapsmith_find/lib.rs
1//! Pathway / reaction finder.
2//!
3//! Mirrors the gapseq reference implementation (R and shell) spread across
4//! `src/prepare_batch_alignments.R`, `src/analyse_alignments.R`, and
5//! `src/gapseq_find.sh`. The pipeline:
6//!
7//! 1. [`pathways::select`] picks a subset of [`gapsmith_db::PathwayRow`] to
8//! evaluate, based on a user keyword or pattern.
9//! 2. [`seqfile::resolve_for_reaction`] walks the `dat/seq/` tree to find
10//! the reference FASTA(s) for each reaction.
11//! 3. [`runner::run_find`] builds one concatenated `query.faa`, runs the
12//! alignment (via [`gapsmith_align::Aligner`]) against the input genome,
13//! classifies every hit, and aggregates per-pathway completeness.
14//! 4. [`output`] emits `*-Reactions.tbl` and `*-Pathways.tbl` in gapseq's
15//! column order.
16//!
17//! Complex / subunit detection (`src/complex_detection.R`) lives in
18//! [`complex`]; it has point-by-point R-parity on 9 handcrafted cases
19//! (Greek / Latin numerals, size-dict synonyms, coverage edges). See
20//! `crates/gapsmith-find/tests/complex_parity.rs`.
21//!
22//! # End-to-end parity
23//!
24//! [`runner::run_find`] produces byte-identical `*-Pathways.tbl` output
25//! against real gapseq on two test cases (`-p PWY-6587` and `-p amino` on
26//! `toy/ecore.faa`). See `crates/gapsmith-find/tests/pipeline_parity.rs`.
27
28pub mod classify;
29pub mod complex;
30pub mod dbhit;
31pub mod output;
32pub mod pathways;
33pub mod runner;
34pub mod seqfile;
35pub mod taxonomy;
36pub mod types;
37
38pub use classify::{classify_hits, ClassifyOptions};
39pub use output::{write_pathways_tbl, write_reactions_tbl};
40pub use pathways::{select, ExpandedReaction, PathwaySelectOptions};
41pub use runner::{run_find, FindError, FindOptions, FindReport};
42pub use seqfile::{resolve_for_reaction, ResolvedSeq, SeqfileOptions};
43pub use types::{HitStatus, PathwayResult, PwyStatus, ReactionHit};