Skip to main content

gapsmith_find/
types.rs

//! Result types produced by the find pipeline.
2
3use serde::{Deserialize, Serialize};
4
5/// One row of the Reactions.tbl output, carrying both the reaction metadata
6/// (pathway, name, EC, keyrea, spont) and the best blast hit for the
7/// reaction (if any). Mirrors `src/analyse_alignments.R`'s `rxndt`.
8///
9/// Column order matches the golden output at
10/// `toy/ecoli-all-Reactions.tbl` so downstream tools (R's
11/// `generate_GSdraft.R`, `predict_medium.R`, etc.) parse it unchanged.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ReactionHit {
14    pub pathway: String,
15    pub pathway_status: Option<PwyStatus>,
16    pub rxn: String,
17    pub name: String,
18    pub ec: String,
19    pub keyrea: bool,
20    pub spont: bool,
21
22    // Complex-related fields.
23    pub is_complex: bool,
24    pub subunit_count: u32,
25    pub subunits: String,
26    pub complex: Option<String>,
27    pub subunits_found: Option<u32>,
28    pub subunit_undefined_found: Option<bool>,
29    pub complex_status: Option<u8>,
30
31    /// The originating reference fasta path (e.g. `rev/1.1.1.1.fasta`) or
32    /// `None` if no fasta was found.
33    pub file: Option<String>,
34    /// Space-joined, sorted, deduplicated SEED reaction IDs associated with
35    /// this reaction via EC / MetaCyc-id / enzyme-name lookups. Matches
36    /// gapseq's `dbhit` column exactly (see `src/getDBhit.R`). Empty when
37    /// no SEED reaction matches.
38    pub dbhit: String,
39    /// True when `dbhit` is non-empty. Not written to output; handy for
40    /// downstream consumers.
41    pub has_dbhit: bool,
42
43    /// Source subdirectory (`rxn`, `rev`, `unrev`, `user`) derived from
44    /// `file`. Empty when no fasta was found.
45    pub src: String,
46    /// Reference-sequence type (`EC`, `metacyc`, `reaName`). Empty when no
47    /// fasta was found.
48    pub reftype: String,
49
50    pub qseqid: Option<String>,
51    pub pident: Option<f32>,
52    pub evalue: Option<f64>,
53    pub bitscore: Option<f32>,
54    pub qcov: Option<f32>,
55    pub stitle: Option<String>,
56    pub sstart: Option<i32>,
57    pub send: Option<i32>,
58    pub exception: bool,
59    pub status: HitStatus,
60}
61
62#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
63#[serde(rename_all = "snake_case")]
64pub enum HitStatus {
65    /// Hit passes bitscore + identity cutoffs (and exception-table cutoff
66    /// if applicable). Reaction is considered present.
67    GoodBlast,
68    /// Hit present but fails one of the cutoffs.
69    BadBlast,
70    /// Reference FASTA exists for this reaction but no hit was produced.
71    NoBlast,
72    /// No reference FASTA could be found for this reaction.
73    NoSeqData,
74    /// Reaction is marked as spontaneous; no hit and no sequence data.
75    Spontaneous,
76}
77
78impl HitStatus {
79    pub fn as_str(self) -> &'static str {
80        match self {
81            HitStatus::GoodBlast => "good_blast",
82            HitStatus::BadBlast => "bad_blast",
83            HitStatus::NoBlast => "no_blast",
84            HitStatus::NoSeqData => "no_seq_data",
85            HitStatus::Spontaneous => "spontaneous",
86        }
87    }
88}
89
90/// High-level presence tag attached to every predicted-present pathway.
91/// See `src/analyse_alignments.R:180-189`.
92#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
93#[serde(rename_all = "lowercase")]
94pub enum PwyStatus {
95    /// Completeness exactly 100%.
96    Full,
97    /// Predicted present via the lower (`completenessCutoff`) threshold,
98    /// with all key reactions present.
99    Threshold,
100    /// Predicted present via key-enzyme hint below the main threshold.
101    Keyenzyme,
102}
103
104impl PwyStatus {
105    pub fn as_str(self) -> &'static str {
106        match self {
107            PwyStatus::Full => "full",
108            PwyStatus::Threshold => "threshold",
109            PwyStatus::Keyenzyme => "keyenzyme",
110        }
111    }
112}
113
114/// One row of the Pathways.tbl output.
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct PathwayResult {
117    pub id: String,
118    pub name: String,
119    pub prediction: bool,
120    pub completeness: f64,
121    pub status: Option<PwyStatus>,
122    pub n_reaction: u32,
123    pub n_spontaneous: u32,
124    pub n_vague: u32,
125    pub n_key_reaction: u32,
126    pub n_reaction_found: u32,
127    pub n_key_reaction_found: u32,
128    pub reactions_found: Vec<String>,
129    pub spontaneous_reactions: Vec<String>,
130    pub key_reactions: Vec<String>,
131}