gapsmith_find/types.rs
1//! Result types produced by the find pipeline.
2
3use serde::{Deserialize, Serialize};
4
/// One row of the Reactions.tbl output, carrying both the reaction metadata
/// (pathway, name, EC, keyrea, spont) and the best blast hit for the
/// reaction (if any). Mirrors `src/analyse_alignments.R`'s `rxndt`.
///
/// Column order matches the golden output at
/// `toy/ecoli-all-Reactions.tbl` so downstream tools (R's
/// `generate_GSdraft.R`, `predict_medium.R`, etc.) parse it unchanged.
///
/// NOTE(review): if the table writer relies on the derived `Serialize`
/// impl, the field declaration order below *is* the column order — confirm
/// before reordering, inserting, or removing fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReactionHit {
    /// Pathway this reaction row is listed under (presumably the pathway
    /// ID rather than its display name — TODO confirm).
    pub pathway: String,
    /// Presence tag of the enclosing pathway; see [`PwyStatus`].
    /// Presumably `None` when the pathway is not predicted present —
    /// TODO confirm against the code that fills this in.
    pub pathway_status: Option<PwyStatus>,
    /// Reaction identifier.
    pub rxn: String,
    /// Reaction name.
    pub name: String,
    /// EC annotation of the reaction, kept as text.
    pub ec: String,
    /// Key-reaction flag (`keyrea` in the reaction metadata).
    pub keyrea: bool,
    /// Spontaneous-reaction flag (`spont` in the reaction metadata).
    pub spont: bool,

    // Complex-related fields.
    // NOTE(review): the exact semantics of these fields (in particular the
    // numeric encoding of `complex_status` and the format of `subunits`)
    // are not documented in this file — check the code that populates them
    // before relying on specific values.
    pub is_complex: bool,
    pub subunit_count: u32,
    pub subunits: String,
    pub complex: Option<String>,
    pub subunits_found: Option<u32>,
    pub subunit_undefined_found: Option<bool>,
    pub complex_status: Option<u8>,

    /// The originating reference fasta path (e.g. `rev/1.1.1.1.fasta`) or
    /// `None` if no fasta was found.
    pub file: Option<String>,
    /// Space-joined, sorted, deduplicated SEED reaction IDs associated with
    /// this reaction via EC / MetaCyc-id / enzyme-name lookups. Matches
    /// gapseq's `dbhit` column exactly (see `src/getDBhit.R`). Empty when
    /// no SEED reaction matches.
    pub dbhit: String,
    /// True when `dbhit` is non-empty. Not written to output; handy for
    /// downstream consumers.
    pub has_dbhit: bool,

    /// Source subdirectory (`rxn`, `rev`, `unrev`, `user`) derived from
    /// `file`. Empty when no fasta was found.
    pub src: String,
    /// Reference-sequence type (`EC`, `metacyc`, `reaName`). Empty when no
    /// fasta was found.
    pub reftype: String,

    // Best-hit columns. The field names follow BLAST tabular-output column
    // names; each is expected to be `None` when the reaction has no hit
    // (the struct-level doc describes the hit as optional).
    pub qseqid: Option<String>,
    pub pident: Option<f32>,
    pub evalue: Option<f64>,
    pub bitscore: Option<f32>,
    pub qcov: Option<f32>,
    pub stitle: Option<String>,
    pub sstart: Option<i32>,
    pub send: Option<i32>,
    /// Presumably set when the exception table applies to this reaction
    /// (see the cutoff note on [`HitStatus::GoodBlast`]) — TODO confirm.
    pub exception: bool,
    /// Classification of the hit for this reaction; see [`HitStatus`].
    pub status: HitStatus,
}
61
/// Classification of a reaction's hit outcome, stored in
/// [`ReactionHit::status`].
///
/// Serde (de)serializes the variants in `snake_case` (for example
/// `GoodBlast` <-> `"good_blast"`), which is the same text that
/// `HitStatus::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum HitStatus {
    /// Hit passes bitscore + identity cutoffs (and exception-table cutoff
    /// if applicable). Reaction is considered present.
    GoodBlast,
    /// Hit present but fails one of the cutoffs.
    BadBlast,
    /// Reference FASTA exists for this reaction but no hit was produced.
    NoBlast,
    /// No reference FASTA could be found for this reaction.
    NoSeqData,
    /// Reaction is marked as spontaneous; no hit and no sequence data.
    Spontaneous,
}
77
78impl HitStatus {
79 pub fn as_str(self) -> &'static str {
80 match self {
81 HitStatus::GoodBlast => "good_blast",
82 HitStatus::BadBlast => "bad_blast",
83 HitStatus::NoBlast => "no_blast",
84 HitStatus::NoSeqData => "no_seq_data",
85 HitStatus::Spontaneous => "spontaneous",
86 }
87 }
88}
89
/// High-level presence tag attached to every predicted-present pathway.
/// See `src/analyse_alignments.R:180-189`.
///
/// Serde (de)serializes the variants in lowercase (`"full"`,
/// `"threshold"`, `"keyenzyme"`), which is the same text that
/// `PwyStatus::as_str` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum PwyStatus {
    /// Completeness exactly 100%.
    Full,
    /// Predicted present via the lower (`completenessCutoff`) threshold,
    /// with all key reactions present.
    Threshold,
    /// Predicted present via key-enzyme hint below the main threshold.
    Keyenzyme,
}
103
104impl PwyStatus {
105 pub fn as_str(self) -> &'static str {
106 match self {
107 PwyStatus::Full => "full",
108 PwyStatus::Threshold => "threshold",
109 PwyStatus::Keyenzyme => "keyenzyme",
110 }
111 }
112}
113
/// One row of the Pathways.tbl output.
///
/// NOTE(review): if the table writer relies on the derived `Serialize`
/// impl, the field declaration order below is the column order — confirm
/// before reordering fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PathwayResult {
    /// Pathway identifier.
    pub id: String,
    /// Pathway name.
    pub name: String,
    /// Whether the pathway is predicted present (presumably; confirm
    /// against the code that computes it).
    pub prediction: bool,
    /// Pathway completeness. Presumably expressed as a percentage, since
    /// [`PwyStatus::Full`] is described as "exactly 100%" — TODO confirm
    /// the scale (0–100 vs. 0–1).
    pub completeness: f64,
    /// Presence tag; see [`PwyStatus`]. Presumably `None` when the pathway
    /// is not predicted present — TODO confirm.
    pub status: Option<PwyStatus>,
    // Reaction counts for the pathway. NOTE(review): the exact counting
    // rules (e.g. what qualifies as "vague", and whether spontaneous
    // reactions are included in `n_reaction`) are not visible in this
    // file — check the code that fills these in.
    pub n_reaction: u32,
    pub n_spontaneous: u32,
    pub n_vague: u32,
    pub n_key_reaction: u32,
    pub n_reaction_found: u32,
    pub n_key_reaction_found: u32,
    // Reaction lists backing the counts above (presumably reaction
    // identifiers — TODO confirm the ID space and ordering).
    pub reactions_found: Vec<String>,
    pub spontaneous_reactions: Vec<String>,
    pub key_reactions: Vec<String>,
}