ggetrs_string/
cli.rs

1use crate::{
2    StringNetworkType,
3    utils::{OutputFormat, match_output},
4};
5use anyhow::Result;
6use bon::{Builder, builder};
7use clap::{Parser, Subcommand};
8use serde_json::{Value, json};
9use std::io::Write;
10
// Subcommands of the STRING module.
//
// Every variant flattens two argument groups into its own command line: an
// endpoint-specific `args` struct and the shared `OutputArgs`.
//
// The `///` lines on the variants are left verbatim: clap's derive turns them
// into the help text shown for each subcommand.
#[derive(Subcommand)]
pub enum ModString {
    /// Maps common protein names, synonyms and UniProt identifiers into STRING identifiers
    MapIds {
        #[clap(flatten)]
        args: StringMappingArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Retrieves the network interactions for your input protein(s) in various text based formats
    Network {
        #[clap(flatten)]
        args: StringNetworkArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Retrieve the protein similarity scores between the input proteins
    Homology {
        #[clap(flatten)]
        args: StringHomologyArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Gets all the STRING interaction partners of your proteins
    Interactions {
        #[clap(flatten)]
        args: StringInteractionsArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Performs the enrichment analysis of your set of proteins for the Gene Ontology, KEGG pathways, UniProt Keywords, PubMed publications, Pfam, InterPro and SMART domains.
    Enrichment {
        #[clap(flatten)]
        args: StringFunctionalEnrichmentArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Gets the functional annotation (Gene Ontology, UniProt Keywords, PFAM, INTERPRO and SMART domains) of your list of proteins.
    Annotations {
        #[clap(flatten)]
        args: StringFunctionalAnnotationArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
    /// Tests if your network has more interactions than expected
    PpiEnrichment {
        #[clap(flatten)]
        args: StringPpiEnrichmentArgs,

        #[clap(flatten)]
        output: OutputArgs,
    },
}
70
// Output options flattened into every `ModString` subcommand: an optional
// destination path and the output format (clap default: "tsv").
// Listed in help under the "Output Arguments" heading.
#[derive(Debug, Clone, Parser)]
#[clap(next_help_heading = "Output Arguments")]
pub struct OutputArgs {
    /// Optional filepath to write output to [default=stdout]
    #[clap(short, long)]
    pub output: Option<String>,

    /// The output format to use
    #[clap(short, long, default_value = "tsv")]
    pub format: OutputFormat,
}
82impl OutputArgs {
83    pub fn get_writer(&self) -> Result<Box<dyn Write>> {
84        match_output(self.output.clone())
85    }
86}
87
88#[derive(Debug, Clone, Parser, Builder)]
89#[clap(next_help_heading = "STRING Network Arguments")]
90/// Retrieves the network interactions for your input protein(s) in various text based formats
91pub struct StringNetworkArgs {
92    /// List of genes to retrieve network for
93    #[clap(required = true)]
94    pub identifiers: Vec<String>,
95
96    /// Species to retrieve network for (NCBI taxonomy ID)
97    #[clap(short, long, default_value = "9606")]
98    #[builder(default = 9606)]
99    pub species: usize,
100
101    /// threshold of significance to include a interaction, a number between 0 and 1000 (default depends on the network)
102    #[clap(short, long)]
103    pub required_score: Option<f64>,
104
105    /// The type of network to retrieve
106    #[clap(short, long, default_value = "functional")]
107    #[builder(default = StringNetworkType::Functional)]
108    pub network_type: StringNetworkType,
109
110    /// adds a number of proteins with to the network based on their confidence score
111    #[clap(short, long)]
112    pub add_nodes: Option<usize>,
113
114    /// when available use submitted names in the preferredName column when true
115    #[clap(short = 'q', long)]
116    #[builder(default = false)]
117    pub show_query_node_labels: bool,
118
119    /// identifier of the caller to provide to the server
120    #[clap(short, long, default_value = "ggetrs")]
121    #[builder(default = "ggetrs".to_string())]
122    pub caller_identity: String,
123}
124impl StringNetworkArgs {
125    #[must_use]
126    pub fn build_post(&self) -> Value {
127        let mut data = json!({
128            "identifiers": self.identifiers.join("%0d"),
129            "species": self.species,
130            "network_type": self.network_type.to_string(),
131            "caller_identity": self.caller_identity,
132        });
133        data["show_query_node_labels"] = if self.show_query_node_labels {
134            json!(1)
135        } else {
136            json!(0)
137        };
138        if let Some(score) = self.required_score {
139            data["required_score"] = json!(score);
140        }
141        if let Some(nodes) = self.add_nodes {
142            data["add_nodes"] = json!(nodes);
143        }
144        data
145    }
146}
147
// Arguments of the `Homology` subcommand (flattened into
// `ModString::Homology`): the identifiers to query, the species, and the
// caller identity reported to the server.
//
// `Parser` derives the command-line flags and `Builder` a programmatic
// builder; each `#[builder(default = ...)]` mirrors the clap `default_value`
// on the same field.
//
// NOTE(review): the `identifiers`/`species` help text says "retrieve network
// for" — looks copied from the network command; confirm the intended wording.
#[derive(Debug, Clone, Parser, Builder)]
#[clap(next_help_heading = "STRING Homology Arguments")]
pub struct StringHomologyArgs {
    /// List of genes to retrieve network for
    #[clap(required = true)]
    pub identifiers: Vec<String>,

    /// Species to retrieve network for (NCBI taxonomy ID)
    #[clap(short, long, default_value = "9606")]
    #[builder(default = 9606)]
    pub species: usize,

    /// identifier of the caller to provide to the server
    #[clap(short, long, default_value = "ggetrs")]
    #[builder(default = "ggetrs".to_string())]
    pub caller_identity: String,
}
165impl StringHomologyArgs {
166    #[must_use]
167    pub fn build_post(&self) -> Value {
168        json!({
169            "identifiers": self.identifiers.join("%0d"),
170            "species": self.species,
171            "caller_identity": self.caller_identity,
172        })
173    }
174}
175
// Arguments of the `MapIds` subcommand (flattened into `ModString::MapIds`):
// the identifiers to map, an echo-query switch, an optional per-identifier
// match limit, the species, and the caller identity reported to the server.
//
// `Parser` derives the command-line flags and `Builder` a programmatic
// builder; each `#[builder(default = ...)]` mirrors the clap `default_value`
// on the same field (`echo_query` is off unless the flag is passed).
//
// NOTE(review): the `identifiers`/`species` help text says "retrieve network
// for" — looks copied from the network command; confirm the intended wording.
#[derive(Debug, Clone, Parser, Builder)]
#[clap(next_help_heading = "STRING Mapping Identifiers Arguments")]
pub struct StringMappingArgs {
    /// List of genes to retrieve network for
    #[clap(required = true)]
    pub identifiers: Vec<String>,

    /// insert column with your input identifier
    #[clap(short, long)]
    #[builder(default = false)]
    pub echo_query: bool,

    /// limits the number of matches per query identifier (best matches come first)
    #[clap(short, long)]
    pub limit: Option<usize>,

    /// Species to retrieve network for (NCBI taxonomy ID)
    #[clap(short, long, default_value = "9606")]
    #[builder(default = 9606)]
    pub species: usize,

    /// identifier of the caller to provide to the server
    #[clap(short, long, default_value = "ggetrs")]
    #[builder(default = "ggetrs".to_string())]
    pub caller_identity: String,
}
202impl StringMappingArgs {
203    #[must_use]
204    pub fn build_post(&self) -> Value {
205        let mut data = json!({
206            "identifiers": self.identifiers.join("%0d"),
207            "echo_query": self.echo_query,
208            "species": self.species,
209            "caller_identity": self.caller_identity,
210        });
211        if let Some(limit) = self.limit {
212            data["limit"] = json!(limit);
213        }
214        data
215    }
216}
217
218/// Gets all the STRING interaction partners of your proteins
219#[derive(Debug, Clone, Parser, Builder)]
220#[clap(next_help_heading = "STRING Interactions Arguments")]
221pub struct StringInteractionsArgs {
222    /// List of genes to retrieve network for
223    #[clap(required = true)]
224    pub identifiers: Vec<String>,
225
226    /// Species to retrieve network for (NCBI taxonomy ID)
227    #[clap(short, long, default_value = "9606")]
228    #[builder(default = 9606)]
229    pub species: usize,
230
231    /// limits the number of interaction partners retrieved per protein (most confident interactions come first)
232    /// default set by the server (usually 10)
233    #[clap(short, long)]
234    pub limit: Option<usize>,
235
236    /// threshold of significance to include a interaction, a number between 0 and 1000 (default depends on the network)
237    #[clap(short, long)]
238    pub required_score: Option<f64>,
239
240    /// The type of network to retrieve
241    #[clap(short, long, default_value = "functional")]
242    #[builder(default = StringNetworkType::Functional)]
243    pub network_type: StringNetworkType,
244
245    /// identifier of the caller to provide to the server
246    #[clap(short, long, default_value = "ggetrs")]
247    #[builder(default = "ggetrs".to_string())]
248    pub caller_identity: String,
249}
250impl StringInteractionsArgs {
251    #[must_use]
252    pub fn build_post(&self) -> Value {
253        let mut data = json!({
254            "identifiers": self.identifiers.join("%0d"),
255            "species": self.species,
256            "network_type": self.network_type.to_string(),
257            "caller_identity": self.caller_identity,
258        });
259        if let Some(limit) = self.limit {
260            data["limit"] = json!(limit);
261        }
262        if let Some(score) = self.required_score {
263            data["required_score"] = json!(score);
264        }
265        data
266    }
267}
268
// Arguments of the `Enrichment` subcommand (flattened into
// `ModString::Enrichment`): the identifiers to analyse, an optional
// background identifier list, the species, and the caller identity reported
// to the server.
//
// `Parser` derives the command-line flags and `Builder` a programmatic
// builder; each `#[builder(default = ...)]` mirrors the clap `default_value`
// on the same field.
/// Performs the enrichment analysis of your set of proteins for the Gene Ontology, KEGG pathways, UniProt Keywords, PubMed publications, Pfam, InterPro and SMART domains.
#[derive(Debug, Clone, Parser, Builder)]
#[clap(next_help_heading = "STRING Functional Enrichment Arguments")]
pub struct StringFunctionalEnrichmentArgs {
    /// List of genes to retrieve network for
    #[clap(required = true)]
    pub identifiers: Vec<String>,

    /// Background list of genes to compare against (Only STRING identifiers are accepted)
    /// If not provided, the whole genome of the species is used
    #[clap(short, long)]
    pub background: Option<Vec<String>>,

    /// Species to retrieve network for (NCBI taxonomy ID)
    #[clap(short, long, default_value = "9606")]
    #[builder(default = 9606)]
    pub species: usize,

    /// identifier of the caller to provide to the server
    #[clap(short, long, default_value = "ggetrs")]
    #[builder(default = "ggetrs".to_string())]
    pub caller_identity: String,
}
292impl StringFunctionalEnrichmentArgs {
293    #[must_use]
294    pub fn build_post(&self) -> Value {
295        let mut data = json!({
296            "identifiers": self.identifiers.join("%0d"),
297            "species": self.species,
298            "caller_identity": self.caller_identity,
299        });
300        if let Some(background) = &self.background {
301            data["background"] = json!(background.join("%0d"));
302        }
303        data
304    }
305}
306
// Arguments of the `Annotations` subcommand (flattened into
// `ModString::Annotations`): the identifiers to annotate, the species, two
// PubMed switches, and the caller identity reported to the server.
//
// `Parser` derives the command-line flags and `Builder` a programmatic
// builder; each `#[builder(default = ...)]` mirrors the clap `default_value`
// on the same field (both PubMed switches are off unless passed).
/// Gets the functional annotation (Gene Ontology, UniProt Keywords, PFAM, INTERPRO and SMART domains) of your list of proteins.
#[derive(Debug, Clone, Parser, Builder)]
#[clap(next_help_heading = "STRING Functional Annotations Arguments")]
pub struct StringFunctionalAnnotationArgs {
    /// List of genes to retrieve network for
    #[clap(required = true)]
    pub identifiers: Vec<String>,

    /// Species to retrieve network for (NCBI taxonomy ID)
    #[clap(short, long, default_value = "9606")]
    #[builder(default = 9606)]
    pub species: usize,

    /// Return PubMed annotations in addition to other categories
    // Short flags are spelled out: lowercase `-p` here, uppercase `-P` below.
    #[clap(short = 'p', long)]
    #[builder(default = false)]
    pub allow_pubmed: bool,

    /// Only return PubMed annotations
    #[clap(short = 'P', long)]
    #[builder(default = false)]
    pub only_pubmed: bool,

    /// identifier of the caller to provide to the server
    #[clap(short, long, default_value = "ggetrs")]
    #[builder(default = "ggetrs".to_string())]
    pub caller_identity: String,
}
335impl StringFunctionalAnnotationArgs {
336    #[must_use]
337    pub fn build_post(&self) -> Value {
338        json!({
339            "identifiers": self.identifiers.join("%0d"),
340            "species": self.species,
341            "caller_identity": self.caller_identity,
342            "allow_pubmed": self.allow_pubmed,
343            "only_pubmed": self.only_pubmed,
344        })
345    }
346}
347
348/// Tests if your network has more interactions than expected
349#[derive(Debug, Clone, Parser, Builder)]
350#[clap(next_help_heading = "STRING PPI Enrichment Arguments")]
351pub struct StringPpiEnrichmentArgs {
352    /// List of genes to retrieve network for
353    #[clap(required = true)]
354    pub identifiers: Vec<String>,
355
356    /// Species to retrieve network for (NCBI taxonomy ID)
357    #[clap(short, long, default_value = "9606")]
358    #[builder(default = 9606)]
359    pub species: usize,
360
361    /// threshold of significance to include a interaction, a number between 0 and 1000 (default depends on the network)
362    #[clap(short, long)]
363    pub required_score: Option<f64>,
364
365    /// using this parameter you can specify the background proteome of your experiment. Only STRING identifiers will be recognised (each must be seperated by "%0d") e.g. '7227.FBpp0077451%0d7227.FBpp0074373'. You can map STRING identifiers using mapping identifiers method.
366    #[clap(short, long)]
367    pub background: Option<Vec<String>>,
368
369    /// identifier of the caller to provide to the server
370    #[clap(short, long, default_value = "ggetrs")]
371    #[builder(default = "ggetrs".to_string())]
372    pub caller_identity: String,
373}
374impl StringPpiEnrichmentArgs {
375    #[must_use]
376    pub fn build_post(&self) -> Value {
377        let mut data = json!({
378            "identifiers": self.identifiers.join("%0d"),
379            "species": self.species,
380            "caller_identity": self.caller_identity,
381        });
382        if let Some(score) = self.required_score {
383            data["required_score"] = json!(score);
384        }
385        if let Some(background) = &self.background {
386            data["background"] = json!(background.join("%0d"));
387        }
388        data
389    }
390}