ggetrs_seq/
functions.rs

1use anyhow::{Result, bail};
2use ggetrs_ensembl::{lookup_symbol, types::LookupResponse};
3use reqwest::blocking::Client;
4use serde_json::json;
5
6use super::ResultSeqContainer;
7
8fn retrieve_sequence(ensembl_ids: &Vec<String>) -> Result<ResultSeqContainer> {
9    let ensembl_url = "http://rest.ensembl.org/sequence/id";
10    let data = json!({ "ids": ensembl_ids });
11    let response: ResultSeqContainer = Client::new()
12        .post(ensembl_url)
13        .header("content-type", "application/json")
14        .json(&data)
15        .send()?
16        .json::<ResultSeqContainer>()?;
17    Ok(response)
18}
19
/// Returns owned copies of every entry in `symbols` that does not begin with
/// the literal prefix `ENS`, preserving input order.
///
/// The prefix test is case-sensitive and is the only criterion applied, so any
/// term that happens to start with `ENS` is treated as an Ensembl id and is
/// left out of the result.
fn strip_symbols(symbols: &[String]) -> Vec<String> {
    symbols
        .iter()
        .filter(|symbol| !symbol.starts_with("ENS"))
        .cloned()
        .collect()
}
28
29/// Validates all non ensembl ids are found in lookup response
30fn validate_full_recovery(non_ensembl_ids: &[String], response: &LookupResponse) -> Result<()> {
31    for n in non_ensembl_ids {
32        if response.get_id(n).is_none() {
33            bail!(format!("Unable to find ensembl id for symbol {n}"));
34        }
35    }
36    Ok(())
37}
38
39/// recover ensembl ids from response
40fn recover_ensembl_ids(symbols: &[String], response: &LookupResponse) -> Vec<String> {
41    symbols
42        .iter()
43        .map(|x| {
44            if x.starts_with("ENS") {
45                x.to_owned()
46            } else {
47                response.get_id(x).unwrap() // unwrap okay because I validate before calling this
48            }
49        })
50        .collect()
51}
52
53/// convert any non-ensembl ids to ensembl ids
54fn convert_to_ensembl_ids(symbols: &[String], species: &str) -> Result<Vec<String>> {
55    let non_ensembl_ids = strip_symbols(symbols);
56    let response = lookup_symbol(&non_ensembl_ids, species)?;
57    validate_full_recovery(&non_ensembl_ids, &response)?;
58    Ok(recover_ensembl_ids(symbols, &response))
59}
60
61pub fn sequence(
62    search_terms: &Vec<String>,
63    species: &Option<String>,
64) -> Result<ResultSeqContainer> {
65    // case where not all search terms are ensembl ids
66    if search_terms.iter().all(|x| x.starts_with("ENS")) {
67        retrieve_sequence(search_terms)
68    } else {
69        let Some(species_name) = species else {
70            bail!(
71                "Not all provided symbols are Ensembl IDs - so a species must be provided to identify them"
72            );
73        };
74        let ensembl_ids = convert_to_ensembl_ids(search_terms, species_name)?;
75        retrieve_sequence(&ensembl_ids)
76    }
77}
78
#[cfg(test)]
mod testing {
    use super::sequence;

    // NOTE: any test whose terms reach `retrieve_sequence` performs a real
    // HTTP request against rest.ensembl.org.

    /// Converts string literals into the owned `Vec<String>` that `sequence` takes.
    fn owned(terms: &[&str]) -> Vec<String> {
        terms.iter().map(|term| (*term).to_string()).collect()
    }

    /// A single `ENS`-prefixed id is accepted without a species.
    #[test]
    fn test_seq_query() {
        let response = sequence(&owned(&["ENSG00000131095"]), &None);
        assert!(response.is_ok());
    }

    /// A non-`ENS` term with no species is rejected.
    #[test]
    fn test_uniprot_nonsense_query() {
        let response = sequence(&owned(&["AOSDKAPOWDNASD"]), &None);
        assert!(response.is_err());
    }

    /// A gene symbol is accepted when a species is supplied.
    #[test]
    fn test_seq_query_non_ensembl() {
        let species = Some("homo_sapiens".to_string());
        let response = sequence(&owned(&["AP2S1"]), &species);
        assert!(response.is_ok());
    }

    /// The same gene symbol is rejected when the species is omitted.
    #[test]
    fn test_seq_query_non_ensembl_missing_species() {
        let response = sequence(&owned(&["AP2S1"]), &None);
        assert!(response.is_err());
    }

    /// A made-up symbol yields an error even with a species.
    #[test]
    fn test_seq_query_non_ensembl_random_gene_name() {
        let species = Some("homo_sapiens".to_string());
        let response = sequence(&owned(&["SOMERANDOMGENENAME"]), &species);
        assert!(response.is_err());
    }

    /// `ENS`-prefixed ids and gene symbols may be mixed in one query.
    #[test]
    fn test_seq_query_mixed_symbols() {
        let species = Some("homo_sapiens".to_string());
        let response = sequence(&owned(&["ENSG00000131095", "AP2S1"]), &species);
        assert!(response.is_ok());
    }
}