1use anyhow::{Result, bail};
2use ggetrs_ensembl::{lookup_symbol, types::LookupResponse};
3use reqwest::blocking::Client;
4use serde_json::json;
5
6use super::ResultSeqContainer;
7
8fn retrieve_sequence(ensembl_ids: &Vec<String>) -> Result<ResultSeqContainer> {
9 let ensembl_url = "http://rest.ensembl.org/sequence/id";
10 let data = json!({ "ids": ensembl_ids });
11 let response: ResultSeqContainer = Client::new()
12 .post(ensembl_url)
13 .header("content-type", "application/json")
14 .json(&data)
15 .send()?
16 .json::<ResultSeqContainer>()?;
17 Ok(response)
18}
19
/// Collects the search terms that are not already Ensembl identifiers.
///
/// A term counts as an Ensembl identifier when it starts with `ENS`
/// (case-sensitive). The remaining terms are returned as owned copies in
/// their original order.
fn strip_symbols(symbols: &[String]) -> Vec<String> {
    let mut unresolved = Vec::new();
    for symbol in symbols {
        if !symbol.starts_with("ENS") {
            unresolved.push(symbol.clone());
        }
    }
    unresolved
}
28
29fn validate_full_recovery(non_ensembl_ids: &[String], response: &LookupResponse) -> Result<()> {
31 for n in non_ensembl_ids {
32 if response.get_id(n).is_none() {
33 bail!(format!("Unable to find ensembl id for symbol {n}"));
34 }
35 }
36 Ok(())
37}
38
39fn recover_ensembl_ids(symbols: &[String], response: &LookupResponse) -> Vec<String> {
41 symbols
42 .iter()
43 .map(|x| {
44 if x.starts_with("ENS") {
45 x.to_owned()
46 } else {
47 response.get_id(x).unwrap() }
49 })
50 .collect()
51}
52
53fn convert_to_ensembl_ids(symbols: &[String], species: &str) -> Result<Vec<String>> {
55 let non_ensembl_ids = strip_symbols(symbols);
56 let response = lookup_symbol(&non_ensembl_ids, species)?;
57 validate_full_recovery(&non_ensembl_ids, &response)?;
58 Ok(recover_ensembl_ids(symbols, &response))
59}
60
61pub fn sequence(
62 search_terms: &Vec<String>,
63 species: &Option<String>,
64) -> Result<ResultSeqContainer> {
65 if search_terms.iter().all(|x| x.starts_with("ENS")) {
67 retrieve_sequence(search_terms)
68 } else {
69 let Some(species_name) = species else {
70 bail!(
71 "Not all provided symbols are Ensembl IDs - so a species must be provided to identify them"
72 );
73 };
74 let ensembl_ids = convert_to_ensembl_ids(search_terms, species_name)?;
75 retrieve_sequence(&ensembl_ids)
76 }
77}
78
// These tests call `sequence` directly. Those that reach `retrieve_sequence`
// issue a real HTTP request, and the symbol-based ones presumably also need
// network access for the lookup (TODO confirm against `lookup_symbol`).
#[cfg(test)]
mod testing {
    use super::sequence;

    /// Builds the owned `Vec<String>` that `sequence` expects from literals.
    fn owned(terms: &[&str]) -> Vec<String> {
        terms.iter().map(|term| (*term).to_string()).collect()
    }

    /// Species argument shared by the symbol-based queries.
    fn human() -> Option<String> {
        Some("homo_sapiens".to_string())
    }

    #[test]
    fn test_seq_query() {
        let terms = owned(&["ENSG00000131095"]);
        assert!(sequence(&terms, &None).is_ok());
    }

    // The term lacks the `ENS` prefix and no species is given, so `sequence`
    // returns its "species must be provided" error.
    #[test]
    fn test_uniprot_nonsense_query() {
        let terms = owned(&["AOSDKAPOWDNASD"]);
        assert!(sequence(&terms, &None).is_err());
    }

    #[test]
    fn test_seq_query_non_ensembl() {
        let terms = owned(&["AP2S1"]);
        assert!(sequence(&terms, &human()).is_ok());
    }

    #[test]
    fn test_seq_query_non_ensembl_missing_species() {
        let terms = owned(&["AP2S1"]);
        assert!(sequence(&terms, &None).is_err());
    }

    #[test]
    fn test_seq_query_non_ensembl_random_gene_name() {
        let terms = owned(&["SOMERANDOMGENENAME"]);
        assert!(sequence(&terms, &human()).is_err());
    }

    #[test]
    fn test_seq_query_mixed_symbols() {
        let terms = owned(&["ENSG00000131095", "AP2S1"]);
        assert!(sequence(&terms, &human()).is_ok());
    }
}