1use std::collections::{HashMap, HashSet};
2
3use rusqlite::{Result, Row};
4use thiserror::Error;
5
6use crate::{db::GraphConnection, traits::*};
7
/// One record of the `reference_aliases` table: a reference name plus the
/// identifiers it may be known by under different naming schemes.
///
/// Every identifier is optional. `ReferenceAlias::compute_aliases` expands the
/// identifiers that are present into the full set of spellings accepted for
/// the reference.
///
/// `PartialEq`/`Eq` are derived so whole records can be compared directly
/// (for example with `assert_eq!`) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReferenceAlias {
    /// Name of the reference this record describes.
    pub reference_name: String,
    /// RefSeq accession id. Expanded into the bare id and a `ref|<id>|` form,
    /// each with and without the part following the first `.`.
    pub refseq_accession_id: Option<String>,
    /// GenBank accession id. Expanded into the id with and without the part
    /// following the first `.`.
    pub genbank_accession_id: Option<String>,
    /// UCSC id. Used verbatim as an alias.
    pub ucsc_id: Option<String>,
    /// Ensembl id. Used verbatim and with `chr`/`chrom`/`chromosome` style
    /// prefixes.
    pub ensembl_id: Option<String>,
    /// Custom id. Used verbatim and with `chr`/`chrom`/`chromosome` style
    /// prefixes.
    pub custom_id: Option<String>,
    /// Chromosome number. Used in decimal form, verbatim and with
    /// `chr`/`chrom`/`chromosome` style prefixes.
    pub chromosome: Option<i64>,
}
18
/// Errors returned by the fallible [`ReferenceAlias`] operations.
#[derive(Debug, Error)]
pub enum ReferenceAliasError {
    /// A `rusqlite` operation failed.
    ///
    /// The `#[from]` attribute generates `From<rusqlite::Error>`, so `?` on a
    /// `rusqlite` result converts into this variant; the display text is
    /// `Database error: ` followed by the underlying error's message.
    #[error("Database error: {0}")]
    DatabaseError(#[from] rusqlite::Error),
}
24
25impl Query for ReferenceAlias {
26 type Model = ReferenceAlias;
27
28 const TABLE_NAME: &'static str = "reference_aliases";
29
30 fn process_row(row: &Row) -> Self::Model {
31 ReferenceAlias {
32 reference_name: row.get(0).unwrap(),
33 refseq_accession_id: row.get(1).unwrap(),
34 genbank_accession_id: row.get(2).unwrap(),
35 ucsc_id: row.get(3).unwrap(),
36 ensembl_id: row.get(4).unwrap(),
37 custom_id: row.get(5).unwrap(),
38 chromosome: row.get(6).unwrap(),
39 }
40 }
41}
42
43impl ReferenceAlias {
44 #[allow(clippy::too_many_arguments)]
45 pub fn create(
46 conn: &GraphConnection,
47 reference_name: &str,
48 refseq_accession_id: Option<String>,
49 genbank_accession_id: Option<String>,
50 ucsc_id: Option<String>,
51 ensembl_id: Option<String>,
52 custom_id: Option<String>,
53 chromosome: Option<i64>,
54 ) -> rusqlite::Result<ReferenceAlias, ReferenceAliasError> {
55 conn.execute(
56 "INSERT INTO reference_aliases (reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
57 rusqlite::params![reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome],
58 )?;
59
60 Ok(ReferenceAlias {
61 reference_name: reference_name.to_string(),
62 refseq_accession_id,
63 genbank_accession_id,
64 ucsc_id,
65 ensembl_id,
66 custom_id,
67 chromosome,
68 })
69 }
70
71 fn chromosome_aliases(alias_id: &str) -> Vec<String> {
72 vec![
73 format!("chr{}", alias_id),
74 format!("Chr{}", alias_id),
75 format!("chrom{}", alias_id),
76 format!("Chrom{}", alias_id),
77 format!("chromosome{}", alias_id),
78 format!("Chromosome{}", alias_id),
79 ]
80 }
81
82 fn compute_aliases(reference_alias: ReferenceAlias) -> HashSet<String> {
83 let mut aliases = HashSet::new();
84 if let Some(refseq_id) = reference_alias.refseq_accession_id {
85 aliases.insert(refseq_id.clone());
86 aliases.insert(format!("ref|{}|", refseq_id));
87 if refseq_id.contains('.') {
88 let refseq_without_version = refseq_id.split('.').next().unwrap();
89 aliases.insert(refseq_without_version.to_string());
90 aliases.insert(format!("ref|{}|", refseq_without_version));
91 }
92 }
93 if let Some(genbank_id) = reference_alias.genbank_accession_id.clone() {
94 aliases.insert(genbank_id.clone());
95 if genbank_id.contains('.') {
96 let genbank_without_version = genbank_id.split('.').next().unwrap();
97 aliases.insert(genbank_without_version.to_string());
98 }
99 }
100 if let Some(ucsc_id) = reference_alias.ucsc_id.clone() {
101 aliases.insert(ucsc_id.clone());
102 }
103 if let Some(ensembl_id) = reference_alias.ensembl_id.clone() {
104 aliases.insert(ensembl_id.clone());
105 aliases.extend(ReferenceAlias::chromosome_aliases(&ensembl_id));
106 }
107 if let Some(custom_id) = reference_alias.custom_id {
108 aliases.insert(custom_id.clone());
109 aliases.extend(ReferenceAlias::chromosome_aliases(&custom_id));
110 }
111 if let Some(chromosome) = reference_alias.chromosome {
112 aliases.insert(chromosome.to_string());
113 aliases.extend(ReferenceAlias::chromosome_aliases(&chromosome.to_string()));
114 }
115 aliases
116 }
117
118 pub fn get_references_by_alias(
119 conn: &GraphConnection,
120 references: Vec<String>,
121 ) -> Result<HashMap<String, String>, ReferenceAliasError> {
122 let mut references_by_alias = HashMap::new();
123 let reference_aliases = ReferenceAlias::all(conn);
124 for reference_alias in reference_aliases {
125 let aliases = ReferenceAlias::compute_aliases(reference_alias);
126 for reference in &references {
127 if aliases.contains(reference) {
128 for alias in &aliases {
129 references_by_alias.insert(alias.clone(), reference.to_string());
130 }
131 }
132 }
133 }
134 Ok(references_by_alias)
135 }
136}
137
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_helpers::get_connection;

    /// `create` must return the values it was given and persist every column.
    #[test]
    fn test_create() {
        let conn = &mut get_connection(None).unwrap();
        let created = ReferenceAlias::create(
            conn,
            "Test Reference",
            Some("REFSEQ123".to_string()),
            Some("GENBANK123".to_string()),
            Some("UCSC123".to_string()),
            Some("ENSEMBL123".to_string()),
            Some("CUSTOM123".to_string()),
            Some(1),
        )
        .unwrap();
        assert_eq!(created.reference_name, "Test Reference");
        assert_eq!(created.refseq_accession_id.as_deref(), Some("REFSEQ123"));
        assert_eq!(created.chromosome, Some(1));

        // Read the row back with an explicit column list so the check does
        // not depend on `process_row`.
        let new_entry = conn
            .query_row(
                "SELECT reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome FROM reference_aliases WHERE reference_name = ?1",
                rusqlite::params!["Test Reference"],
                |row| {
                    Ok(ReferenceAlias {
                        reference_name: row.get(0)?,
                        refseq_accession_id: row.get(1)?,
                        genbank_accession_id: row.get(2)?,
                        ucsc_id: row.get(3)?,
                        ensembl_id: row.get(4)?,
                        custom_id: row.get(5)?,
                        chromosome: row.get(6)?,
                    })
                },
            )
            .unwrap();
        assert_eq!(new_entry.reference_name, "Test Reference");
        assert_eq!(new_entry.refseq_accession_id, Some("REFSEQ123".to_string()));
        assert_eq!(new_entry.genbank_accession_id, Some("GENBANK123".to_string()));
        assert_eq!(new_entry.ucsc_id, Some("UCSC123".to_string()));
        assert_eq!(new_entry.ensembl_id, Some("ENSEMBL123".to_string()));
        assert_eq!(new_entry.custom_id, Some("CUSTOM123".to_string()));
        assert_eq!(new_entry.chromosome, Some(1));
    }

    /// A fresh connection ships with prepopulated aliases; spot-check the
    /// alias expansion for an E. coli and a yeast reference.
    #[test]
    fn test_prepopulated_aliases() {
        let conn = &mut get_connection(None).unwrap();
        let reference_aliases = ReferenceAlias::all(conn);
        assert_eq!(reference_aliases.len(), 107);
        let first_e_coli_reference = reference_aliases
            .iter()
            .find(|alias| alias.genbank_accession_id == Some("U00096.3".to_string()))
            .unwrap();
        let aliases = ReferenceAlias::compute_aliases(first_e_coli_reference.clone());
        assert!(aliases.contains("NC_000913.3"));
        assert!(aliases.contains("NC_000913"));
        assert!(aliases.contains("ref|NC_000913|"));
        assert!(aliases.contains("U00096.3"));
        assert!(aliases.contains("U00096"));

        let first_yeast_reference = reference_aliases
            .iter()
            .find(|alias| alias.genbank_accession_id == Some("BK006935.2".to_string()))
            .unwrap();
        let aliases = ReferenceAlias::compute_aliases(first_yeast_reference.clone());
        assert!(aliases.contains("BK006935.2"));
        assert!(aliases.contains("BK006935"));
        assert!(aliases.contains("NC_001133.9"));
        assert!(aliases.contains("NC_001133"));
        assert!(aliases.contains("ref|NC_001133|"));
        assert!(aliases.contains("chrI"));
        assert!(aliases.contains("chr1"));
    }

    /// Looking up one spelling of a prepopulated reference must map all of
    /// that reference's other spellings back to the spelling that was asked
    /// for; asking for nothing yields an empty map.
    #[test]
    fn test_get_references_by_alias() {
        let conn = &mut get_connection(None).unwrap();
        let by_alias =
            ReferenceAlias::get_references_by_alias(conn, vec!["NC_000913.3".to_string()])
                .unwrap();
        for alias in ["NC_000913.3", "NC_000913", "ref|NC_000913|", "U00096.3", "U00096"] {
            assert_eq!(
                by_alias.get(alias).map(String::as_str),
                Some("NC_000913.3"),
                "alias {} should map to the requested reference",
                alias
            );
        }

        let empty = ReferenceAlias::get_references_by_alias(conn, Vec::new()).unwrap();
        assert!(empty.is_empty());
    }
}