Skip to main content

gen_models/
reference_alias.rs

1use std::collections::{HashMap, HashSet};
2
3use rusqlite::{Result, Row};
4use thiserror::Error;
5
6use crate::{db::GraphConnection, traits::*};
7
/// One row of the `reference_aliases` table: a reference sequence together
/// with the identifiers it is known by in different naming schemes.
///
/// Every identifier is optional; only `reference_name` is always present.
/// `PartialEq`/`Eq` are derived so whole rows can be compared directly
/// (for example in tests) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReferenceAlias {
    /// Name of the reference this row describes.
    pub reference_name: String,
    /// RefSeq accession, possibly carrying a `.version` suffix
    /// (e.g. `NC_000913.3`).
    pub refseq_accession_id: Option<String>,
    /// GenBank accession, possibly carrying a `.version` suffix
    /// (e.g. `U00096.3`).
    pub genbank_accession_id: Option<String>,
    /// UCSC identifier, used verbatim as an alias.
    pub ucsc_id: Option<String>,
    /// Ensembl identifier; also expanded with `chr`/`chrom`/`chromosome`
    /// prefixes when aliases are computed.
    pub ensembl_id: Option<String>,
    /// Free-form identifier; expanded with the same chromosome prefixes as
    /// `ensembl_id`.
    pub custom_id: Option<String>,
    /// Numeric chromosome identifier, if any.
    pub chromosome: Option<i64>,
}
18
/// Errors surfaced by [`ReferenceAlias`] operations.
#[derive(Debug, Error)]
pub enum ReferenceAliasError {
    /// A `rusqlite` call failed; `#[from]` lets `?` convert the underlying
    /// `rusqlite::Error` into this variant automatically.
    #[error("Database error: {0}")]
    DatabaseError(#[from] rusqlite::Error),
}
24
25impl Query for ReferenceAlias {
26    type Model = ReferenceAlias;
27
28    const TABLE_NAME: &'static str = "reference_aliases";
29
30    fn process_row(row: &Row) -> Self::Model {
31        ReferenceAlias {
32            reference_name: row.get(0).unwrap(),
33            refseq_accession_id: row.get(1).unwrap(),
34            genbank_accession_id: row.get(2).unwrap(),
35            ucsc_id: row.get(3).unwrap(),
36            ensembl_id: row.get(4).unwrap(),
37            custom_id: row.get(5).unwrap(),
38            chromosome: row.get(6).unwrap(),
39        }
40    }
41}
42
43impl ReferenceAlias {
44    #[allow(clippy::too_many_arguments)]
45    pub fn create(
46        conn: &GraphConnection,
47        reference_name: &str,
48        refseq_accession_id: Option<String>,
49        genbank_accession_id: Option<String>,
50        ucsc_id: Option<String>,
51        ensembl_id: Option<String>,
52        custom_id: Option<String>,
53        chromosome: Option<i64>,
54    ) -> rusqlite::Result<ReferenceAlias, ReferenceAliasError> {
55        conn.execute(
56            "INSERT INTO reference_aliases (reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
57            rusqlite::params![reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome],
58        )?;
59
60        Ok(ReferenceAlias {
61            reference_name: reference_name.to_string(),
62            refseq_accession_id,
63            genbank_accession_id,
64            ucsc_id,
65            ensembl_id,
66            custom_id,
67            chromosome,
68        })
69    }
70
71    fn chromosome_aliases(alias_id: &str) -> Vec<String> {
72        vec![
73            format!("chr{}", alias_id),
74            format!("Chr{}", alias_id),
75            format!("chrom{}", alias_id),
76            format!("Chrom{}", alias_id),
77            format!("chromosome{}", alias_id),
78            format!("Chromosome{}", alias_id),
79        ]
80    }
81
82    fn compute_aliases(reference_alias: ReferenceAlias) -> HashSet<String> {
83        let mut aliases = HashSet::new();
84        if let Some(refseq_id) = reference_alias.refseq_accession_id {
85            aliases.insert(refseq_id.clone());
86            aliases.insert(format!("ref|{}|", refseq_id));
87            if refseq_id.contains('.') {
88                let refseq_without_version = refseq_id.split('.').next().unwrap();
89                aliases.insert(refseq_without_version.to_string());
90                aliases.insert(format!("ref|{}|", refseq_without_version));
91            }
92        }
93        if let Some(genbank_id) = reference_alias.genbank_accession_id.clone() {
94            aliases.insert(genbank_id.clone());
95            if genbank_id.contains('.') {
96                let genbank_without_version = genbank_id.split('.').next().unwrap();
97                aliases.insert(genbank_without_version.to_string());
98            }
99        }
100        if let Some(ucsc_id) = reference_alias.ucsc_id.clone() {
101            aliases.insert(ucsc_id.clone());
102        }
103        if let Some(ensembl_id) = reference_alias.ensembl_id.clone() {
104            aliases.insert(ensembl_id.clone());
105            aliases.extend(ReferenceAlias::chromosome_aliases(&ensembl_id));
106        }
107        if let Some(custom_id) = reference_alias.custom_id {
108            aliases.insert(custom_id.clone());
109            aliases.extend(ReferenceAlias::chromosome_aliases(&custom_id));
110        }
111        if let Some(chromosome) = reference_alias.chromosome {
112            aliases.insert(chromosome.to_string());
113            aliases.extend(ReferenceAlias::chromosome_aliases(&chromosome.to_string()));
114        }
115        aliases
116    }
117
118    pub fn get_references_by_alias(
119        conn: &GraphConnection,
120        references: Vec<String>,
121    ) -> Result<HashMap<String, String>, ReferenceAliasError> {
122        let mut references_by_alias = HashMap::new();
123        let reference_aliases = ReferenceAlias::all(conn);
124        for reference_alias in reference_aliases {
125            let aliases = ReferenceAlias::compute_aliases(reference_alias);
126            for reference in &references {
127                if aliases.contains(reference) {
128                    for alias in &aliases {
129                        references_by_alias.insert(alias.clone(), reference.to_string());
130                    }
131                }
132            }
133        }
134        Ok(references_by_alias)
135    }
136}
137
138#[cfg(test)]
139mod tests {
140    use super::*;
141    use crate::test_helpers::get_connection;
142
143    #[test]
144    fn test_create() {
145        let conn = &mut get_connection(None).unwrap();
146        ReferenceAlias::create(
147            conn,
148            "Test Reference",
149            Some("REFSEQ123".to_string()),
150            Some("GENBANK123".to_string()),
151            Some("UCSC123".to_string()),
152            Some("ENSEMBL123".to_string()),
153            Some("CUSTOM123".to_string()),
154            Some(1),
155        )
156        .unwrap();
157        let new_entry = conn
158	    .query_row(
159		"SELECT reference_name, refseq_accession_id, genbank_accession_id, ucsc_id, ensembl_id, custom_id, chromosome FROM reference_aliases WHERE reference_name = ?1",
160		rusqlite::params!["Test Reference"],
161		|row| {
162		    Ok(ReferenceAlias {
163			reference_name: row.get(0)?,
164			refseq_accession_id: row.get(1)?,
165			genbank_accession_id: row.get(2)?,
166			ucsc_id: row.get(3)?,
167			ensembl_id: row.get(4)?,
168			custom_id: row.get(5)?,
169			chromosome: row.get(6)?,
170		    })
171		},
172	    )
173	    .unwrap();
174        assert_eq!(new_entry.reference_name, "Test Reference");
175        assert_eq!(new_entry.refseq_accession_id, Some("REFSEQ123".to_string()));
176    }
177
178    #[test]
179    fn test_prepopulated_aliases() {
180        let conn = &mut get_connection(None).unwrap();
181        let reference_aliases = ReferenceAlias::all(conn);
182        assert_eq!(reference_aliases.len(), 107);
183        let first_e_coli_reference = reference_aliases
184            .iter()
185            .find(|alias| alias.genbank_accession_id == Some("U00096.3".to_string()))
186            .unwrap();
187        let aliases = ReferenceAlias::compute_aliases(first_e_coli_reference.clone());
188        assert!(aliases.contains("NC_000913.3"));
189        assert!(aliases.contains("NC_000913"));
190        assert!(aliases.contains("ref|NC_000913|"));
191        assert!(aliases.contains("U00096.3"));
192        assert!(aliases.contains("U00096"));
193
194        let first_yeast_reference = reference_aliases
195            .iter()
196            .find(|alias| alias.genbank_accession_id == Some("BK006935.2".to_string()))
197            .unwrap();
198        let aliases = ReferenceAlias::compute_aliases(first_yeast_reference.clone());
199        assert!(aliases.contains("BK006935.2"));
200        assert!(aliases.contains("BK006935"));
201        assert!(aliases.contains("NC_001133.9"));
202        assert!(aliases.contains("NC_001133"));
203        assert!(aliases.contains("ref|NC_001133|"));
204        assert!(aliases.contains("chrI"));
205        assert!(aliases.contains("chr1"));
206    }
207}