biocommons_bioutils/assemblies/
mod.rs1use std::io::Read;
4
5use enum_map::{enum_map, Enum, EnumMap};
6use flate2::read::GzDecoder;
7use serde::Deserialize;
8
9const GRCH37_JSON_GZ: &[u8] = include_bytes!("_data/GRCh37.json.gz");
10const GRCH37_P10_JSON_GZ: &[u8] = include_bytes!("_data/GRCh37.p10.json.gz");
11const GRCH38_JSON_GZ: &[u8] = include_bytes!("_data/GRCh38.json.gz");
12
13#[derive(Debug, Deserialize, Enum, Clone, Copy, PartialEq, Eq, Hash)]
14pub enum Assembly {
15 Grch37,
16 Grch37p10,
17 Grch38,
18}
19
20impl Assembly {
21 fn load_assembly_info(&self) -> AssemblyInfo {
23 let payload = match self {
24 Assembly::Grch37 => GRCH37_JSON_GZ,
25 Assembly::Grch37p10 => GRCH37_P10_JSON_GZ,
26 Assembly::Grch38 => GRCH38_JSON_GZ,
27 };
28 let mut d = GzDecoder::new(payload);
29 let mut grch37_json = String::new();
30 d.read_to_string(&mut grch37_json)
31 .expect("should not happen; invalid gzip in embedded data");
32 serde_json::from_str::<AssemblyInfo>(&grch37_json)
33 .expect("should not happen; invalid JSON in embedded data")
34 }
35}
36
37#[derive(Debug, Deserialize)]
38pub struct Sequence {
39 pub aliases: Vec<String>,
40 pub assembly_unit: String,
41 pub genbank_ac: String,
42 pub length: usize,
43 pub name: String,
44 pub refseq_ac: String,
45 pub relationship: String,
46 pub sequence_role: String,
47}
48
49#[derive(Debug, Deserialize)]
50pub struct AssemblyInfo {
51 pub date: String,
52 pub description: String,
53 pub genbank_ac: String,
54 pub name: String,
55 pub refseq_ac: String,
56 pub sequences: Vec<Sequence>,
57 pub submitter: String,
58}
59
60lazy_static::lazy_static! {
61 pub static ref ASSEMBLY_INFOS: EnumMap<Assembly, AssemblyInfo> = enum_map! {
63 Assembly::Grch37 => Assembly::Grch37.load_assembly_info(),
64 Assembly::Grch37p10 => Assembly::Grch37p10.load_assembly_info(),
65 Assembly::Grch38 => Assembly::Grch38.load_assembly_info(),
66 };
67}
68
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use crate::assemblies::{Assembly, ASSEMBLY_INFOS};

    /// Every embedded assembly loads and reports the expected number of sequences.
    #[test]
    fn smoke() {
        let expected_counts = [
            (Assembly::Grch37, 92),
            (Assembly::Grch37p10, 275),
            (Assembly::Grch38, 455),
        ];

        for (assembly, expected) in expected_counts {
            assert_eq!(
                ASSEMBLY_INFOS[assembly].sequences.len(),
                expected,
                "sequence count for {:?}",
                assembly
            );
        }
    }
}
82
83