Skip to main content

hpo/term/
internal.rs

1use crate::annotations::OrphaDiseases;
2use crate::annotations::{AnnotationId, OrphaDiseaseId};
3use crate::parser::binary::term::{from_bytes_v1, from_bytes_v2};
4use crate::parser::binary::{BinaryVersion, Bytes};
5use std::hash::Hash;
6
7use crate::annotations::{GeneId, Genes};
8use crate::annotations::{OmimDiseaseId, OmimDiseases};
9use crate::term::{HpoGroup, HpoTermId, InformationContent};
10use crate::DEFAULT_NUM_PARENTS;
11use crate::{HpoError, DEFAULT_NUM_GENES};
12use crate::{HpoResult, DEFAULT_NUM_ALL_PARENTS};
13use crate::{HpoTerm, DEFAULT_NUM_OMIM, DEFAULT_NUM_ORPHA};
14
/// Crate-internal storage record for a single HPO term.
///
/// Equality and hashing of this type consider only `id` (see the
/// `PartialEq` and `Hash` impls in this file); all other fields are payload.
#[derive(Clone, Debug)]
pub(crate) struct HpoTermInternal {
    /// ID of the term; the sole key used for `Hash` and `PartialEq`.
    id: HpoTermId,
    /// Name of the term. Only the first 255 bytes are serialized by `as_bytes`.
    name: String,
    /// Parent terms registered through `add_parent`.
    parents: HpoGroup,
    /// Group exposed mutably via `all_parents_mut`; starts empty.
    /// NOTE(review): presumably the cached set of all ancestors, filled in
    /// after construction (see `parents_cached`) — confirm against the caller.
    all_parents: HpoGroup,
    /// Child terms registered through `add_child`.
    children: HpoGroup,
    /// Gene annotations registered through `add_gene`.
    genes: Genes,
    /// OMIM disease annotations registered through `add_omim_disease`.
    omim_diseases: OmimDiseases,
    /// Orpha disease annotations registered through `add_orpha_disease`.
    orpha_diseases: OrphaDiseases,
    /// Information content; initialised with `InformationContent::default()`.
    ic: InformationContent,
    /// Whether the term is flagged obsolete; written as a flag byte by `as_bytes`.
    obsolete: bool,
    /// Optional ID of a replacement term; serialized as `0` when `None`.
    replacement: Option<HpoTermId>,
}
29
30impl Hash for HpoTermInternal {
31    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
32        self.id.hash(state);
33    }
34}
35
36impl Default for HpoTermInternal {
37    fn default() -> Self {
38        HpoTermInternal::new(String::from("HP:0000000"), 0u32.into())
39    }
40}
41
42impl HpoTermInternal {
43    pub fn new(name: String, id: HpoTermId) -> HpoTermInternal {
44        HpoTermInternal {
45            id,
46            name,
47            parents: HpoGroup::with_capacity(DEFAULT_NUM_PARENTS),
48            all_parents: HpoGroup::with_capacity(DEFAULT_NUM_ALL_PARENTS),
49            children: HpoGroup::with_capacity(DEFAULT_NUM_PARENTS),
50            genes: Genes::with_capacity(DEFAULT_NUM_GENES),
51            omim_diseases: OmimDiseases::with_capacity(DEFAULT_NUM_OMIM),
52            orpha_diseases: OrphaDiseases::with_capacity(DEFAULT_NUM_ORPHA),
53            ic: InformationContent::default(),
54            obsolete: false,
55            replacement: None,
56        }
57    }
58
59    pub fn try_new(id: &str, name: &str) -> HpoResult<HpoTermInternal> {
60        let id = HpoTermId::try_from(id)?;
61        Ok(Self::new(name.to_string(), id))
62    }
63
64    pub fn id(&self) -> &HpoTermId {
65        &self.id
66    }
67
68    pub fn name(&self) -> &str {
69        &self.name
70    }
71
72    pub fn parents(&self) -> &HpoGroup {
73        &self.parents
74    }
75
76    pub fn children(&self) -> &HpoGroup {
77        &self.children
78    }
79
80    pub fn all_parents(&self) -> &HpoGroup {
81        &self.all_parents
82    }
83
84    pub fn all_parents_mut(&mut self) -> &mut HpoGroup {
85        &mut self.all_parents
86    }
87
88    pub fn genes(&self) -> &Genes {
89        &self.genes
90    }
91
92    pub fn omim_diseases(&self) -> &OmimDiseases {
93        &self.omim_diseases
94    }
95
96    pub fn orpha_diseases(&self) -> &OrphaDiseases {
97        &self.orpha_diseases
98    }
99
100    pub fn parents_cached(&self) -> bool {
101        if self.parents.is_empty() {
102            true
103        } else {
104            !self.all_parents.is_empty()
105        }
106    }
107
108    pub fn add_parent<I: Into<HpoTermId>>(&mut self, parent_id: I) {
109        self.parents.insert(parent_id.into());
110    }
111
112    pub fn add_child<I: Into<HpoTermId>>(&mut self, child_id: I) {
113        self.children.insert(child_id.into());
114    }
115
116    pub fn add_gene(&mut self, gene_id: GeneId) -> bool {
117        self.genes.insert(gene_id)
118    }
119
120    pub fn add_omim_disease(&mut self, omim_disease_id: OmimDiseaseId) -> bool {
121        self.omim_diseases.insert(omim_disease_id)
122    }
123
124    pub fn add_orpha_disease(&mut self, orpha_disease_id: OrphaDiseaseId) -> bool {
125        self.orpha_diseases.insert(orpha_disease_id)
126    }
127
128    pub fn information_content(&self) -> &InformationContent {
129        &self.ic
130    }
131
132    pub fn information_content_mut(&mut self) -> &mut InformationContent {
133        &mut self.ic
134    }
135
136    pub fn obsolete(&self) -> bool {
137        self.obsolete
138    }
139
140    pub fn obsolete_mut(&mut self) -> &mut bool {
141        &mut self.obsolete
142    }
143
144    pub fn replacement(&self) -> Option<HpoTermId> {
145        self.replacement
146    }
147
148    pub fn replacement_mut(&mut self) -> &mut Option<HpoTermId> {
149        &mut self.replacement
150    }
151
152    /// Returns a binary representation of the `HpoTermInternal`
153    ///
154    /// The binary layout is defined as:
155    ///
156    /// | Byte offset | Number of bytes | Description |
157    /// | --- | --- | --- |
158    /// | 0 | 4 | The total length of the binary data blob as big-endian `u32` |
159    /// | 4 | 4 | The Term ID as big-endian `u32` |
160    /// | 8 | 1 | The length of the Term Name (converted to a u8 vector) as a `u8` |
161    /// | 9 | n | The Term name as u8 vector. If the name has more than 255 bytes, it is trimmed to 255 |
162    /// | 9 + n | 1 | Flag to indicate if term is obsolete
163    /// | 10 + n | 4 | Term ID of a replacement term as big-endian `u32` or `0` if `None` |
164    ///
165    /// # Panics
166    ///
167    /// This method will panic if the total byte length is longer than `u32::MAX`
168    pub fn as_bytes(&self) -> Vec<u8> {
169        // 4 bytes for total length
170        // 4 bytes for TermID (big-endian)
171        // 1 byte for Name length (u8) -> Name cannot be longer than 255 bytes
172        // 1 byte for obsolete flag
173        // 4 byte for replacement term
174        // name in u8 encoded
175        let name = self.name().as_bytes();
176        let name_length = std::cmp::min(name.len(), 255);
177        let size = name_length + 4 + 4 + 1 + 1 + 4;
178
179        let mut res = Vec::with_capacity(size);
180
181        // 4 bytes for total length
182        res.append(&mut u32::try_from(size).unwrap().to_be_bytes().to_vec());
183
184        // 4 bytes to Term-ID
185        res.append(&mut self.id.to_be_bytes().to_vec());
186
187        // 1 byte for Length of Term Name (can't be longer than 255 bytes)
188        // casting is safe, since name_length is < 256
189        #[allow(clippy::cast_possible_truncation)]
190        res.push(name_length as u8);
191
192        // Term name (up to 255 bytes)
193        for c in name.iter().take(name_length) {
194            res.push(*c);
195        }
196
197        // 1 byte for various flags, currently only obsolete flag
198        if self.obsolete {
199            res.push(1u8);
200        } else {
201            res.push(0u8);
202        }
203
204        // 4 bytes for replace term (or 0 if `None`)
205        res.append(
206            &mut self
207                .replacement
208                .unwrap_or(0u32.into())
209                .to_be_bytes()
210                .to_vec(),
211        );
212
213        res
214    }
215
216    /// Returns a binary representation of Term - Parent connections
217    ///
218    /// The binary layout is defined as:
219    ///
220    /// | Byte offset | Number of bytes | Description |
221    /// | --- | --- | --- |
222    /// | 0 | 4 | The number of parent terms as big-endian `u32` |
223    /// | 4 | 4 | The Term ID of the term as big-endian `u32` |
224    /// | 8 | 4 * n | The Term ID of all parents as big-endian `u32` |
225    ///
226    /// # Panics
227    ///
228    /// This method will panic if there are more than `u32::MAX` parents
229    pub fn parents_as_byte(&self) -> Vec<u8> {
230        let mut term_parents: Vec<u8> = Vec::new();
231        let n_parents: u32 = self.parents().len().try_into().unwrap();
232        term_parents.append(&mut n_parents.to_be_bytes().to_vec());
233        term_parents.append(&mut self.id().to_be_bytes().to_vec());
234        for parent in self.parents() {
235            term_parents.append(&mut parent.to_be_bytes().to_vec());
236        }
237        term_parents
238    }
239}
240
241impl PartialEq for HpoTermInternal {
242    fn eq(&self, other: &Self) -> bool {
243        self.id == other.id
244    }
245}
246
// Marker impl: declares the ID-based `PartialEq` of `HpoTermInternal` to be
// a full equivalence relation.
impl Eq for HpoTermInternal {}
248
249impl TryFrom<Bytes<'_>> for HpoTermInternal {
250    type Error = HpoError;
251    /// Crates an `HpoTermInternal` from raw bytes
252    ///
253    /// See [`HpoTermInternal::as_bytes`] for description of the byte layout
254    fn try_from(bytes: Bytes) -> Result<Self, Self::Error> {
255        match bytes.version() {
256            BinaryVersion::V1 => from_bytes_v1(bytes),
257            _ => from_bytes_v2(bytes),
258        }
259    }
260}
261
262impl From<&HpoTerm<'_>> for HpoTermInternal {
263    fn from(term: &HpoTerm) -> Self {
264        let mut internal = Self::new(term.name().to_string(), term.id());
265        *internal.obsolete_mut() = term.is_obsolete();
266        *internal.replacement_mut() = term.replaced_by().map(|repl| repl.id());
267        internal
268    }
269}
270
#[cfg(test)]
mod test {
    use super::*;
    use crate::parser::binary::BinaryTermBuilder;

    /// Checks every field of the serialized layout of a plain term.
    #[test]
    fn to_bytes() {
        let term = HpoTermInternal::new(String::from("Foobar"), 123u32.into());

        let bytes: Vec<u8> = term.as_bytes();

        let term_len = u32::from_be_bytes(bytes[0..4].try_into().unwrap()) as usize;
        assert_eq!(term_len, 4 + 4 + 1 + 6 + 5);
        // The length header must describe the whole blob.
        assert_eq!(bytes.len(), term_len);
        let term_id = u32::from_be_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(term_id, 123);
        let name_len = bytes[8] as usize;
        assert_eq!(name_len, 6);
        let name = String::from_utf8(bytes[9..9 + name_len].to_vec()).unwrap();
        assert_eq!(name, "Foobar");
        // A fresh term is not obsolete and has no replacement.
        assert_eq!(bytes[9 + name_len], 0);
        let replacement =
            u32::from_be_bytes(bytes[10 + name_len..14 + name_len].try_into().unwrap());
        assert_eq!(replacement, 0);
    }

    /// Checks that the obsolete flag and the replacement ID are serialized.
    #[test]
    fn to_bytes_obsolete_with_replacement() {
        let mut term = HpoTermInternal::new(String::from("Foobar"), 123u32.into());
        *term.obsolete_mut() = true;
        *term.replacement_mut() = Some(456u32.into());

        let bytes: Vec<u8> = term.as_bytes();

        assert_eq!(bytes.len(), 4 + 4 + 1 + 6 + 5);
        assert_eq!(bytes[15], 1);
        let replacement = u32::from_be_bytes(bytes[16..20].try_into().unwrap());
        assert_eq!(replacement, 456);
    }

    /// Checks the layout of the Term - Parent connection record.
    #[test]
    fn parents_to_bytes() {
        let mut term = HpoTermInternal::new(String::from("Foobar"), 123u32.into());
        term.add_parent(77u32);

        let bytes: Vec<u8> = term.parents_as_byte();

        assert_eq!(bytes.len(), 4 + 4 + 4);
        let n_parents = u32::from_be_bytes(bytes[0..4].try_into().unwrap());
        assert_eq!(n_parents, 1);
        let term_id = u32::from_be_bytes(bytes[4..8].try_into().unwrap());
        assert_eq!(term_id, 123);
        let parent_id = u32::from_be_bytes(bytes[8..12].try_into().unwrap());
        assert_eq!(parent_id, 77);
    }

    #[test]
    fn from_bytes() {
        let term = HpoTermInternal::new(String::from("Foobar"), 123u32.into());
        let bytes: Vec<u8> = term.as_bytes();
        let term2 = HpoTermInternal::try_from(Bytes::new(&bytes[..], BinaryVersion::V2)).unwrap();
        assert_eq!(term2.name(), term.name());
        assert_eq!(term2.id(), term.id());
    }

    #[test]
    fn from_multiple_bytes() {
        let mut v: Vec<u8> = Vec::new();

        let test_terms = [
            ("t1", 1u32),
            ("Term with a very long name", 2u32),
            ("", 3u32),
            ("Abnormality", 4u32),
        ];

        for (name, id) in test_terms {
            let t = HpoTermInternal::new(String::from(name), id.into());
            println!("Building: {t:?}");
            v.append(&mut t.as_bytes());
        }

        let mut term_iter = BinaryTermBuilder::new(Bytes::new(&v, BinaryVersion::V2));

        for (name, id) in test_terms {
            let term = term_iter.next().unwrap();
            println!("Checking: {term:?} [{name}-{id}]");
            assert_eq!(term.name(), name);
            assert_eq!(term.id().as_u32(), id);
        }

        assert!(term_iter.next().is_none());
    }
}