chiral_common/data/doc/
smiles.rs1use serde::{Serialize, Deserialize};
6use crate::traits::{Serialization, SerializedFormat};
7use chiral_derive::Serialization;
8
9#[derive(Serialize, Deserialize, Serialization)]
10pub struct DocSMILES {
11 ids: Vec<crate::data::types::EntryID>,
12 smiles: Vec<crate::app::chem::types::SMILES>,
13}
14
15impl DocSMILES {
16 pub fn empty() -> Self {
17 Self { ids: vec![], smiles: vec![] }
18 }
19
20 pub fn new(ids_in: Vec<crate::data::types::EntryID>, smiles_in: Vec<crate::app::chem::types::SMILES>) -> Self {
21 let mut perm = permutation::sort(&ids_in);
22 let mut ids = ids_in;
23 perm.apply_slice_in_place(&mut ids);
24 let mut smiles = smiles_in;
25 perm.apply_slice_in_place(&mut smiles);
26
27 Self { ids, smiles }
28 }
29
30 pub fn get_smiles(&self, id: &crate::data::types::EntryID) -> Option<&crate::app::chem::types::SMILES> {
31 match self.ids.binary_search(id) {
32 Ok(index) => self.smiles.get(index),
33 Err(_) => None
34 }
35 }
36
37 pub fn extract_ids(&self, range: &std::ops::Range<usize>) -> Vec<crate::data::types::EntryID> { self.get_ids().as_slice()[range.to_owned()].to_vec() }
38 pub fn extract_smiles_vec(&self, range: &std::ops::Range<usize>) -> Vec<crate::app::chem::types::SMILES> { self.get_smiles_vec().as_slice()[range.to_owned()].to_vec() }
39 pub fn extract(&self, range: &std::ops::Range<usize>) -> Self { Self::new(self.extract_ids(range), self.extract_smiles_vec(range)) }
40
41 pub fn get_smiles_vec(&self) -> &Vec<crate::app::chem::types::SMILES> { &self.smiles }
42 pub fn get_ids(&self) -> &Vec<crate::data::types::EntryID> { &self.ids }
43 pub fn len(&self) -> usize { self.ids.len() }
44}
45
46impl crate::data::Empty for DocSMILES {
47 fn empty() -> Self {
48 Self::new(vec![], vec![])
49 }
50}
51
// Sample-data constructor: passes four labels (listed out of sorted order)
// and a SMILES list to `Self::new`.
// NOTE(review): this block still carries text-extraction damage. The
// `let smiles = vec...` line is truncated: the `vec![` opener and the leading
// SMILES literals are missing. Restore them from the upstream file rather
// than guessing the strings. TODO confirm against the repository copy.
52impl crate::data::Dummy for DocSMILES {
53 fn dummy() -> Self {
54 let ids = vec![
55 "label_1".to_string(),
56 "label_3".to_string(),
57 "label_2".to_string(),
58 "label_4".to_string()
59 ];
60 let smiles = vecC[C@H](O)CCn2c(c(c(c2c1ccc(F)cc1)c3ccccc3)C(=O)Nc4ccccc4)C(C)C"),
64 String::from("CC(=O)Nc1ccc(O)cc1")
65 ];
66
67 Self::new(ids, smiles)
68 }
69}
70
71impl From<crate::data::SourceChembl> for DocSMILES {
72 fn from(sc: crate::data::SourceChembl) -> Self {
73 let (smiles, ids) = sc.get_smiles_id_pairs();
74 DocSMILES::new(ids, smiles)
75 }
76}
77
78pub type DocStoreSMILES = std::collections::HashMap<crate::kinds::Dataset, DocSMILES>;
80
81impl crate::data::Info for DocStoreSMILES {
82 fn info(&self) -> String {
83 format!("{:15} {:>15}\n", "name", "entries")
84 + vec!["=";31].join("").as_str() + "\n"
85 + self.iter()
86 .map(|(k, v)| format!("{:15} {:15}", k, v.len()))
87 .collect::<Vec<String>>()
88 .join("\n")
89 .as_str()
90 }
91}
92
93
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{Dummy, Info};

    /// The dummy document has four aligned entries and answers ID lookups.
    #[test]
    fn test_doc_smiles() {
        let dummy_doc = DocSMILES::dummy();
        assert_eq!(dummy_doc.ids.len(), 4);
        assert_eq!(dummy_doc.smiles.len(), 4);

        let known_id = "label_3".to_string();
        assert_eq!(
            dummy_doc.get_smiles(&known_id),
            Some(&String::from("N1=C(c3c(Sc2c1cccc2)cccc3)N4CCN(CCOCCO)CC4"))
        );

        let unknown_id = "label_5".to_string();
        assert_eq!(dummy_doc.get_smiles(&unknown_id), None);
    }

    /// Converting the 100-row ChEMBL fixture yields 100 IDs and 100 SMILES.
    #[test]
    fn test_chembl() {
        // Fixture path is relative, so the test depends on the working directory.
        let fixture = std::ffi::OsString::from("../../../chiral-db-example-data/ChEMBL/chembl_30_chemreps_100.txt");
        let mut source = crate::data::SourceChembl::new();
        source.set_path(&fixture);
        source.load_all();

        let converted = DocSMILES::from(source);
        assert_eq!(converted.ids.len(), 100);
        assert_eq!(converted.smiles.len(), 100);
    }

    /// The store report names both datasets and shows their entry counts.
    #[test]
    fn test_store() {
        let mut store = DocStoreSMILES::new();
        store.insert(crate::kinds::Dataset::Dummy, DocSMILES::dummy());

        let fixture = std::ffi::OsString::from("../../../chiral-db-example-data/ChEMBL/chembl_30_chemreps_10k.txt");
        let mut source = crate::data::SourceChembl::new();
        source.set_path(&fixture);
        source.load_all();
        store.insert(crate::kinds::Dataset::TestChembl, DocSMILES::from(source));

        let report = store.info();
        println!("{}", report);
        assert!(report.contains("test_chembl"));
        assert!(report.contains("dummy"));
        assert!(report.contains("4"));
        assert!(report.contains("100"));
    }

    /// A document loaded from disk resolves a known ChEMBL ID to its SMILES.
    #[test]
    fn test_permutation() {
        let dataset = crate::kinds::Dataset::TestChembl;
        let data_dir = std::path::PathBuf::from("../../../chiral-db-example-data/ChEMBL");
        let loaded = crate::data::load_from_path::<DocSMILES>(&dataset, &data_dir);

        let wanted = "CHEMBL10030".to_string();
        let found = loaded.get_smiles(&wanted).unwrap();
        assert_eq!(
            found.to_string(),
            "O=C(c1ccc(OCCN2CCCC2)cc1)c1c(-c2ccc(O)cc2)sc2cc(O)ccc12".to_string()
        );
    }
}