1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
use serde::{Serialize, Deserialize};
/// The SMILES strings of one dataset, addressed by dataset id.
///
/// `ids` and `smiles` are parallel vectors: the entry at position `i` of
/// `smiles` belongs to the id at position `i` of `ids`. [`DocSMILES::new`]
/// stores both vectors ordered by id, which is what allows
/// [`DocSMILES::get_smiles`] to locate an id with a binary search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocSMILES {
    /// Dataset ids; in ascending order when the value was built through `new`.
    ids: Vec<chiral_common::DatasetID>,
    /// SMILES strings, kept in the same order as `ids`.
    smiles: Vec<chiral_common::SMILES>,
}
impl DocSMILES {
    /// Builds a document from two parallel vectors of ids and SMILES strings.
    ///
    /// A single sorting permutation is computed from `ids_in` and applied to
    /// both vectors, so the ids end up in ascending order and every SMILES
    /// string stays paired with its id.
    ///
    /// # Panics
    ///
    /// Panics if `ids_in` and `smiles_in` differ in length.
    pub fn new(ids_in: Vec<chiral_common::DatasetID>, smiles_in: Vec<chiral_common::SMILES>) -> Self {
        // Fail early with a clear message instead of deep inside the permutation code.
        assert_eq!(
            ids_in.len(),
            smiles_in.len(),
            "DocSMILES::new: ids and smiles must have the same length"
        );

        let mut perm = permutation::sort(&ids_in);
        let mut ids = ids_in;
        perm.apply_slice_in_place(&mut ids);
        let mut smiles = smiles_in;
        perm.apply_slice_in_place(&mut smiles);
        Self { ids, smiles }
    }

    /// Serializes the document to its JSON representation.
    ///
    /// # Panics
    ///
    /// Panics if `serde_json` reports a serialization error.
    pub fn serialize(&self) -> chiral_common::SerializedFormat {
        serde_json::to_string(self).expect("failed to serialize DocSMILES to JSON")
    }

    /// Reconstructs a document from JSON produced by [`DocSMILES::serialize`].
    ///
    /// The vectors are taken exactly as stored; the ids are not re-sorted
    /// here, so lookups are only reliable when `content` holds ids in
    /// ascending order (as `serialize` output of a `new`-built value does).
    ///
    /// # Panics
    ///
    /// Panics if `content` cannot be parsed as a `DocSMILES`.
    pub fn deserialize(content: &chiral_common::SerializedFormat) -> Self {
        serde_json::from_str(content).expect("failed to deserialize DocSMILES from JSON")
    }

    /// Returns the SMILES string stored for `id`, or `None` when `id` is not
    /// found by the binary search over the ids.
    pub fn get_smiles(&self, id: &chiral_common::DatasetID) -> Option<&chiral_common::SMILES> {
        self.ids
            .binary_search(id)
            .ok()
            .and_then(|index| self.smiles.get(index))
    }

    /// Returns all SMILES strings in their stored order.
    pub fn get_smiles_vec(&self) -> &Vec<chiral_common::SMILES> {
        &self.smiles
    }

    /// Returns all ids in their stored order.
    pub fn get_ids(&self) -> &Vec<chiral_common::DatasetID> {
        &self.ids
    }

    /// Returns the number of ids in the document.
    pub fn len(&self) -> usize {
        self.ids.len()
    }

    /// Returns `true` when the document holds no ids.
    pub fn is_empty(&self) -> bool {
        self.ids.is_empty()
    }
}
/// Fixture document with four ids, built through `DocSMILES::new`; the tests
/// in this file use it via `DocSMILES::dummy()`.
impl crate::Dummy for DocSMILES {
// The ids are listed out of sorted order ("label_3" precedes "label_2"),
// so `new` has to reorder them together with their SMILES strings.
fn dummy() -> Self {
let ids = vec![
"label_1".to_string(),
"label_3".to_string(),
"label_2".to_string(),
"label_4".to_string()
];
// NOTE(review): the `let smiles` line below looks truncated in this copy of
// the file -- the `vec![` opener and the leading entries appear to be
// missing, so it cannot compile as shown. The test in this file expects
// "label_3" to map to "N1=C(c3c(Sc2c1cccc2)cccc3)N4CCN(CCOCCO)CC4";
// restore the full list from version control before relying on this fixture.
let smiles = vecC[C@H](O)CCn2c(c(c(c2c1ccc(F)cc1)c3ccccc3)C(=O)Nc4ccccc4)C(C)C"),
String::from("CC(=O)Nc1ccc(O)cc1")
];
Self::new(ids, smiles)
}
}
impl From<crate::SourceChembl> for DocSMILES {
fn from(sc: crate::SourceChembl) -> Self {
let (smiles, ids) = sc.get_smiles_id_pairs();
DocSMILES::new(ids, smiles)
}
}
/// Collection of SMILES documents, keyed by dataset kind.
pub type DocStoreSMILES = std::collections::HashMap<chiral_common::kinds::Dataset, DocSMILES>;

impl crate::Info for DocStoreSMILES {
    /// Renders a plain-text summary table of the store.
    ///
    /// The output is a header line (`name` / `entries`), a rule of 31 `=`
    /// characters, and then one row per dataset with its name and the number
    /// of entries in its document. Rows are sorted so that the report does
    /// not depend on the map's iteration order.
    fn info(&self) -> String {
        let mut rows: Vec<String> = self
            .iter()
            .map(|(k, v)| format!("{:15} {:15}", k, v.len()))
            .collect();
        // HashMap iteration order is unspecified; sort for a stable report.
        rows.sort_unstable();

        let mut report = format!("{:15} {:>15}\n", "name", "entries");
        report.push_str(&"=".repeat(31));
        report.push('\n');
        report.push_str(&rows.join("\n"));
        report
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Dummy, Info};

    /// Loads the ChEMBL file at `path` and converts it into a document.
    fn doc_from_chembl_file(path: &str) -> DocSMILES {
        let mut source = crate::SourceChembl::new();
        source.set_path(&std::ffi::OsString::from(path));
        source.load_all();
        DocSMILES::from(source)
    }

    /// The dummy document has four entries and resolves ids to SMILES.
    #[test]
    fn test_doc_smiles() {
        let doc = DocSMILES::dummy();
        assert_eq!(doc.ids.len(), 4);
        assert_eq!(doc.smiles.len(), 4);

        let known_id = String::from("label_3");
        let expected = String::from("N1=C(c3c(Sc2c1cccc2)cccc3)N4CCN(CCOCCO)CC4");
        assert_eq!(doc.get_smiles(&known_id), Some(&expected));

        let unknown_id = String::from("label_5");
        assert_eq!(doc.get_smiles(&unknown_id), None);
    }

    /// Converting the 100-line ChEMBL sample yields 100 ids and 100 SMILES.
    #[test]
    fn test_chembl() {
        let doc = doc_from_chembl_file("../../../chiral-db-example-data/ChEMBL/chembl_30_chemreps_100.txt");
        assert_eq!(doc.ids.len(), 100);
        assert_eq!(doc.smiles.len(), 100);
    }

    /// The store report mentions both datasets and their entry counts.
    #[test]
    fn test_store() {
        let mut store = DocStoreSMILES::new();
        store.insert(chiral_common::kinds::Dataset::Dummy, DocSMILES::dummy());
        store.insert(
            chiral_common::kinds::Dataset::TestChembl,
            doc_from_chembl_file("../../../chiral-db-example-data/ChEMBL/chembl_30_chemreps_10k.txt"),
        );

        let report = store.info();
        println!("{}", report);
        assert!(report.contains("test_chembl"));
        assert!(report.contains("dummy"));
        assert!(report.contains("4"));
        assert!(report.contains("100"));
    }

    /// A document loaded for the test ChEMBL dataset keeps ids paired with
    /// their SMILES strings.
    #[test]
    fn test_permutation() {
        let dataset = chiral_common::kinds::Dataset::TestChembl;
        let doc = crate::load::<DocSMILES>(&dataset);
        let id = "CHEMBL10030".to_string();
        let found = doc.get_smiles(&id).unwrap();
        assert_eq!(
            found.to_string(),
            "O=C(c1ccc(OCCN2CCCC2)cc1)c1c(-c2ccc(O)cc2)sc2cc(O)ccc12".to_string()
        );
    }
}