1use super::descriptors::MolecularDescriptors;
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct MlPropertyResult {
13 pub logp: f64,
15 pub molar_refractivity: f64,
17 pub log_solubility: f64,
19 pub lipinski: LipinskiResult,
21 pub druglikeness: f64,
23 pub uncertainty: PredictionUncertainty,
25}
26
27#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct PredictionUncertainty {
30 pub confidence: f64,
32 pub logp_std: f64,
34 pub solubility_std: f64,
36 pub warnings: Vec<String>,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct LipinskiResult {
43 pub mw_ok: bool,
45 pub logp_ok: bool,
47 pub hbd_ok: bool,
49 pub hba_ok: bool,
51 pub violations: u8,
53 pub passes: bool,
55}
56
57fn predict_logp(desc: &MolecularDescriptors) -> f64 {
62 let base = 0.120 * desc.n_heavy_atoms as f64;
64 let h_correction = -0.230 * desc.n_hbd as f64; let ring_correction = 0.150 * desc.n_rings as f64;
66 let aromatic_correction = 0.080 * desc.n_aromatic as f64;
67 let polar_correction = -0.310 * desc.n_hba as f64;
68 let sp3_correction = -0.180 * desc.fsp3;
69 let mw_term = 0.005 * (desc.molecular_weight - 100.0);
70
71 base + h_correction
72 + ring_correction
73 + aromatic_correction
74 + polar_correction
75 + sp3_correction
76 + mw_term
77}
78
79fn predict_molar_refractivity(desc: &MolecularDescriptors) -> f64 {
83 let base = 2.536 * desc.sum_polarizability;
86 let ring_correction = 1.20 * desc.n_rings as f64;
87 let aromatic = 0.80 * desc.n_aromatic as f64;
88 base + ring_correction + aromatic
89}
90
91fn predict_solubility(desc: &MolecularDescriptors, logp: f64) -> f64 {
95 let frac_aromatic = if desc.n_heavy_atoms > 0 {
96 desc.n_aromatic as f64 / desc.n_heavy_atoms as f64
97 } else {
98 0.0
99 };
100 0.16 - 0.63 * logp - 0.0062 * desc.molecular_weight + 0.066 * desc.n_rotatable_bonds as f64
101 - 0.74 * frac_aromatic
102}
103
104fn druglikeness_score(desc: &MolecularDescriptors, logp: f64) -> f64 {
106 let mut score = 1.0;
107
108 if desc.molecular_weight > 500.0 {
110 score -= 0.2 * ((desc.molecular_weight - 500.0) / 200.0).min(1.0);
111 }
112 if logp > 5.0 {
114 score -= 0.2 * ((logp - 5.0) / 3.0).min(1.0);
115 } else if logp < -2.0 {
116 score -= 0.15;
117 }
118 if desc.n_hbd > 5 {
120 score -= 0.15;
121 }
122 if desc.n_hba > 10 {
124 score -= 0.15;
125 }
126 if desc.n_rotatable_bonds > 10 {
128 score -= 0.1 * ((desc.n_rotatable_bonds as f64 - 10.0) / 5.0).min(1.0);
129 }
130 score += 0.05 * desc.fsp3;
132
133 score.clamp(0.0, 1.0)
134}
135
136fn estimate_uncertainty(desc: &MolecularDescriptors) -> PredictionUncertainty {
138 let mut confidence: f64 = 1.0;
139 let mut warnings = Vec::new();
140
141 if desc.molecular_weight > 900.0 {
143 confidence -= 0.3;
144 warnings.push("MW > 900 — outside typical training domain".to_string());
145 } else if desc.molecular_weight > 600.0 {
146 confidence -= 0.1;
147 }
148
149 if desc.n_heavy_atoms > 70 {
150 confidence -= 0.2;
151 warnings.push("Large molecule (>70 heavy atoms) — reduced accuracy".to_string());
152 }
153
154 if desc.n_rings > 8 {
155 confidence -= 0.15;
156 warnings.push("Many rings — polycyclic compounds have higher uncertainty".to_string());
157 }
158
159 if desc.n_rotatable_bonds > 15 {
160 confidence -= 0.1;
161 }
162
163 let logp_std = 0.6 + 0.002 * desc.molecular_weight.max(0.0); let sol_std = 0.8 + 0.003 * desc.molecular_weight.max(0.0);
166
167 PredictionUncertainty {
168 confidence: confidence.clamp(0.1, 1.0),
169 logp_std: logp_std / confidence.max(0.3),
170 solubility_std: sol_std / confidence.max(0.3),
171 warnings,
172 }
173}
174
175pub fn predict_properties(desc: &MolecularDescriptors) -> MlPropertyResult {
180 let logp = predict_logp(desc);
181 let mr = predict_molar_refractivity(desc);
182 let log_s = predict_solubility(desc, logp);
183
184 let mw_ok = desc.molecular_weight <= 500.0;
185 let logp_ok = logp <= 5.0;
186 let hbd_ok = desc.n_hbd <= 5;
187 let hba_ok = desc.n_hba <= 10;
188 let violations = [!mw_ok, !logp_ok, !hbd_ok, !hba_ok]
189 .iter()
190 .filter(|&&v| v)
191 .count() as u8;
192
193 let lipinski = LipinskiResult {
194 mw_ok,
195 logp_ok,
196 hbd_ok,
197 hba_ok,
198 violations,
199 passes: violations <= 1,
200 };
201
202 let druglikeness = druglikeness_score(desc, logp);
203
204 let uncertainty = estimate_uncertainty(desc);
205
206 MlPropertyResult {
207 logp,
208 molar_refractivity: mr,
209 log_solubility: log_s,
210 lipinski,
211 druglikeness,
212 uncertainty,
213 }
214}
215
216#[cfg(test)]
217mod tests {
218 use super::*;
219 use crate::ml::descriptors::compute_descriptors;
220
221 #[test]
222 fn test_predict_water() {
223 let elements = [8u8, 1, 1];
224 let bonds = [(0, 1, 1u8), (0, 2, 1)];
225 let desc = compute_descriptors(&elements, &bonds, &[], &[]);
226 let result = predict_properties(&desc);
227 assert!(
229 result.logp < 1.0,
230 "Water logP should be low: {}",
231 result.logp
232 );
233 assert!(result.lipinski.passes, "Water should pass Lipinski");
234 }
235
236 #[test]
237 fn test_lipinski_violations() {
238 let desc = MolecularDescriptors {
240 molecular_weight: 800.0,
241 n_heavy_atoms: 60,
242 n_hydrogens: 20,
243 n_bonds: 80,
244 n_rotatable_bonds: 15,
245 n_hbd: 8,
246 n_hba: 15,
247 fsp3: 0.1,
248 total_abs_charge: 5.0,
249 max_charge: 0.5,
250 min_charge: -0.5,
251 wiener_index: 5000.0,
252 n_rings: 5,
253 n_aromatic: 12,
254 balaban_j: 2.0,
255 sum_electronegativity: 150.0,
256 sum_polarizability: 80.0,
257 };
258 let result = predict_properties(&desc);
259 assert!(
260 result.lipinski.violations >= 2,
261 "Should have multiple violations"
262 );
263 assert!(!result.lipinski.passes, "Should fail Lipinski");
264 }
265
266 #[test]
267 fn test_druglikeness_range() {
268 let elements = [6u8, 6, 8, 1, 1, 1, 1, 1, 1];
269 let bonds = [
270 (0, 1, 1u8),
271 (1, 2, 1),
272 (0, 3, 1),
273 (0, 4, 1),
274 (0, 5, 1),
275 (1, 6, 1),
276 (1, 7, 1),
277 (2, 8, 1),
278 ];
279 let desc = compute_descriptors(&elements, &bonds, &[], &[]);
280 let result = predict_properties(&desc);
281 assert!(result.druglikeness >= 0.0 && result.druglikeness <= 1.0);
282 }
283}