1use conv::ValueInto;
4use indexmap::{IndexMap, IndexSet};
5use ndarray::Array2;
6use serde::de::DeserializeOwned;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::fmt;
10use std::fs::File;
11use std::io;
12use std::io::BufReader;
13use std::path::Path;
14use thiserror::Error;
15
16mod chemical_record;
17mod identifier;
18mod model_record;
19mod segment;
20
21pub use chemical_record::{ChemicalRecord, SegmentCount};
22pub use identifier::{Identifier, IdentifierOption};
23pub use model_record::{BinaryRecord, FromSegments, FromSegmentsBinary, PureRecord};
24pub use segment::SegmentRecord;
25
26pub trait Parameter
32where
33 Self: Sized,
34{
35 type Pure: Clone + DeserializeOwned;
36 type Binary: Clone + DeserializeOwned + Default;
37
38 fn from_records(
40 pure_records: Vec<PureRecord<Self::Pure>>,
41 binary_records: Option<Array2<Self::Binary>>,
42 ) -> Result<Self, ParameterError>;
43
44 fn new_pure(pure_record: PureRecord<Self::Pure>) -> Result<Self, ParameterError> {
46 Self::from_records(vec![pure_record], None)
47 }
48
49 fn new_binary(
52 pure_records: Vec<PureRecord<Self::Pure>>,
53 binary_record: Option<Self::Binary>,
54 ) -> Result<Self, ParameterError> {
55 let binary_record = binary_record.map(|br| {
56 Array2::from_shape_fn([2, 2], |(i, j)| {
57 if i == j {
58 Self::Binary::default()
59 } else {
60 br.clone()
61 }
62 })
63 });
64 Self::from_records(pure_records, binary_record)
65 }
66
67 fn from_model_records(model_records: Vec<Self::Pure>) -> Result<Self, ParameterError> {
70 let pure_records = model_records
71 .into_iter()
72 .map(|r| PureRecord::new(Default::default(), Default::default(), r))
73 .collect();
74 Self::from_records(pure_records, None)
75 }
76
77 #[allow(clippy::type_complexity)]
79 fn records(&self) -> (&[PureRecord<Self::Pure>], Option<&Array2<Self::Binary>>);
80
81 #[allow(clippy::expect_fun_call)]
86 fn binary_matrix_from_records(
87 pure_records: &[PureRecord<Self::Pure>],
88 binary_records: &[BinaryRecord<Identifier, Self::Binary>],
89 identifier_option: IdentifierOption,
90 ) -> Option<Array2<Self::Binary>> {
91 if binary_records.is_empty() {
92 return None;
93 }
94
95 let binary_map: HashMap<(String, String), Self::Binary> = {
97 binary_records
98 .iter()
99 .filter_map(|br| {
100 let id1 = br.id1.as_string(identifier_option);
101 let id2 = br.id2.as_string(identifier_option);
102 id1.and_then(|id1| id2.map(|id2| ((id1, id2), br.model_record.clone())))
103 })
104 .collect()
105 };
106 let n = pure_records.len();
107 Some(Array2::from_shape_fn([n, n], |(i, j)| {
108 let id1 = pure_records[i]
109 .identifier
110 .as_string(identifier_option)
111 .expect(&format!(
112 "No identifier for given identifier_option for pure record {}.",
113 i
114 ));
115 let id2 = pure_records[j]
116 .identifier
117 .as_string(identifier_option)
118 .expect(&format!(
119 "No identifier for given identifier_option for pure record {}.",
120 j
121 ));
122 binary_map
123 .get(&(id1.clone(), id2.clone()))
124 .or_else(|| binary_map.get(&(id2, id1)))
125 .cloned()
126 .unwrap_or_default()
127 }))
128 }
129
130 fn from_json<P>(
132 substances: Vec<&str>,
133 file_pure: P,
134 file_binary: Option<P>,
135 identifier_option: IdentifierOption,
136 ) -> Result<Self, ParameterError>
137 where
138 P: AsRef<Path>,
139 {
140 Self::from_multiple_json(&[(substances, file_pure)], file_binary, identifier_option)
141 }
142
143 fn from_multiple_json<P>(
145 input: &[(Vec<&str>, P)],
146 file_binary: Option<P>,
147 identifier_option: IdentifierOption,
148 ) -> Result<Self, ParameterError>
149 where
150 P: AsRef<Path>,
151 {
152 let nsubstances = input
154 .iter()
155 .fold(0, |acc, (substances, _)| acc + substances.len());
156
157 let queried: IndexSet<String> = input
159 .iter()
160 .flat_map(|(substances, _)| substances)
161 .map(|substance| substance.to_string())
162 .collect();
163
164 if queried.len() != nsubstances {
166 return Err(ParameterError::IncompatibleParameters(
167 "A substance was defined more than once.".to_string(),
168 ));
169 }
170
171 let mut records: Vec<PureRecord<Self::Pure>> = Vec::with_capacity(nsubstances);
172
173 for (substances, file) in input {
175 records.extend(PureRecord::<Self::Pure>::from_json(
176 substances,
177 file,
178 identifier_option,
179 )?);
180 }
181
182 let binary_records = if let Some(path) = file_binary {
183 let file = File::open(path)?;
184 let reader = BufReader::new(file);
185 serde_json::from_reader(reader)?
186 } else {
187 Vec::new()
188 };
189 let record_matrix =
190 Self::binary_matrix_from_records(&records, &binary_records, identifier_option);
191 Self::from_records(records, record_matrix)
192 }
193
194 fn from_segments<C: SegmentCount>(
199 chemical_records: Vec<C>,
200 segment_records: Vec<SegmentRecord<Self::Pure>>,
201 binary_segment_records: Option<Vec<BinaryRecord<String, f64>>>,
202 ) -> Result<Self, ParameterError>
203 where
204 Self::Pure: FromSegments<C::Count>,
205 Self::Binary: FromSegmentsBinary<C::Count>,
206 {
207 let pure_records = chemical_records
210 .iter()
211 .map(|cr| {
212 cr.segment_map(&segment_records).and_then(|segments| {
213 PureRecord::from_segments(cr.identifier().into_owned(), segments)
214 })
215 })
216 .collect::<Result<Vec<_>, _>>()?;
217
218 let binary_map: HashMap<_, _> = binary_segment_records
221 .into_iter()
222 .flat_map(|seg| seg.into_iter())
223 .map(|br| ((br.id1, br.id2), br.model_record))
224 .collect();
225
226 let segment_counts: Vec<_> = chemical_records
228 .iter()
229 .map(|cr| cr.segment_count())
230 .collect();
231
232 let n = pure_records.len();
236 let mut binary_records = Array2::default([n, n]);
237 for i in 0..n {
238 for j in i + 1..n {
239 let mut vec = Vec::new();
240 for (id1, &n1) in segment_counts[i].iter() {
241 for (id2, &n2) in segment_counts[j].iter() {
242 let binary = binary_map
243 .get(&(id1.clone(), id2.clone()))
244 .or_else(|| binary_map.get(&(id2.clone(), id1.clone())))
245 .cloned()
246 .unwrap_or_default();
247 vec.push((binary, n1, n2));
248 }
249 }
250 let kij = Self::Binary::from_segments_binary(&vec)?;
251 binary_records[(i, j)] = kij.clone();
252 binary_records[(j, i)] = kij;
253 }
254 }
255
256 Self::from_records(pure_records, Some(binary_records))
257 }
258
259 fn from_json_segments<P>(
264 substances: &[&str],
265 file_pure: P,
266 file_segments: P,
267 file_binary: Option<P>,
268 identifier_option: IdentifierOption,
269 ) -> Result<Self, ParameterError>
270 where
271 P: AsRef<Path>,
272 Self::Pure: FromSegments<usize>,
273 Self::Binary: FromSegmentsBinary<usize>,
274 {
275 let queried: IndexSet<String> = substances
276 .iter()
277 .map(|identifier| identifier.to_string())
278 .collect();
279
280 let file = File::open(file_pure)?;
281 let reader = BufReader::new(file);
282 let chemical_records: Vec<ChemicalRecord> = serde_json::from_reader(reader)?;
283 let mut record_map: HashMap<_, _> = chemical_records
284 .into_iter()
285 .filter_map(|record| {
286 record
287 .identifier
288 .as_string(identifier_option)
289 .map(|i| (i, record))
290 })
291 .collect();
292
293 let available: IndexSet<String> = record_map
295 .keys()
296 .map(|identifier| identifier.to_string())
297 .collect();
298 if !queried.is_subset(&available) {
299 let missing: Vec<String> = queried.difference(&available).cloned().collect();
300 let msg = format!("{:?}", missing);
301 return Err(ParameterError::ComponentsNotFound(msg));
302 };
303
304 let chemical_records: Vec<_> = queried
306 .iter()
307 .filter_map(|identifier| record_map.remove(&identifier.clone()))
308 .collect();
309
310 let segment_records: Vec<SegmentRecord<Self::Pure>> =
312 SegmentRecord::from_json(file_segments)?;
313
314 let binary_records = file_binary
316 .map(|file_binary| {
317 let reader = BufReader::new(File::open(file_binary)?);
318 let binary_records: Result<Vec<BinaryRecord<String, f64>>, ParameterError> =
319 Ok(serde_json::from_reader(reader)?);
320 binary_records
321 })
322 .transpose()?;
323
324 Self::from_segments(chemical_records, segment_records, binary_records)
325 }
326
327 fn subset(&self, component_list: &[usize]) -> Self {
334 let (pure_records, binary_records) = self.records();
335 let pure_records = component_list
336 .iter()
337 .map(|&i| pure_records[i].clone())
338 .collect();
339 let n = component_list.len();
340 let binary_records = binary_records.map(|br| {
341 Array2::from_shape_fn([n, n], |(i, j)| {
342 br[(component_list[i], component_list[j])].clone()
343 })
344 });
345
346 Self::from_records(pure_records, binary_records)
347 .expect("failed to create subset from parameters.")
348 }
349}
350
/// Unit struct that carries no data.
///
/// NOTE(review): judging by its name, this is meant to be used as the binary
/// record type of models that have no binary interaction parameters; confirm
/// against the implementors.
#[derive(Serialize, Deserialize, Clone, Default)]
pub struct NoBinaryModelRecord;
354
355impl From<f64> for NoBinaryModelRecord {
356 fn from(_: f64) -> Self {
357 Self
358 }
359}
360
361impl From<NoBinaryModelRecord> for f64 {
362 fn from(_: NoBinaryModelRecord) -> Self {
363 0.0 }
365}
366
367impl<T: Copy + ValueInto<f64>> FromSegmentsBinary<T> for NoBinaryModelRecord {
368 fn from_segments_binary(_segments: &[(f64, T, T)]) -> Result<Self, ParameterError> {
369 Ok(Self)
370 }
371}
372
373impl fmt::Display for NoBinaryModelRecord {
374 fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
375 Ok(())
376 }
377}
378
379pub trait ParameterHetero: Sized {
381 type Chemical: Clone;
382 type Pure: Clone + DeserializeOwned;
383 type Binary: Clone + DeserializeOwned;
384
385 fn from_segments<C: Clone + Into<Self::Chemical>>(
387 chemical_records: Vec<C>,
388 segment_records: Vec<SegmentRecord<Self::Pure>>,
389 binary_segment_records: Option<Vec<BinaryRecord<String, Self::Binary>>>,
390 ) -> Result<Self, ParameterError>;
391
392 #[allow(clippy::type_complexity)]
394 fn records(
395 &self,
396 ) -> (
397 &[Self::Chemical],
398 &[SegmentRecord<Self::Pure>],
399 &Option<Vec<BinaryRecord<String, Self::Binary>>>,
400 );
401
402 fn from_json_segments<P>(
404 substances: &[&str],
405 file_pure: P,
406 file_segments: P,
407 file_binary: Option<P>,
408 identifier_option: IdentifierOption,
409 ) -> Result<Self, ParameterError>
410 where
411 P: AsRef<Path>,
412 ChemicalRecord: Into<Self::Chemical>,
413 {
414 let queried: IndexSet<String> = substances
415 .iter()
416 .map(|identifier| identifier.to_string())
417 .collect();
418
419 let reader = BufReader::new(File::open(file_pure)?);
420 let chemical_records: Vec<ChemicalRecord> = serde_json::from_reader(reader)?;
421 let mut record_map: IndexMap<_, _> = chemical_records
422 .into_iter()
423 .filter_map(|record| {
424 record
425 .identifier
426 .as_string(identifier_option)
427 .map(|i| (i, record))
428 })
429 .collect();
430
431 let available: IndexSet<String> = record_map
433 .keys()
434 .map(|identifier| identifier.to_string())
435 .collect();
436 if !queried.is_subset(&available) {
437 let missing: Vec<String> = queried.difference(&available).cloned().collect();
438 return Err(ParameterError::ComponentsNotFound(format!("{:?}", missing)));
439 };
440
441 let chemical_records: Vec<_> = queried
443 .iter()
444 .filter_map(|identifier| record_map.shift_remove(&identifier.clone()))
445 .collect();
446
447 let segment_records: Vec<SegmentRecord<Self::Pure>> =
449 SegmentRecord::from_json(file_segments)?;
450
451 let binary_records = file_binary
453 .map(|file_binary| {
454 let reader = BufReader::new(File::open(file_binary)?);
455 let binary_records: Result<
456 Vec<BinaryRecord<String, Self::Binary>>,
457 ParameterError,
458 > = Ok(serde_json::from_reader(reader)?);
459 binary_records
460 })
461 .transpose()?;
462
463 Self::from_segments(chemical_records, segment_records, binary_records)
464 }
465
466 fn subset(&self, component_list: &[usize]) -> Self {
468 let (chemical_records, segment_records, binary_segment_records) = self.records();
469 let chemical_records: Vec<_> = component_list
470 .iter()
471 .map(|&i| chemical_records[i].clone())
472 .collect();
473 Self::from_segments(
474 chemical_records,
475 segment_records.to_vec(),
476 binary_segment_records.clone(),
477 )
478 .unwrap()
479 }
480}
481
482#[derive(Error, Debug)]
484pub enum ParameterError {
485 #[error(transparent)]
486 FileIO(#[from] io::Error),
487 #[error(transparent)]
488 Serde(#[from] serde_json::Error),
489 #[error("The following component(s) were not found: {0}")]
490 ComponentsNotFound(String),
491 #[error("The identifier '{0}' is not known. ['cas', 'name', 'iupacname', 'smiles', inchi', 'formula']")]
492 IdentifierNotFound(String),
493 #[error("Information missing.")]
494 InsufficientInformation,
495 #[error("Incompatible parameters: {0}")]
496 IncompatibleParameters(String),
497}