1use bigsmiles::BigSmiles;
2use rand::distr::weighted::WeightedIndex;
3use rand::prelude::*;
4use rand::rngs::StdRng;
5
6use crate::{
7 distribution::ChainLengthDistribution,
8 error::PolySimError,
9 polymer::{PolymerChain, PolymerEnsemble},
10 properties::molecular_weight::average_mass,
11};
12
13use super::linear::{
14 build_copolymer_smiles, build_linear_smiles, gradient_fraction, GradientProfile,
15};
16
17pub const DEFAULT_NUM_CHAINS: usize = 100;
19
20pub struct EnsembleBuilder<D: ChainLengthDistribution> {
22 bigsmiles: BigSmiles,
23 distribution: D,
24 mn: f64,
25 pdi: f64,
26 num_chains: usize,
27 seed: Option<u64>,
28}
29
30impl<D: ChainLengthDistribution> EnsembleBuilder<D> {
31 pub fn new(bigsmiles: BigSmiles, distribution: D, mn: f64, pdi: f64) -> Self {
38 Self {
39 bigsmiles,
40 distribution,
41 mn,
42 pdi,
43 num_chains: DEFAULT_NUM_CHAINS,
44 seed: None,
45 }
46 }
47
48 pub fn num_chains(mut self, n: usize) -> Self {
50 self.num_chains = n;
51 self
52 }
53
54 pub fn seed(mut self, seed: u64) -> Self {
56 self.seed = Some(seed);
57 self
58 }
59
60 pub fn homopolymer_ensemble(&self) -> Result<PolymerEnsemble, PolySimError> {
68 let stoch = self
69 .bigsmiles
70 .first_stochastic()
71 .ok_or(PolySimError::NoStochasticObject)?;
72
73 if stoch.repeat_units.len() != 1 {
74 return Err(PolySimError::RepeatUnitCount {
75 architecture: "homopolymer",
76 got: stoch.repeat_units.len(),
77 need_min: 1,
78 });
79 }
80
81 let smiles_raw = &stoch.repeat_units[0].smiles_raw;
82
83 let mw1 = average_mass(&PolymerChain::new(
86 build_linear_smiles(smiles_raw, 1)?,
87 1,
88 0.0,
89 ));
90 let mw2 = average_mass(&PolymerChain::new(
91 build_linear_smiles(smiles_raw, 2)?,
92 2,
93 0.0,
94 ));
95 let m0 = mw2 - mw1;
96 let m_end = mw1 - m0;
97
98 let target_mn_corrected = self.mn - m_end;
100
101 let mut rng = self.make_rng();
103 let lengths = self.distribution.sample(
104 target_mn_corrected,
105 self.pdi,
106 m0,
107 self.num_chains,
108 &mut *rng,
109 );
110
111 let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
113 .into_iter()
114 .map(|n| {
115 let smiles = build_linear_smiles(smiles_raw, n)?;
116 let chain = PolymerChain::new(smiles, n, 0.0);
117 let mn = average_mass(&chain);
118 Ok(PolymerChain::new(chain.smiles, n, mn))
119 })
120 .collect();
121
122 PolymerEnsemble::new(chains?)
123 }
124
125 pub fn random_copolymer_ensemble(
129 &self,
130 fractions: &[f64],
131 ) -> Result<PolymerEnsemble, PolySimError> {
132 let sum: f64 = fractions.iter().sum();
133 if (sum - 1.0).abs() > 1e-6 {
134 return Err(PolySimError::InvalidFractions { sum });
135 }
136
137 let (units, m0_avg, m_end) = self.copolymer_calibration(fractions)?;
138
139 let target_mn_corrected = self.mn - m_end;
140 let mut rng = self.make_rng();
141 let lengths = self.distribution.sample(
142 target_mn_corrected,
143 self.pdi,
144 m0_avg,
145 self.num_chains,
146 &mut *rng,
147 );
148
149 let dist = WeightedIndex::new(fractions)
150 .map_err(|e| PolySimError::BuildStrategy(format!("invalid weight fractions: {e}")))?;
151
152 let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
153 .into_iter()
154 .map(|n| {
155 let sequence: Vec<&str> = (0..n).map(|_| units[dist.sample(&mut *rng)]).collect();
156 let smiles = build_copolymer_smiles(&sequence)?;
157 let chain = PolymerChain::new(smiles, n, 0.0);
158 let mn = average_mass(&chain);
159 Ok(PolymerChain::new(chain.smiles, n, mn))
160 })
161 .collect();
162
163 PolymerEnsemble::new(chains?)
164 }
165
166 pub fn alternating_copolymer_ensemble(&self) -> Result<PolymerEnsemble, PolySimError> {
168 let stoch = self
169 .bigsmiles
170 .first_stochastic()
171 .ok_or(PolySimError::NoStochasticObject)?;
172
173 if stoch.repeat_units.len() < 2 {
174 return Err(PolySimError::RepeatUnitCount {
175 architecture: "alternating copolymer",
176 got: stoch.repeat_units.len(),
177 need_min: 2,
178 });
179 }
180
181 let units: Vec<&str> = stoch
182 .repeat_units
183 .iter()
184 .map(|f| f.smiles_raw.as_str())
185 .collect();
186 let k = units.len();
187
188 let cycle_smiles: Vec<&str> = units.to_vec();
190 let cycle1 = build_copolymer_smiles(&cycle_smiles)?;
191 let cycle2_seq: Vec<&str> = units.iter().chain(units.iter()).copied().collect();
192 let cycle2 = build_copolymer_smiles(&cycle2_seq)?;
193
194 let mw1 = average_mass(&PolymerChain::new(cycle1, k, 0.0));
195 let mw2 = average_mass(&PolymerChain::new(cycle2, k * 2, 0.0));
196 let m0_cycle = mw2 - mw1; let m_end = mw1 - m0_cycle;
198
199 let target_mn_corrected = self.mn - m_end;
200 let m0_per_unit = m0_cycle / k as f64;
202
203 let mut rng = self.make_rng();
204 let lengths = self.distribution.sample(
205 target_mn_corrected,
206 self.pdi,
207 m0_per_unit,
208 self.num_chains,
209 &mut *rng,
210 );
211
212 let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
213 .into_iter()
214 .map(|n| {
215 let sequence: Vec<&str> = (0..n).map(|i| units[i % k]).collect();
216 let smiles = build_copolymer_smiles(&sequence)?;
217 let chain = PolymerChain::new(smiles, n, 0.0);
218 let mn = average_mass(&chain);
219 Ok(PolymerChain::new(chain.smiles, n, mn))
220 })
221 .collect();
222
223 PolymerEnsemble::new(chains?)
224 }
225
226 pub fn block_copolymer_ensemble(
231 &self,
232 block_ratios: &[f64],
233 ) -> Result<PolymerEnsemble, PolySimError> {
234 let sum: f64 = block_ratios.iter().sum();
235 if (sum - 1.0).abs() > 1e-6 {
236 return Err(PolySimError::InvalidBlockRatios { sum });
237 }
238
239 let stoch = self
240 .bigsmiles
241 .first_stochastic()
242 .ok_or(PolySimError::NoStochasticObject)?;
243
244 if stoch.repeat_units.len() < 2 {
245 return Err(PolySimError::RepeatUnitCount {
246 architecture: "block copolymer",
247 got: stoch.repeat_units.len(),
248 need_min: 2,
249 });
250 }
251
252 if block_ratios.len() != stoch.repeat_units.len() {
253 return Err(PolySimError::RepeatUnitCount {
254 architecture: "block copolymer (ratios count mismatch)",
255 got: block_ratios.len(),
256 need_min: stoch.repeat_units.len(),
257 });
258 }
259
260 let units: Vec<&str> = stoch
261 .repeat_units
262 .iter()
263 .map(|f| f.smiles_raw.as_str())
264 .collect();
265
266 let mut unit_masses = Vec::with_capacity(units.len());
268 let mut m_end_sum = 0.0;
269 for &unit in &units {
270 let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
271 let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
272 let m0 = mw2 - mw1;
273 unit_masses.push(m0);
274 m_end_sum += mw1 - m0;
275 }
276 let m_end = m_end_sum / units.len() as f64;
277 let m0_avg: f64 = block_ratios
278 .iter()
279 .zip(unit_masses.iter())
280 .map(|(r, m)| r * m)
281 .sum();
282
283 let target_mn_corrected = self.mn - m_end;
284 let mut rng = self.make_rng();
285 let lengths = self.distribution.sample(
286 target_mn_corrected,
287 self.pdi,
288 m0_avg,
289 self.num_chains,
290 &mut *rng,
291 );
292
293 let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
294 .into_iter()
295 .map(|n| {
296 let block_lengths: Vec<usize> = distribute_n_by_ratios(n, block_ratios);
298 let sequence: Vec<&str> = block_lengths
299 .iter()
300 .zip(units.iter())
301 .flat_map(|(&len, &unit)| std::iter::repeat_n(unit, len))
302 .collect();
303 let smiles = build_copolymer_smiles(&sequence)?;
304 let total = sequence.len();
305 let chain = PolymerChain::new(smiles, total, 0.0);
306 let mn = average_mass(&chain);
307 Ok(PolymerChain::new(chain.smiles, total, mn))
308 })
309 .collect();
310
311 PolymerEnsemble::new(chains?)
312 }
313
314 pub fn gradient_copolymer_ensemble(
319 &self,
320 profile: &GradientProfile,
321 ) -> Result<PolymerEnsemble, PolySimError> {
322 let stoch = self
323 .bigsmiles
324 .first_stochastic()
325 .ok_or(PolySimError::NoStochasticObject)?;
326
327 if stoch.repeat_units.len() != 2 {
328 return Err(PolySimError::RepeatUnitCount {
329 architecture: "gradient copolymer",
330 got: stoch.repeat_units.len(),
331 need_min: 2,
332 });
333 }
334
335 let units: Vec<&str> = stoch
336 .repeat_units
337 .iter()
338 .map(|f| f.smiles_raw.as_str())
339 .collect();
340
341 let mut unit_masses = Vec::with_capacity(2);
343 let mut m_end_sum = 0.0;
344 for &unit in &units {
345 let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
346 let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
347 let m0 = mw2 - mw1;
348 unit_masses.push(m0);
349 m_end_sum += mw1 - m0;
350 }
351 let m_end = m_end_sum / 2.0;
352
353 let n_sample = 100usize;
356 let avg_f_a: f64 = (0..n_sample)
357 .map(|i| gradient_fraction(profile, i, n_sample))
358 .sum::<f64>()
359 / n_sample as f64;
360 let m0_avg = avg_f_a * unit_masses[0] + (1.0 - avg_f_a) * unit_masses[1];
361
362 let target_mn_corrected = self.mn - m_end;
363 let mut rng = self.make_rng();
364 let lengths = self.distribution.sample(
365 target_mn_corrected,
366 self.pdi,
367 m0_avg,
368 self.num_chains,
369 &mut *rng,
370 );
371
372 let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
373 .into_iter()
374 .map(|n| {
375 let sequence: Vec<&str> = (0..n)
376 .map(|i| {
377 let f_a = gradient_fraction(profile, i, n);
378 let pick: f64 = rng.random();
379 if pick < f_a {
380 units[0]
381 } else {
382 units[1]
383 }
384 })
385 .collect();
386 let smiles = build_copolymer_smiles(&sequence)?;
387 let chain = PolymerChain::new(smiles, n, 0.0);
388 let mn = average_mass(&chain);
389 Ok(PolymerChain::new(chain.smiles, n, mn))
390 })
391 .collect();
392
393 PolymerEnsemble::new(chains?)
394 }
395
396 fn make_rng(&self) -> Box<dyn rand::RngCore> {
399 match self.seed {
400 Some(s) => Box::new(StdRng::seed_from_u64(s)),
401 None => Box::new(rand::rng()),
402 }
403 }
404
405 fn copolymer_calibration(
407 &self,
408 fractions: &[f64],
409 ) -> Result<(Vec<&str>, f64, f64), PolySimError> {
410 let stoch = self
411 .bigsmiles
412 .first_stochastic()
413 .ok_or(PolySimError::NoStochasticObject)?;
414
415 if stoch.repeat_units.len() < 2 {
416 return Err(PolySimError::RepeatUnitCount {
417 architecture: "random copolymer",
418 got: stoch.repeat_units.len(),
419 need_min: 2,
420 });
421 }
422
423 if fractions.len() != stoch.repeat_units.len() {
424 return Err(PolySimError::RepeatUnitCount {
425 architecture: "random copolymer (fractions count mismatch)",
426 got: fractions.len(),
427 need_min: stoch.repeat_units.len(),
428 });
429 }
430
431 let units: Vec<&str> = stoch
432 .repeat_units
433 .iter()
434 .map(|f| f.smiles_raw.as_str())
435 .collect();
436
437 let mut unit_masses = Vec::with_capacity(units.len());
438 let mut m_end_sum = 0.0;
439 for &unit in &units {
440 let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
441 let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
442 let m0 = mw2 - mw1;
443 unit_masses.push(m0);
444 m_end_sum += mw1 - m0;
445 }
446
447 let m_end = m_end_sum / units.len() as f64;
448 let m0_avg: f64 = fractions
449 .iter()
450 .zip(unit_masses.iter())
451 .map(|(f, m)| f * m)
452 .sum();
453
454 Ok((units, m0_avg, m_end))
455 }
456}
457
458fn distribute_n_by_ratios(n: usize, ratios: &[f64]) -> Vec<usize> {
461 let n_f = n as f64;
462 let raw: Vec<f64> = ratios.iter().map(|r| r * n_f).collect();
463 let mut floors: Vec<usize> = raw.iter().map(|&x| x.floor() as usize).collect();
464 let mut remainder: Vec<(usize, f64)> = raw
465 .iter()
466 .enumerate()
467 .map(|(i, &x)| (i, x - x.floor()))
468 .collect();
469
470 let allocated: usize = floors.iter().sum();
471 let mut deficit = n.saturating_sub(allocated);
472
473 remainder.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
475 for (i, _) in &remainder {
476 if deficit == 0 {
477 break;
478 }
479 floors[*i] += 1;
480 deficit -= 1;
481 }
482
483 floors
484}