Skip to main content

polysim_core/builder/
ensemble.rs

1use bigsmiles::BigSmiles;
2use rand::distr::weighted::WeightedIndex;
3use rand::prelude::*;
4use rand::rngs::StdRng;
5
6use crate::{
7    distribution::ChainLengthDistribution,
8    error::PolySimError,
9    polymer::{PolymerChain, PolymerEnsemble},
10    properties::molecular_weight::average_mass,
11};
12
13use super::linear::{
14    build_copolymer_smiles, build_linear_smiles, gradient_fraction, GradientProfile,
15};
16
17/// Default number of chains in an ensemble.
18pub const DEFAULT_NUM_CHAINS: usize = 100;
19
20/// Builder that generates a polydisperse ensemble of polymer chains.
21pub struct EnsembleBuilder<D: ChainLengthDistribution> {
22    bigsmiles: BigSmiles,
23    distribution: D,
24    mn: f64,
25    pdi: f64,
26    num_chains: usize,
27    seed: Option<u64>,
28}
29
30impl<D: ChainLengthDistribution> EnsembleBuilder<D> {
31    /// Creates a new ensemble builder.
32    ///
33    /// - `bigsmiles` — parsed BigSMILES describing the polymer.
34    /// - `distribution` — chain length distribution model.
35    /// - `mn` — target number-average molecular weight (g/mol).
36    /// - `pdi` — target polydispersity index (Mw/Mn).
37    pub fn new(bigsmiles: BigSmiles, distribution: D, mn: f64, pdi: f64) -> Self {
38        Self {
39            bigsmiles,
40            distribution,
41            mn,
42            pdi,
43            num_chains: DEFAULT_NUM_CHAINS,
44            seed: None,
45        }
46    }
47
48    /// Override the number of chains to generate (default: 100).
49    pub fn num_chains(mut self, n: usize) -> Self {
50        self.num_chains = n;
51        self
52    }
53
54    /// Set a seed for reproducible sampling.
55    pub fn seed(mut self, seed: u64) -> Self {
56        self.seed = Some(seed);
57        self
58    }
59
60    /// Build a polydisperse ensemble of homopolymer chains.
61    ///
62    /// # Errors
63    ///
64    /// - [`PolySimError::NoStochasticObject`] if no `{...}` block found.
65    /// - [`PolySimError::RepeatUnitCount`] if ≠ 1 repeat unit.
66    /// - [`PolySimError::EmptyEnsemble`] if `num_chains` is 0.
67    pub fn homopolymer_ensemble(&self) -> Result<PolymerEnsemble, PolySimError> {
68        let stoch = self
69            .bigsmiles
70            .first_stochastic()
71            .ok_or(PolySimError::NoStochasticObject)?;
72
73        if stoch.repeat_units.len() != 1 {
74            return Err(PolySimError::RepeatUnitCount {
75                architecture: "homopolymer",
76                got: stoch.repeat_units.len(),
77                need_min: 1,
78            });
79        }
80
81        let smiles_raw = &stoch.repeat_units[0].smiles_raw;
82
83        // Two-point calibration to separate repeat-unit mass (m0) from
84        // end-group mass (m_end): MW(n) = n × m0 + m_end.
85        let mw1 = average_mass(&PolymerChain::new(
86            build_linear_smiles(smiles_raw, 1)?,
87            1,
88            0.0,
89        ));
90        let mw2 = average_mass(&PolymerChain::new(
91            build_linear_smiles(smiles_raw, 2)?,
92            2,
93            0.0,
94        ));
95        let m0 = mw2 - mw1;
96        let m_end = mw1 - m0;
97
98        // Adjust target Mn to account for end groups: Mn_target = Xn × m0 + m_end
99        let target_mn_corrected = self.mn - m_end;
100
101        // Sample chain lengths from distribution.
102        let mut rng = self.make_rng();
103        let lengths = self.distribution.sample(
104            target_mn_corrected,
105            self.pdi,
106            m0,
107            self.num_chains,
108            &mut *rng,
109        );
110
111        // Build each chain.
112        let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
113            .into_iter()
114            .map(|n| {
115                let smiles = build_linear_smiles(smiles_raw, n)?;
116                let chain = PolymerChain::new(smiles, n, 0.0);
117                let mn = average_mass(&chain);
118                Ok(PolymerChain::new(chain.smiles, n, mn))
119            })
120            .collect();
121
122        PolymerEnsemble::new(chains?)
123    }
124
125    /// Build a polydisperse ensemble of random copolymer chains.
126    ///
127    /// `fractions` — weight fraction of each repeat unit (must sum to 1.0).
128    pub fn random_copolymer_ensemble(
129        &self,
130        fractions: &[f64],
131    ) -> Result<PolymerEnsemble, PolySimError> {
132        let sum: f64 = fractions.iter().sum();
133        if (sum - 1.0).abs() > 1e-6 {
134            return Err(PolySimError::InvalidFractions { sum });
135        }
136
137        let (units, m0_avg, m_end) = self.copolymer_calibration(fractions)?;
138
139        let target_mn_corrected = self.mn - m_end;
140        let mut rng = self.make_rng();
141        let lengths = self.distribution.sample(
142            target_mn_corrected,
143            self.pdi,
144            m0_avg,
145            self.num_chains,
146            &mut *rng,
147        );
148
149        let dist = WeightedIndex::new(fractions)
150            .map_err(|e| PolySimError::BuildStrategy(format!("invalid weight fractions: {e}")))?;
151
152        let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
153            .into_iter()
154            .map(|n| {
155                let sequence: Vec<&str> = (0..n).map(|_| units[dist.sample(&mut *rng)]).collect();
156                let smiles = build_copolymer_smiles(&sequence)?;
157                let chain = PolymerChain::new(smiles, n, 0.0);
158                let mn = average_mass(&chain);
159                Ok(PolymerChain::new(chain.smiles, n, mn))
160            })
161            .collect();
162
163        PolymerEnsemble::new(chains?)
164    }
165
166    /// Build a polydisperse ensemble of alternating copolymer chains.
167    pub fn alternating_copolymer_ensemble(&self) -> Result<PolymerEnsemble, PolySimError> {
168        let stoch = self
169            .bigsmiles
170            .first_stochastic()
171            .ok_or(PolySimError::NoStochasticObject)?;
172
173        if stoch.repeat_units.len() < 2 {
174            return Err(PolySimError::RepeatUnitCount {
175                architecture: "alternating copolymer",
176                got: stoch.repeat_units.len(),
177                need_min: 2,
178            });
179        }
180
181        let units: Vec<&str> = stoch
182            .repeat_units
183            .iter()
184            .map(|f| f.smiles_raw.as_str())
185            .collect();
186        let k = units.len();
187
188        // Calibrate using one full cycle as the "composite repeat unit".
189        let cycle_smiles: Vec<&str> = units.to_vec();
190        let cycle1 = build_copolymer_smiles(&cycle_smiles)?;
191        let cycle2_seq: Vec<&str> = units.iter().chain(units.iter()).copied().collect();
192        let cycle2 = build_copolymer_smiles(&cycle2_seq)?;
193
194        let mw1 = average_mass(&PolymerChain::new(cycle1, k, 0.0));
195        let mw2 = average_mass(&PolymerChain::new(cycle2, k * 2, 0.0));
196        let m0_cycle = mw2 - mw1; // mass per full cycle
197        let m_end = mw1 - m0_cycle;
198
199        let target_mn_corrected = self.mn - m_end;
200        // m0 for distribution = mass per individual unit (average)
201        let m0_per_unit = m0_cycle / k as f64;
202
203        let mut rng = self.make_rng();
204        let lengths = self.distribution.sample(
205            target_mn_corrected,
206            self.pdi,
207            m0_per_unit,
208            self.num_chains,
209            &mut *rng,
210        );
211
212        let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
213            .into_iter()
214            .map(|n| {
215                let sequence: Vec<&str> = (0..n).map(|i| units[i % k]).collect();
216                let smiles = build_copolymer_smiles(&sequence)?;
217                let chain = PolymerChain::new(smiles, n, 0.0);
218                let mn = average_mass(&chain);
219                Ok(PolymerChain::new(chain.smiles, n, mn))
220            })
221            .collect();
222
223        PolymerEnsemble::new(chains?)
224    }
225
226    /// Build a polydisperse ensemble of block copolymer chains.
227    ///
228    /// `block_ratios` — relative proportion of each block (must sum to 1.0).
229    /// Each chain has a different total n, distributed according to the ratios.
230    pub fn block_copolymer_ensemble(
231        &self,
232        block_ratios: &[f64],
233    ) -> Result<PolymerEnsemble, PolySimError> {
234        let sum: f64 = block_ratios.iter().sum();
235        if (sum - 1.0).abs() > 1e-6 {
236            return Err(PolySimError::InvalidBlockRatios { sum });
237        }
238
239        let stoch = self
240            .bigsmiles
241            .first_stochastic()
242            .ok_or(PolySimError::NoStochasticObject)?;
243
244        if stoch.repeat_units.len() < 2 {
245            return Err(PolySimError::RepeatUnitCount {
246                architecture: "block copolymer",
247                got: stoch.repeat_units.len(),
248                need_min: 2,
249            });
250        }
251
252        if block_ratios.len() != stoch.repeat_units.len() {
253            return Err(PolySimError::RepeatUnitCount {
254                architecture: "block copolymer (ratios count mismatch)",
255                got: block_ratios.len(),
256                need_min: stoch.repeat_units.len(),
257            });
258        }
259
260        let units: Vec<&str> = stoch
261            .repeat_units
262            .iter()
263            .map(|f| f.smiles_raw.as_str())
264            .collect();
265
266        // Calibrate: weighted average mass per unit
267        let mut unit_masses = Vec::with_capacity(units.len());
268        let mut m_end_sum = 0.0;
269        for &unit in &units {
270            let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
271            let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
272            let m0 = mw2 - mw1;
273            unit_masses.push(m0);
274            m_end_sum += mw1 - m0;
275        }
276        let m_end = m_end_sum / units.len() as f64;
277        let m0_avg: f64 = block_ratios
278            .iter()
279            .zip(unit_masses.iter())
280            .map(|(r, m)| r * m)
281            .sum();
282
283        let target_mn_corrected = self.mn - m_end;
284        let mut rng = self.make_rng();
285        let lengths = self.distribution.sample(
286            target_mn_corrected,
287            self.pdi,
288            m0_avg,
289            self.num_chains,
290            &mut *rng,
291        );
292
293        let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
294            .into_iter()
295            .map(|n| {
296                // Distribute n across blocks proportionally to ratios
297                let block_lengths: Vec<usize> = distribute_n_by_ratios(n, block_ratios);
298                let sequence: Vec<&str> = block_lengths
299                    .iter()
300                    .zip(units.iter())
301                    .flat_map(|(&len, &unit)| std::iter::repeat_n(unit, len))
302                    .collect();
303                let smiles = build_copolymer_smiles(&sequence)?;
304                let total = sequence.len();
305                let chain = PolymerChain::new(smiles, total, 0.0);
306                let mn = average_mass(&chain);
307                Ok(PolymerChain::new(chain.smiles, total, mn))
308            })
309            .collect();
310
311        PolymerEnsemble::new(chains?)
312    }
313
314    /// Build a polydisperse ensemble of gradient copolymer chains.
315    ///
316    /// Each chain has composition that varies from `f_start` to `f_end`
317    /// according to `profile`. The BigSMILES must contain exactly 2 repeat units.
318    pub fn gradient_copolymer_ensemble(
319        &self,
320        profile: &GradientProfile,
321    ) -> Result<PolymerEnsemble, PolySimError> {
322        let stoch = self
323            .bigsmiles
324            .first_stochastic()
325            .ok_or(PolySimError::NoStochasticObject)?;
326
327        if stoch.repeat_units.len() != 2 {
328            return Err(PolySimError::RepeatUnitCount {
329                architecture: "gradient copolymer",
330                got: stoch.repeat_units.len(),
331                need_min: 2,
332            });
333        }
334
335        let units: Vec<&str> = stoch
336            .repeat_units
337            .iter()
338            .map(|f| f.smiles_raw.as_str())
339            .collect();
340
341        // Calibrate mass per unit for each type
342        let mut unit_masses = Vec::with_capacity(2);
343        let mut m_end_sum = 0.0;
344        for &unit in &units {
345            let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
346            let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
347            let m0 = mw2 - mw1;
348            unit_masses.push(m0);
349            m_end_sum += mw1 - m0;
350        }
351        let m_end = m_end_sum / 2.0;
352
353        // Average mass per unit using the mean gradient fraction across the chain
354        // For a chain of variable length, approximate with 100-point average.
355        let n_sample = 100usize;
356        let avg_f_a: f64 = (0..n_sample)
357            .map(|i| gradient_fraction(profile, i, n_sample))
358            .sum::<f64>()
359            / n_sample as f64;
360        let m0_avg = avg_f_a * unit_masses[0] + (1.0 - avg_f_a) * unit_masses[1];
361
362        let target_mn_corrected = self.mn - m_end;
363        let mut rng = self.make_rng();
364        let lengths = self.distribution.sample(
365            target_mn_corrected,
366            self.pdi,
367            m0_avg,
368            self.num_chains,
369            &mut *rng,
370        );
371
372        let chains: Result<Vec<PolymerChain>, PolySimError> = lengths
373            .into_iter()
374            .map(|n| {
375                let sequence: Vec<&str> = (0..n)
376                    .map(|i| {
377                        let f_a = gradient_fraction(profile, i, n);
378                        let pick: f64 = rng.random();
379                        if pick < f_a {
380                            units[0]
381                        } else {
382                            units[1]
383                        }
384                    })
385                    .collect();
386                let smiles = build_copolymer_smiles(&sequence)?;
387                let chain = PolymerChain::new(smiles, n, 0.0);
388                let mn = average_mass(&chain);
389                Ok(PolymerChain::new(chain.smiles, n, mn))
390            })
391            .collect();
392
393        PolymerEnsemble::new(chains?)
394    }
395
396    // --- private helpers ---
397
398    fn make_rng(&self) -> Box<dyn rand::RngCore> {
399        match self.seed {
400            Some(s) => Box::new(StdRng::seed_from_u64(s)),
401            None => Box::new(rand::rng()),
402        }
403    }
404
405    /// Extracts units and computes weighted average mass for copolymer calibration.
406    fn copolymer_calibration(
407        &self,
408        fractions: &[f64],
409    ) -> Result<(Vec<&str>, f64, f64), PolySimError> {
410        let stoch = self
411            .bigsmiles
412            .first_stochastic()
413            .ok_or(PolySimError::NoStochasticObject)?;
414
415        if stoch.repeat_units.len() < 2 {
416            return Err(PolySimError::RepeatUnitCount {
417                architecture: "random copolymer",
418                got: stoch.repeat_units.len(),
419                need_min: 2,
420            });
421        }
422
423        if fractions.len() != stoch.repeat_units.len() {
424            return Err(PolySimError::RepeatUnitCount {
425                architecture: "random copolymer (fractions count mismatch)",
426                got: fractions.len(),
427                need_min: stoch.repeat_units.len(),
428            });
429        }
430
431        let units: Vec<&str> = stoch
432            .repeat_units
433            .iter()
434            .map(|f| f.smiles_raw.as_str())
435            .collect();
436
437        let mut unit_masses = Vec::with_capacity(units.len());
438        let mut m_end_sum = 0.0;
439        for &unit in &units {
440            let mw1 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
441            let mw2 = average_mass(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
442            let m0 = mw2 - mw1;
443            unit_masses.push(m0);
444            m_end_sum += mw1 - m0;
445        }
446
447        let m_end = m_end_sum / units.len() as f64;
448        let m0_avg: f64 = fractions
449            .iter()
450            .zip(unit_masses.iter())
451            .map(|(f, m)| f * m)
452            .sum();
453
454        Ok((units, m0_avg, m_end))
455    }
456}
457
458/// Distributes total n across blocks proportionally to ratios.
459/// Ensures sum of block lengths equals n (uses largest-remainder method).
460fn distribute_n_by_ratios(n: usize, ratios: &[f64]) -> Vec<usize> {
461    let n_f = n as f64;
462    let raw: Vec<f64> = ratios.iter().map(|r| r * n_f).collect();
463    let mut floors: Vec<usize> = raw.iter().map(|&x| x.floor() as usize).collect();
464    let mut remainder: Vec<(usize, f64)> = raw
465        .iter()
466        .enumerate()
467        .map(|(i, &x)| (i, x - x.floor()))
468        .collect();
469
470    let allocated: usize = floors.iter().sum();
471    let mut deficit = n.saturating_sub(allocated);
472
473    // Sort by fractional part descending, allocate remaining units
474    remainder.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
475    for (i, _) in &remainder {
476        if deficit == 0 {
477            break;
478        }
479        floors[*i] += 1;
480        deficit -= 1;
481    }
482
483    floors
484}