Skip to main content

polysim_core/builder/
linear.rs

1use bigsmiles::{BigSmiles, BigSmilesSegment, StochasticObject};
2
3use crate::{
4    error::PolySimError,
5    polymer::PolymerChain,
6    properties::molecular_weight::{average_mass, monoisotopic_mass},
7};
8
9use super::strategy::BuildStrategy;
10
/// Builder for linear polymer architectures.
///
/// Supports homopolymers, random/alternating/block copolymers — all derived
/// from a single BigSMILES string.
///
/// Only homopolymer generation is implemented at the moment; the copolymer
/// entry points exist but are still `todo!()` stubs.
pub struct LinearBuilder {
    /// Parsed BigSMILES whose first stochastic object supplies the repeat unit.
    bigsmiles: BigSmiles,
    /// Rule used to determine how many repeat units the chain contains.
    strategy: BuildStrategy,
}
19
20impl LinearBuilder {
21    /// Creates a new builder from a parsed BigSMILES and a build strategy.
22    pub fn new(bigsmiles: BigSmiles, strategy: BuildStrategy) -> Self {
23        Self {
24            bigsmiles,
25            strategy,
26        }
27    }
28
29    /// Generates a linear homopolymer (single repeat unit, repeated *n* times).
30    ///
31    /// # Errors
32    ///
33    /// - [`PolySimError::NoStochasticObject`] if the BigSMILES contains no
34    ///   stochastic object (`{...}`).
35    /// - [`PolySimError::RepeatUnitCount`] if the stochastic object contains ≠ 1
36    ///   repeat unit.
37    /// - [`PolySimError::BuildStrategy`] if the strategy yields *n* = 0.
38    ///
39    /// # Example
40    ///
41    /// ```rust
42    /// use polysim_core::{parse, builder::{linear::LinearBuilder, BuildStrategy}};
43    ///
44    /// let bs = parse("{[]CC(C)[]}").unwrap(); // polypropylene
45    /// let chain = LinearBuilder::new(bs, BuildStrategy::ByRepeatCount(3))
46    ///     .homopolymer()
47    ///     .unwrap();
48    ///
49    /// assert_eq!(chain.smiles, "CC(C)CC(C)CC(C)");
50    /// assert_eq!(chain.repeat_count, 3);
51    /// ```
52    pub fn homopolymer(&self) -> Result<PolymerChain, PolySimError> {
53        let stoch =
54            find_first_stochastic(&self.bigsmiles).ok_or(PolySimError::NoStochasticObject)?;
55
56        if stoch.repeat_units.len() != 1 {
57            return Err(PolySimError::RepeatUnitCount {
58                architecture: "homopolymer",
59                got: stoch.repeat_units.len(),
60                need: 1,
61            });
62        }
63
64        let fragment = &stoch.repeat_units[0];
65        let n = self.resolve_n(&fragment.smiles_raw)?;
66
67        if n == 0 {
68            return Err(PolySimError::BuildStrategy(
69                "repeat count must be ≥ 1".to_string(),
70            ));
71        }
72
73        let smiles = build_linear_smiles(&fragment.smiles_raw, n)?;
74        let chain = PolymerChain::new(smiles, n, 0.0);
75        let mn = average_mass(&chain);
76        Ok(PolymerChain::new(chain.smiles, n, mn))
77    }
78
79    /// Generates a random (statistical) copolymer.
80    ///
81    /// `fractions` — weight fraction of each repeat unit (must sum to 1.0).
82    /// The BigSMILES must contain exactly `fractions.len()` repeat units.
83    pub fn random_copolymer(&self, fractions: &[f64]) -> Result<PolymerChain, PolySimError> {
84        let sum: f64 = fractions.iter().sum();
85        if (sum - 1.0).abs() > 1e-6 {
86            return Err(PolySimError::InvalidFractions { sum });
87        }
88        todo!("implement random copolymer generation")
89    }
90
91    /// Generates an alternating copolymer (–A–B–A–B–).
92    ///
93    /// The BigSMILES must contain exactly 2 repeat units.
94    pub fn alternating_copolymer(&self) -> Result<PolymerChain, PolySimError> {
95        todo!("implement alternating copolymer generation")
96    }
97
98    /// Generates a block copolymer (–AAAA–BBBB–).
99    ///
100    /// `block_lengths` — number of repeat units per block, in order.
101    /// The BigSMILES must contain exactly `block_lengths.len()` repeat units.
102    pub fn block_copolymer(&self, _block_lengths: &[usize]) -> Result<PolymerChain, PolySimError> {
103        todo!("implement block copolymer generation")
104    }
105
106    fn resolve_n(&self, smiles_raw: &str) -> Result<usize, PolySimError> {
107        match &self.strategy {
108            BuildStrategy::ByRepeatCount(n) => Ok(*n),
109            BuildStrategy::ByTargetMn(target) => {
110                resolve_n_by_mass(smiles_raw, *target, average_mass)
111            }
112            BuildStrategy::ByExactMass(target) => {
113                resolve_n_by_mass(smiles_raw, *target, monoisotopic_mass)
114            }
115        }
116    }
117}
118
119// --- internal helpers -------------------------------------------------------
120
121fn find_first_stochastic(bs: &BigSmiles) -> Option<&StochasticObject> {
122    bs.segments.iter().find_map(|seg| match seg {
123        BigSmilesSegment::Stochastic(obj) => Some(obj),
124        _ => None,
125    })
126}
127
128/// Déduit le nombre de répétitions à partir d'une masse cible.
129///
130/// Construit deux chaînes d'essai (n=1 et n=2) pour déterminer la masse par
131/// unité et la masse des groupements terminaux, puis résout par extrapolation
132/// linéaire : MW(n) = n × mw_per_unit + mw_end.
133///
134/// `mass_fn` peut être [`average_mass`] (pour [`BuildStrategy::ByTargetMn`]) ou
135/// [`monoisotopic_mass`] (pour [`BuildStrategy::ByExactMass`]).
136fn resolve_n_by_mass(
137    smiles_raw: &str,
138    target: f64,
139    mass_fn: fn(&PolymerChain) -> f64,
140) -> Result<usize, PolySimError> {
141    let mw1 = mass_fn(&PolymerChain::new(
142        build_linear_smiles(smiles_raw, 1)?,
143        1,
144        0.0,
145    ));
146    let mw2 = mass_fn(&PolymerChain::new(
147        build_linear_smiles(smiles_raw, 2)?,
148        2,
149        0.0,
150    ));
151    let mw_per_unit = mw2 - mw1;
152    let mw_end = mw1 - mw_per_unit;
153    let n = ((target - mw_end) / mw_per_unit).round().max(1.0) as usize;
154    Ok(n)
155}
156
157/// Builds the SMILES string for a linear chain of `n` repeat units.
158///
159/// Ring closure numbers are renumbered for each copy. Because each copy is
160/// self-contained (every ring opened within a copy is also closed within that
161/// copy), the offsets cycle over 1..=99, allowing chains of arbitrary length.
162///
163/// # Errors
164///
165/// Returns [`PolySimError::RingNumberOverflow`] if the repeat unit itself uses
166/// more than 99 distinct ring-closure numbers (already invalid SMILES).
167fn build_linear_smiles(smiles_raw: &str, n: usize) -> Result<String, PolySimError> {
168    let max_ring = max_ring_number(smiles_raw);
169
170    // Pathological case: the repeat unit alone already overflows SMILES ring numbers.
171    if max_ring > 99 {
172        return Err(PolySimError::RingNumberOverflow {
173            max_ring,
174            max_supported: 99,
175        });
176    }
177
178    // Number of distinct copies before ring numbers must be recycled.
179    // Since each copy closes its own rings before the next copy starts,
180    // the same numbers can be safely reused.
181    let cycle_length: usize = if max_ring == 0 {
182        usize::MAX // no ring closures — no cycling needed
183    } else {
184        99 / max_ring as usize
185    };
186
187    let mut result = String::with_capacity(smiles_raw.len() * n);
188    for i in 0..n {
189        let slot = i % cycle_length;
190        let offset = slot as u32 * max_ring;
191        result.push_str(&renumber_ring_closures(smiles_raw, offset));
192    }
193    Ok(result)
194}
195
/// Returns the highest ring-closure number used in a SMILES string.
///
/// Both single-digit closures (`1`) and the two-digit `%dd` notation are
/// recognised; the result is 0 when no ring closure is present.
///
/// Digits inside `[...]` (isotopes, hydrogen counts, charges, atom classes)
/// are ignored.
///
/// A `%` that is not followed by two ASCII digits is malformed. It is skipped
/// together with at most one dangling digit and contributes nothing to the
/// result; no other character is consumed, so bracket tracking stays correct.
fn max_ring_number(smiles: &str) -> u32 {
    let mut max = 0u32;
    let mut in_bracket = false;
    let mut chars = smiles.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '[' => in_bracket = true,
            ']' => in_bracket = false,
            _ if in_bracket => {}
            '%' => {
                // Two-digit notation: %dd. Look ahead so that only digits are
                // consumed; anything else is left for the main loop.
                let tens = chars
                    .next_if(|d| d.is_ascii_digit())
                    .and_then(|d| d.to_digit(10));
                let ones = chars
                    .next_if(|d| d.is_ascii_digit())
                    .and_then(|d| d.to_digit(10));
                if let (Some(t), Some(o)) = (tens, ones) {
                    max = max.max(t * 10 + o);
                }
            }
            _ => {
                // `to_digit(10)` is `Some` only for ASCII '0'..='9'.
                if let Some(d) = c.to_digit(10) {
                    max = max.max(d);
                }
            }
        }
    }
    max
}
227
/// Returns a copy of `smiles` with every ring-closure number incremented by `offset`.
///
/// When `offset` is 0 the string is returned unchanged.
/// Digits inside `[...]` are never modified.
///
/// A single-digit closure whose shifted value exceeds 9 is rewritten in the
/// two-digit `%dd` notation; closures already written as `%dd` stay in that
/// notation. The caller is expected to keep `number + offset` at or below 99:
/// larger values are formatted with three or more digits after the `%`.
///
/// A `%` that is not followed by two ASCII digits is malformed and is copied
/// through verbatim (together with at most one dangling digit) instead of
/// being padded or renumbered.
fn renumber_ring_closures(smiles: &str, offset: u32) -> String {
    if offset == 0 {
        return smiles.to_string();
    }
    let mut result = String::with_capacity(smiles.len() + 4);
    let mut in_bracket = false;
    let mut chars = smiles.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            '[' => {
                in_bracket = true;
                result.push(c);
            }
            ']' => {
                in_bracket = false;
                result.push(c);
            }
            _ if in_bracket => result.push(c),
            '%' => {
                // Look ahead so that only digits are consumed here; any other
                // character (notably '[') is left for the main loop.
                let tens = chars.next_if(|d| d.is_ascii_digit());
                let ones = chars.next_if(|d| d.is_ascii_digit());
                match (
                    tens.and_then(|d| d.to_digit(10)),
                    ones.and_then(|d| d.to_digit(10)),
                ) {
                    (Some(t), Some(o)) => {
                        let new_n = t * 10 + o + offset;
                        result.push_str(&format!("%{new_n:02}"));
                    }
                    _ => {
                        // Malformed token: reproduce exactly what was read.
                        result.push('%');
                        result.extend(tens);
                    }
                }
            }
            _ => match c.to_digit(10) {
                Some(n) => {
                    let new_n = n + offset;
                    // `from_digit` succeeds only while the shifted value is
                    // still a single digit; otherwise switch to `%dd`.
                    match char::from_digit(new_n, 10) {
                        Some(digit) => result.push(digit),
                        None => result.push_str(&format!("%{new_n:02}")),
                    }
                }
                None => result.push(c),
            },
        }
    }
    result
}