Skip to main content

polysim_core/builder/
linear.rs

1use bigsmiles::{BigSmiles, BigSmilesSegment};
2use rand::distr::weighted::WeightedIndex;
3use rand::prelude::*;
4use rand::rngs::StdRng;
5
6use crate::{
7    error::PolySimError,
8    polymer::{Architecture, MonomerUnit, PolymerChain},
9    properties::molecular_weight::{average_mass, monoisotopic_mass},
10};
11
12use super::strategy::BuildStrategy;
13
14/// Gradient composition profile for gradient copolymers.
15#[derive(Debug, Clone)]
16pub enum GradientProfile {
17    /// Linear gradient: f_a(i) = f_start + (f_end - f_start) * i / (n-1)
18    Linear { f_start: f64, f_end: f64 },
19    /// Sigmoid gradient: f_a(i) = f_start + (f_end - f_start) * sigmoid((i/n - 0.5) * 10)
20    Sigmoid { f_start: f64, f_end: f64 },
21}
22
23/// Builder for linear polymer architectures.
24///
25/// Supports homopolymers, random/alternating/block copolymers — all derived
26/// from a single BigSMILES string.
27pub struct LinearBuilder {
28    bigsmiles: BigSmiles,
29    strategy: BuildStrategy,
30    seed: Option<u64>,
31}
32
33impl LinearBuilder {
34    /// Creates a new builder from a parsed BigSMILES and a build strategy.
35    pub fn new(bigsmiles: BigSmiles, strategy: BuildStrategy) -> Self {
36        Self {
37            bigsmiles,
38            strategy,
39            seed: None,
40        }
41    }
42
43    /// Set a random seed for reproducible copolymer generation.
44    pub fn seed(mut self, seed: u64) -> Self {
45        self.seed = Some(seed);
46        self
47    }
48
49    /// Generates a linear homopolymer (single repeat unit, repeated *n* times).
50    ///
51    /// # Errors
52    ///
53    /// - [`PolySimError::NoStochasticObject`] if the BigSMILES contains no
54    ///   stochastic object (`{...}`).
55    /// - [`PolySimError::RepeatUnitCount`] if the stochastic object contains ≠ 1
56    ///   repeat unit.
57    /// - [`PolySimError::BuildStrategy`] if the strategy yields *n* = 0.
58    ///
59    /// # Example
60    ///
61    /// ```rust
62    /// use polysim_core::{parse, builder::{linear::LinearBuilder, BuildStrategy}};
63    ///
64    /// let bs = parse("{[]CC(C)[]}").unwrap(); // polypropylene
65    /// let chain = LinearBuilder::new(bs, BuildStrategy::ByRepeatCount(3))
66    ///     .homopolymer()
67    ///     .unwrap();
68    ///
69    /// assert_eq!(chain.smiles, "CC(C)CC(C)CC(C)");
70    /// assert_eq!(chain.repeat_count, 3);
71    /// ```
72    pub fn homopolymer(&self) -> Result<PolymerChain, PolySimError> {
73        let stoch = self
74            .bigsmiles
75            .first_stochastic()
76            .ok_or(PolySimError::NoStochasticObject)?;
77
78        if stoch.repeat_units.len() != 1 {
79            return Err(PolySimError::RepeatUnitCount {
80                architecture: "homopolymer",
81                got: stoch.repeat_units.len(),
82                need_min: 1,
83            });
84        }
85
86        let fragment = &stoch.repeat_units[0];
87        let n = self.resolve_n(&fragment.smiles_raw)?;
88
89        if n == 0 {
90            return Err(PolySimError::BuildStrategy(
91                "repeat count must be ≥ 1".to_string(),
92            ));
93        }
94
95        let body = build_linear_smiles(&fragment.smiles_raw, n)?;
96        let smiles = self.with_end_groups(&body);
97        let chain = PolymerChain::new(smiles, n, 0.0);
98        let mn = average_mass(&chain);
99        Ok(PolymerChain::new(chain.smiles, n, mn))
100    }
101
102    /// Generates a random (statistical) copolymer.
103    ///
104    /// `fractions` — weight fraction of each repeat unit (must sum to 1.0).
105    /// The BigSMILES must contain exactly `fractions.len()` repeat units.
106    ///
107    /// Uses an optional seed (set via [`Self::seed`]) for reproducibility.
108    pub fn random_copolymer(&self, fractions: &[f64]) -> Result<PolymerChain, PolySimError> {
109        let sum: f64 = fractions.iter().sum();
110        if (sum - 1.0).abs() > 1e-6 {
111            return Err(PolySimError::InvalidFractions { sum });
112        }
113
114        let stoch = self
115            .bigsmiles
116            .first_stochastic()
117            .ok_or(PolySimError::NoStochasticObject)?;
118
119        if stoch.repeat_units.len() < 2 {
120            return Err(PolySimError::RepeatUnitCount {
121                architecture: "random copolymer",
122                got: stoch.repeat_units.len(),
123                need_min: 2,
124            });
125        }
126
127        if fractions.len() != stoch.repeat_units.len() {
128            return Err(PolySimError::RepeatUnitCount {
129                architecture: "random copolymer (fractions count mismatch)",
130                got: fractions.len(),
131                need_min: stoch.repeat_units.len(),
132            });
133        }
134
135        let units: Vec<&str> = stoch
136            .repeat_units
137            .iter()
138            .map(|f| f.smiles_raw.as_str())
139            .collect();
140
141        let mut rng: Box<dyn RngCore> = match self.seed {
142            Some(s) => Box::new(StdRng::seed_from_u64(s)),
143            None => Box::new(rand::rng()),
144        };
145
146        let dist = WeightedIndex::new(fractions)
147            .map_err(|e| PolySimError::BuildStrategy(format!("invalid weight fractions: {e}")))?;
148
149        let sequence = match &self.strategy {
150            BuildStrategy::ByRepeatCount(n) => {
151                let n = *n;
152                if n == 0 {
153                    return Err(PolySimError::BuildStrategy(
154                        "repeat count must be ≥ 1".to_string(),
155                    ));
156                }
157                (0..n).map(|_| dist.sample(&mut *rng)).collect::<Vec<_>>()
158            }
159            BuildStrategy::ByTargetMn(target) => {
160                build_incremental_sequence(&units, *target, average_mass, &mut *rng, &dist)?
161            }
162            BuildStrategy::ByExactMass(target) => {
163                build_incremental_sequence(&units, *target, monoisotopic_mass, &mut *rng, &dist)?
164            }
165        };
166
167        let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
168        let body = build_copolymer_smiles(&smiles_seq)?;
169        let smiles = self.with_end_groups(&body);
170        let n = sequence.len();
171        let chain = PolymerChain::new(smiles, n, 0.0);
172        let mn = average_mass(&chain);
173        Ok(PolymerChain::new(chain.smiles, n, mn))
174    }
175
176    /// Generates an alternating copolymer (–A–B–A–B– or –A–B–C–A–B–C–).
177    ///
178    /// The BigSMILES must contain at least 2 repeat units.
179    pub fn alternating_copolymer(&self) -> Result<PolymerChain, PolySimError> {
180        let stoch = self
181            .bigsmiles
182            .first_stochastic()
183            .ok_or(PolySimError::NoStochasticObject)?;
184
185        if stoch.repeat_units.len() < 2 {
186            return Err(PolySimError::RepeatUnitCount {
187                architecture: "alternating copolymer",
188                got: stoch.repeat_units.len(),
189                need_min: 2,
190            });
191        }
192
193        let units: Vec<&str> = stoch
194            .repeat_units
195            .iter()
196            .map(|f| f.smiles_raw.as_str())
197            .collect();
198        let k = units.len();
199
200        let sequence: Vec<usize> = match &self.strategy {
201            BuildStrategy::ByRepeatCount(n) => {
202                let n = *n;
203                if n == 0 {
204                    return Err(PolySimError::BuildStrategy(
205                        "repeat count must be ≥ 1".to_string(),
206                    ));
207                }
208                (0..n).map(|i| i % k).collect()
209            }
210            BuildStrategy::ByTargetMn(target) => {
211                build_incremental_alternating(&units, *target, average_mass)?
212            }
213            BuildStrategy::ByExactMass(target) => {
214                build_incremental_alternating(&units, *target, monoisotopic_mass)?
215            }
216        };
217
218        let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
219        let body = build_copolymer_smiles(&smiles_seq)?;
220        let smiles = self.with_end_groups(&body);
221        let n = sequence.len();
222        let chain = PolymerChain::new(smiles, n, 0.0);
223        let mn = average_mass(&chain);
224        Ok(PolymerChain::new(chain.smiles, n, mn))
225    }
226
227    /// Generates a block copolymer (–AAAA–BBBB–).
228    ///
229    /// `block_lengths` — number of repeat units per block, in order.
230    /// The BigSMILES must contain exactly `block_lengths.len()` repeat units.
231    ///
232    /// The `BuildStrategy` is ignored — `block_lengths` fully determines the chain.
233    pub fn block_copolymer(&self, block_lengths: &[usize]) -> Result<PolymerChain, PolySimError> {
234        let stoch = self
235            .bigsmiles
236            .first_stochastic()
237            .ok_or(PolySimError::NoStochasticObject)?;
238
239        if stoch.repeat_units.len() < 2 {
240            return Err(PolySimError::RepeatUnitCount {
241                architecture: "block copolymer",
242                got: stoch.repeat_units.len(),
243                need_min: 2,
244            });
245        }
246
247        if block_lengths.len() != stoch.repeat_units.len() {
248            return Err(PolySimError::RepeatUnitCount {
249                architecture: "block copolymer (block_lengths count mismatch)",
250                got: block_lengths.len(),
251                need_min: stoch.repeat_units.len(),
252            });
253        }
254
255        let units: Vec<&str> = stoch
256            .repeat_units
257            .iter()
258            .map(|f| f.smiles_raw.as_str())
259            .collect();
260
261        let smiles_seq: Vec<&str> = block_lengths
262            .iter()
263            .zip(units.iter())
264            .flat_map(|(&len, &unit)| std::iter::repeat_n(unit, len))
265            .collect();
266
267        let n = smiles_seq.len();
268        if n == 0 {
269            return Err(PolySimError::BuildStrategy(
270                "total block length must be ≥ 1".to_string(),
271            ));
272        }
273
274        let body = build_copolymer_smiles(&smiles_seq)?;
275        let smiles = self.with_end_groups(&body);
276        let chain = PolymerChain::new(smiles, n, 0.0);
277        let mn = average_mass(&chain);
278        Ok(PolymerChain::new(chain.smiles, n, mn))
279    }
280
281    /// Generates a gradient copolymer where the composition of monomer A varies
282    /// along the chain according to the given [`GradientProfile`].
283    ///
284    /// The BigSMILES must contain exactly 2 repeat units (A and B).
285    pub fn gradient_copolymer(
286        &self,
287        profile: &GradientProfile,
288    ) -> Result<PolymerChain, PolySimError> {
289        let stoch = self
290            .bigsmiles
291            .first_stochastic()
292            .ok_or(PolySimError::NoStochasticObject)?;
293
294        if stoch.repeat_units.len() != 2 {
295            return Err(PolySimError::RepeatUnitCount {
296                architecture: "gradient copolymer",
297                got: stoch.repeat_units.len(),
298                need_min: 2,
299            });
300        }
301
302        let units: Vec<&str> = stoch
303            .repeat_units
304            .iter()
305            .map(|f| f.smiles_raw.as_str())
306            .collect();
307
308        // Resolve chain length using unit A
309        let n = self.resolve_n(units[0])?;
310        if n == 0 {
311            return Err(PolySimError::BuildStrategy(
312                "repeat count must be >= 1".to_string(),
313            ));
314        }
315
316        let mut rng: Box<dyn RngCore> = match self.seed {
317            Some(s) => Box::new(StdRng::seed_from_u64(s)),
318            None => Box::new(rand::rng()),
319        };
320
321        let mut count_a: usize = 0;
322        let mut sequence = Vec::with_capacity(n);
323
324        for i in 0..n {
325            let f_a = gradient_fraction(profile, i, n);
326            let pick: f64 = rng.random();
327            let idx = if pick < f_a { 0 } else { 1 };
328            if idx == 0 {
329                count_a += 1;
330            }
331            sequence.push(idx);
332        }
333
334        let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
335        let body = build_copolymer_smiles(&smiles_seq)?;
336        let smiles = self.with_end_groups(&body);
337        let chain = PolymerChain::new(smiles, n, 0.0);
338        let mn = average_mass(&chain);
339
340        let frac_a = count_a as f64 / n as f64;
341        let composition = vec![
342            MonomerUnit::new(units[0], frac_a),
343            MonomerUnit::new(units[1], 1.0 - frac_a),
344        ];
345
346        Ok(PolymerChain::new(chain.smiles, n, mn)
347            .with_composition(composition)
348            .with_architecture(Architecture::Gradient))
349    }
350
351    /// Generates a cyclic homopolymer (ring closure connecting first and last atom).
352    ///
353    /// The BigSMILES must contain exactly 1 repeat unit.
354    pub fn cyclic_homopolymer(&self) -> Result<PolymerChain, PolySimError> {
355        let stoch = self
356            .bigsmiles
357            .first_stochastic()
358            .ok_or(PolySimError::NoStochasticObject)?;
359
360        if stoch.repeat_units.len() != 1 {
361            return Err(PolySimError::RepeatUnitCount {
362                architecture: "cyclic homopolymer",
363                got: stoch.repeat_units.len(),
364                need_min: 1,
365            });
366        }
367
368        let fragment = &stoch.repeat_units[0];
369        let n = self.resolve_n(&fragment.smiles_raw)?;
370
371        if n == 0 {
372            return Err(PolySimError::BuildStrategy(
373                "repeat count must be >= 1".to_string(),
374            ));
375        }
376
377        let linear = build_linear_smiles(&fragment.smiles_raw, n)?;
378        let smiles = make_cyclic_smiles(&linear);
379        let chain = PolymerChain::new(smiles, n, 0.0);
380        let mn = average_mass(&chain);
381        Ok(PolymerChain::new(chain.smiles, n, mn).with_architecture(Architecture::Cyclic))
382    }
383
384    /// Prepends prefix and appends suffix SMILES segments from the BigSMILES.
385    fn with_end_groups(&self, body: &str) -> String {
386        let prefix = collect_smiles_segments(self.bigsmiles.prefix_segments());
387        let suffix = collect_smiles_segments(self.bigsmiles.suffix_segments());
388        let mut result = String::with_capacity(prefix.len() + body.len() + suffix.len());
389        result.push_str(&prefix);
390        result.push_str(body);
391        result.push_str(&suffix);
392        result
393    }
394
395    fn resolve_n(&self, smiles_raw: &str) -> Result<usize, PolySimError> {
396        match &self.strategy {
397            BuildStrategy::ByRepeatCount(n) => Ok(*n),
398            BuildStrategy::ByTargetMn(target) => {
399                resolve_n_by_mass(smiles_raw, *target, average_mass)
400            }
401            BuildStrategy::ByExactMass(target) => {
402                resolve_n_by_mass(smiles_raw, *target, monoisotopic_mass)
403            }
404        }
405    }
406}
407
408// --- internal helpers -------------------------------------------------------
409
410/// Déduit le nombre de répétitions à partir d'une masse cible.
411///
412/// Construit deux chaînes d'essai (n=1 et n=2) pour déterminer la masse par
413/// unité et la masse des groupements terminaux, puis résout par extrapolation
414/// linéaire : MW(n) = n × mw_per_unit + mw_end.
415///
416/// `mass_fn` peut être [`average_mass`] (pour [`BuildStrategy::ByTargetMn`]) ou
417/// [`monoisotopic_mass`] (pour [`BuildStrategy::ByExactMass`]).
418pub(crate) fn resolve_n_by_mass(
419    smiles_raw: &str,
420    target: f64,
421    mass_fn: fn(&PolymerChain) -> f64,
422) -> Result<usize, PolySimError> {
423    let mw1 = mass_fn(&PolymerChain::new(
424        build_linear_smiles(smiles_raw, 1)?,
425        1,
426        0.0,
427    ));
428    let mw2 = mass_fn(&PolymerChain::new(
429        build_linear_smiles(smiles_raw, 2)?,
430        2,
431        0.0,
432    ));
433    let mw_per_unit = mw2 - mw1;
434    let mw_end = mw1 - mw_per_unit;
435    let n = ((target - mw_end) / mw_per_unit).round().max(1.0) as usize;
436    Ok(n)
437}
438
439/// Calibrates per-unit masses for each distinct repeat unit via 2-point method.
440///
441/// Returns `(unit_masses, m_end)` where:
442/// - `unit_masses[i]` is the mass contribution of one copy of unit i
443/// - `m_end` is the end-group mass (constant across all units)
444fn calibrate_unit_masses(
445    units: &[&str],
446    mass_fn: fn(&PolymerChain) -> f64,
447) -> Result<(Vec<f64>, f64), PolySimError> {
448    let mut unit_masses = Vec::with_capacity(units.len());
449    let mut m_end_sum = 0.0;
450
451    for &unit in units {
452        let mw1 = mass_fn(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
453        let mw2 = mass_fn(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
454        let m0 = mw2 - mw1;
455        unit_masses.push(m0);
456        m_end_sum += mw1 - m0;
457    }
458
459    // Average end-group mass (should be ~identical for all units, but average for safety)
460    let m_end = m_end_sum / units.len() as f64;
461    Ok((unit_masses, m_end))
462}
463
464/// Builds a copolymer unit sequence incrementally for random copolymers.
465///
466/// Adds units one at a time (sampled from weighted distribution), tracking
467/// accumulated mass. Stops when closest to the target mass.
468fn build_incremental_sequence(
469    units: &[&str],
470    target: f64,
471    mass_fn: fn(&PolymerChain) -> f64,
472    rng: &mut dyn RngCore,
473    dist: &WeightedIndex<f64>,
474) -> Result<Vec<usize>, PolySimError> {
475    let (unit_masses, m_end) = calibrate_unit_masses(units, mass_fn)?;
476
477    let mut sequence = Vec::new();
478    let mut running_mass = m_end;
479
480    loop {
481        let idx = dist.sample(rng);
482        running_mass += unit_masses[idx];
483        sequence.push(idx);
484
485        if running_mass >= target {
486            // Check if removing last unit gets closer
487            if sequence.len() > 1 {
488                let mass_without = running_mass - unit_masses[idx];
489                if (mass_without - target).abs() < (running_mass - target).abs() {
490                    sequence.pop();
491                }
492            }
493            break;
494        }
495    }
496
497    Ok(sequence)
498}
499
500/// Builds a copolymer unit sequence incrementally for alternating copolymers.
501fn build_incremental_alternating(
502    units: &[&str],
503    target: f64,
504    mass_fn: fn(&PolymerChain) -> f64,
505) -> Result<Vec<usize>, PolySimError> {
506    let (unit_masses, m_end) = calibrate_unit_masses(units, mass_fn)?;
507    let k = units.len();
508
509    let mut sequence = Vec::new();
510    let mut running_mass = m_end;
511
512    loop {
513        let idx = sequence.len() % k;
514        running_mass += unit_masses[idx];
515        sequence.push(idx);
516
517        if running_mass >= target {
518            if sequence.len() > 1 {
519                let mass_without = running_mass - unit_masses[idx];
520                if (mass_without - target).abs() < (running_mass - target).abs() {
521                    sequence.pop();
522                }
523            }
524            break;
525        }
526    }
527
528    Ok(sequence)
529}
530
531/// Builds the SMILES string for a linear chain of `n` repeat units.
532///
533/// Ring closure numbers are renumbered for each copy. Because each copy is
534/// self-contained (every ring opened within a copy is also closed within that
535/// copy), the offsets cycle over 1..=99, allowing chains of arbitrary length.
536///
537/// # Errors
538///
539/// Returns [`PolySimError::RingNumberOverflow`] if the repeat unit itself uses
540/// more than 99 distinct ring-closure numbers (already invalid SMILES).
541pub(crate) fn build_linear_smiles(smiles_raw: &str, n: usize) -> Result<String, PolySimError> {
542    let max_ring = max_ring_number(smiles_raw);
543
544    // Pathological case: the repeat unit alone already overflows SMILES ring numbers.
545    if max_ring > 99 {
546        return Err(PolySimError::RingNumberOverflow {
547            max_ring,
548            max_supported: 99,
549        });
550    }
551
552    // Number of distinct copies before ring numbers must be recycled.
553    // Since each copy closes its own rings before the next copy starts,
554    // the same numbers can be safely reused.
555    let cycle_length: usize = if max_ring == 0 {
556        usize::MAX // no ring closures — no cycling needed
557    } else {
558        99 / max_ring as usize
559    };
560
561    let mut result = String::with_capacity(smiles_raw.len() * n);
562    for i in 0..n {
563        let slot = i % cycle_length;
564        let offset = slot as u32 * max_ring;
565        result.push_str(&renumber_ring_closures(smiles_raw, offset));
566    }
567    Ok(result)
568}
569
570/// Builds the SMILES string for a copolymer from a heterogeneous sequence of
571/// repeat-unit SMILES fragments.
572///
573/// Ring closure numbers are renumbered globally so they never collide across
574/// consecutive units, regardless of which unit type follows which.
575pub(crate) fn build_copolymer_smiles(unit_sequence: &[&str]) -> Result<String, PolySimError> {
576    // Compute max ring number across ALL distinct units.
577    let global_max_ring = unit_sequence
578        .iter()
579        .map(|u| max_ring_number(u))
580        .max()
581        .unwrap_or(0);
582
583    if global_max_ring > 99 {
584        return Err(PolySimError::RingNumberOverflow {
585            max_ring: global_max_ring,
586            max_supported: 99,
587        });
588    }
589
590    let cycle_length: usize = if global_max_ring == 0 {
591        usize::MAX
592    } else {
593        99 / global_max_ring as usize
594    };
595
596    let total_len: usize = unit_sequence.iter().map(|u| u.len()).sum();
597    let mut result = String::with_capacity(total_len + unit_sequence.len() * 4);
598
599    for (i, &unit) in unit_sequence.iter().enumerate() {
600        let slot = i % cycle_length;
601        let offset = slot as u32 * global_max_ring;
602        result.push_str(&renumber_ring_closures(unit, offset));
603    }
604
605    Ok(result)
606}
607
608/// Returns the highest ring-closure number used in a SMILES string.
609///
610/// Digits inside `[...]` (isotopes, hydrogen counts, charges, atom classes)
611/// are ignored.
612pub(crate) fn max_ring_number(smiles: &str) -> u32 {
613    let mut max = 0u32;
614    let mut in_bracket = false;
615    let mut chars = smiles.chars().peekable();
616
617    while let Some(c) = chars.next() {
618        match c {
619            '[' => in_bracket = true,
620            ']' => in_bracket = false,
621            _ if in_bracket => {}
622            '%' => {
623                // Two-digit notation: %dd
624                let d1 = chars.next().unwrap_or('0');
625                let d2 = chars.next().unwrap_or('0');
626                if d1.is_ascii_digit() && d2.is_ascii_digit() {
627                    let n = (d1 as u32 - '0' as u32) * 10 + (d2 as u32 - '0' as u32);
628                    max = max.max(n);
629                }
630            }
631            c if c.is_ascii_digit() => {
632                max = max.max(c as u32 - '0' as u32);
633            }
634            _ => {}
635        }
636    }
637    max
638}
639
640/// Returns a copy of `smiles` with every ring-closure number incremented by `offset`.
641///
642/// When `offset` is 0 the string is returned unchanged.
643/// Digits inside `[...]` are never modified.
644pub(crate) fn renumber_ring_closures(smiles: &str, offset: u32) -> String {
645    if offset == 0 {
646        return smiles.to_string();
647    }
648    let mut result = String::with_capacity(smiles.len() + 4);
649    let mut in_bracket = false;
650    let mut chars = smiles.chars().peekable();
651
652    while let Some(c) = chars.next() {
653        match c {
654            '[' => {
655                in_bracket = true;
656                result.push(c);
657            }
658            ']' => {
659                in_bracket = false;
660                result.push(c);
661            }
662            _ if in_bracket => result.push(c),
663            '%' => {
664                let d1 = chars.next().unwrap_or('0');
665                let d2 = chars.next().unwrap_or('0');
666                if d1.is_ascii_digit() && d2.is_ascii_digit() {
667                    let n = (d1 as u32 - '0' as u32) * 10 + (d2 as u32 - '0' as u32);
668                    let new_n = n + offset;
669                    result.push('%');
670                    result.push_str(&format!("{new_n:02}"));
671                } else {
672                    result.push('%');
673                    result.push(d1);
674                    result.push(d2);
675                }
676            }
677            c if c.is_ascii_digit() => {
678                let n = c as u32 - '0' as u32;
679                let new_n = n + offset;
680                if new_n <= 9 {
681                    result.push(char::from_digit(new_n, 10).unwrap());
682                } else {
683                    result.push('%');
684                    result.push_str(&format!("{new_n:02}"));
685                }
686            }
687            _ => result.push(c),
688        }
689    }
690    result
691}
692
693/// Extracts plain SMILES text from a slice of BigSMILES segments,
694/// ignoring any stochastic objects.
695pub(crate) fn collect_smiles_segments(segs: &[BigSmilesSegment]) -> String {
696    segs.iter()
697        .filter_map(|s| match s {
698            BigSmilesSegment::Smiles(mol) => Some(format!("{mol}")),
699            _ => None,
700        })
701        .collect()
702}
703
704/// Computes the fraction of monomer A at position `i` in a chain of length `n`.
705pub(crate) fn gradient_fraction(profile: &GradientProfile, i: usize, n: usize) -> f64 {
706    match profile {
707        GradientProfile::Linear { f_start, f_end } => {
708            if n <= 1 {
709                *f_start
710            } else {
711                f_start + (f_end - f_start) * i as f64 / (n - 1) as f64
712            }
713        }
714        GradientProfile::Sigmoid { f_start, f_end } => {
715            if n <= 1 {
716                *f_start
717            } else {
718                let x = (i as f64 / n as f64 - 0.5) * 10.0;
719                let sigma = 1.0 / (1.0 + (-x).exp());
720                f_start + (f_end - f_start) * sigma
721            }
722        }
723    }
724}
725
726/// Converts a linear SMILES into a cyclic one by inserting ring closure label "1"
727/// after the first atom and appending "1" at the end.
728///
729/// Handles bracket atoms (`[...]`) and two-letter organic atoms (`Cl`, `Br`).
730fn make_cyclic_smiles(linear: &str) -> String {
731    let mut result = String::with_capacity(linear.len() + 2);
732    let mut chars = linear.chars().peekable();
733
734    // Find and copy the first atom, then insert "1"
735    if let Some(c) = chars.next() {
736        if c == '[' {
737            // Bracket atom: copy up to and including ']'
738            result.push(c);
739            for ch in chars.by_ref() {
740                result.push(ch);
741                if ch == ']' {
742                    break;
743                }
744            }
745        } else {
746            result.push(c);
747            // Check for two-letter organic atoms (Cl, Br, Si, etc.)
748            if c.is_ascii_uppercase() {
749                if let Some(&next) = chars.peek() {
750                    if next.is_ascii_lowercase() && next != 'c'
751                        || matches!(
752                            (c, next),
753                            ('C', 'l')
754                                | ('B', 'r')
755                                | ('S', 'i')
756                                | ('S', 'e')
757                                | ('A', 'l')
758                                | ('A', 's')
759                                | ('A', 'r')
760                                | ('A', 't')
761                                | ('M', 'g')
762                                | ('N', 'a')
763                                | ('G', 'e')
764                        )
765                    {
766                        result.push(chars.next().unwrap());
767                    }
768                }
769            }
770        }
771        result.push('1');
772    }
773
774    // Copy the rest
775    for ch in chars {
776        result.push(ch);
777    }
778
779    // Append ring closure at end
780    result.push('1');
781    result
782}