1use bigsmiles::{BigSmiles, BigSmilesSegment};
2use rand::distr::weighted::WeightedIndex;
3use rand::prelude::*;
4use rand::rngs::StdRng;
5
6use crate::{
7 error::PolySimError,
8 polymer::{Architecture, MonomerUnit, PolymerChain},
9 properties::molecular_weight::{average_mass, monoisotopic_mass},
10};
11
12use super::strategy::BuildStrategy;
13
14#[derive(Debug, Clone)]
16pub enum GradientProfile {
17 Linear { f_start: f64, f_end: f64 },
19 Sigmoid { f_start: f64, f_end: f64 },
21}
22
23pub struct LinearBuilder {
28 bigsmiles: BigSmiles,
29 strategy: BuildStrategy,
30 seed: Option<u64>,
31}
32
33impl LinearBuilder {
34 pub fn new(bigsmiles: BigSmiles, strategy: BuildStrategy) -> Self {
36 Self {
37 bigsmiles,
38 strategy,
39 seed: None,
40 }
41 }
42
43 pub fn seed(mut self, seed: u64) -> Self {
45 self.seed = Some(seed);
46 self
47 }
48
49 pub fn homopolymer(&self) -> Result<PolymerChain, PolySimError> {
73 let stoch = self
74 .bigsmiles
75 .first_stochastic()
76 .ok_or(PolySimError::NoStochasticObject)?;
77
78 if stoch.repeat_units.len() != 1 {
79 return Err(PolySimError::RepeatUnitCount {
80 architecture: "homopolymer",
81 got: stoch.repeat_units.len(),
82 need_min: 1,
83 });
84 }
85
86 let fragment = &stoch.repeat_units[0];
87 let n = self.resolve_n(&fragment.smiles_raw)?;
88
89 if n == 0 {
90 return Err(PolySimError::BuildStrategy(
91 "repeat count must be ≥ 1".to_string(),
92 ));
93 }
94
95 let body = build_linear_smiles(&fragment.smiles_raw, n)?;
96 let smiles = self.with_end_groups(&body);
97 let chain = PolymerChain::new(smiles, n, 0.0);
98 let mn = average_mass(&chain);
99 Ok(PolymerChain::new(chain.smiles, n, mn))
100 }
101
102 pub fn random_copolymer(&self, fractions: &[f64]) -> Result<PolymerChain, PolySimError> {
109 let sum: f64 = fractions.iter().sum();
110 if (sum - 1.0).abs() > 1e-6 {
111 return Err(PolySimError::InvalidFractions { sum });
112 }
113
114 let stoch = self
115 .bigsmiles
116 .first_stochastic()
117 .ok_or(PolySimError::NoStochasticObject)?;
118
119 if stoch.repeat_units.len() < 2 {
120 return Err(PolySimError::RepeatUnitCount {
121 architecture: "random copolymer",
122 got: stoch.repeat_units.len(),
123 need_min: 2,
124 });
125 }
126
127 if fractions.len() != stoch.repeat_units.len() {
128 return Err(PolySimError::RepeatUnitCount {
129 architecture: "random copolymer (fractions count mismatch)",
130 got: fractions.len(),
131 need_min: stoch.repeat_units.len(),
132 });
133 }
134
135 let units: Vec<&str> = stoch
136 .repeat_units
137 .iter()
138 .map(|f| f.smiles_raw.as_str())
139 .collect();
140
141 let mut rng: Box<dyn RngCore> = match self.seed {
142 Some(s) => Box::new(StdRng::seed_from_u64(s)),
143 None => Box::new(rand::rng()),
144 };
145
146 let dist = WeightedIndex::new(fractions)
147 .map_err(|e| PolySimError::BuildStrategy(format!("invalid weight fractions: {e}")))?;
148
149 let sequence = match &self.strategy {
150 BuildStrategy::ByRepeatCount(n) => {
151 let n = *n;
152 if n == 0 {
153 return Err(PolySimError::BuildStrategy(
154 "repeat count must be ≥ 1".to_string(),
155 ));
156 }
157 (0..n).map(|_| dist.sample(&mut *rng)).collect::<Vec<_>>()
158 }
159 BuildStrategy::ByTargetMn(target) => {
160 build_incremental_sequence(&units, *target, average_mass, &mut *rng, &dist)?
161 }
162 BuildStrategy::ByExactMass(target) => {
163 build_incremental_sequence(&units, *target, monoisotopic_mass, &mut *rng, &dist)?
164 }
165 };
166
167 let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
168 let body = build_copolymer_smiles(&smiles_seq)?;
169 let smiles = self.with_end_groups(&body);
170 let n = sequence.len();
171 let chain = PolymerChain::new(smiles, n, 0.0);
172 let mn = average_mass(&chain);
173 Ok(PolymerChain::new(chain.smiles, n, mn))
174 }
175
176 pub fn alternating_copolymer(&self) -> Result<PolymerChain, PolySimError> {
180 let stoch = self
181 .bigsmiles
182 .first_stochastic()
183 .ok_or(PolySimError::NoStochasticObject)?;
184
185 if stoch.repeat_units.len() < 2 {
186 return Err(PolySimError::RepeatUnitCount {
187 architecture: "alternating copolymer",
188 got: stoch.repeat_units.len(),
189 need_min: 2,
190 });
191 }
192
193 let units: Vec<&str> = stoch
194 .repeat_units
195 .iter()
196 .map(|f| f.smiles_raw.as_str())
197 .collect();
198 let k = units.len();
199
200 let sequence: Vec<usize> = match &self.strategy {
201 BuildStrategy::ByRepeatCount(n) => {
202 let n = *n;
203 if n == 0 {
204 return Err(PolySimError::BuildStrategy(
205 "repeat count must be ≥ 1".to_string(),
206 ));
207 }
208 (0..n).map(|i| i % k).collect()
209 }
210 BuildStrategy::ByTargetMn(target) => {
211 build_incremental_alternating(&units, *target, average_mass)?
212 }
213 BuildStrategy::ByExactMass(target) => {
214 build_incremental_alternating(&units, *target, monoisotopic_mass)?
215 }
216 };
217
218 let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
219 let body = build_copolymer_smiles(&smiles_seq)?;
220 let smiles = self.with_end_groups(&body);
221 let n = sequence.len();
222 let chain = PolymerChain::new(smiles, n, 0.0);
223 let mn = average_mass(&chain);
224 Ok(PolymerChain::new(chain.smiles, n, mn))
225 }
226
227 pub fn block_copolymer(&self, block_lengths: &[usize]) -> Result<PolymerChain, PolySimError> {
234 let stoch = self
235 .bigsmiles
236 .first_stochastic()
237 .ok_or(PolySimError::NoStochasticObject)?;
238
239 if stoch.repeat_units.len() < 2 {
240 return Err(PolySimError::RepeatUnitCount {
241 architecture: "block copolymer",
242 got: stoch.repeat_units.len(),
243 need_min: 2,
244 });
245 }
246
247 if block_lengths.len() != stoch.repeat_units.len() {
248 return Err(PolySimError::RepeatUnitCount {
249 architecture: "block copolymer (block_lengths count mismatch)",
250 got: block_lengths.len(),
251 need_min: stoch.repeat_units.len(),
252 });
253 }
254
255 let units: Vec<&str> = stoch
256 .repeat_units
257 .iter()
258 .map(|f| f.smiles_raw.as_str())
259 .collect();
260
261 let smiles_seq: Vec<&str> = block_lengths
262 .iter()
263 .zip(units.iter())
264 .flat_map(|(&len, &unit)| std::iter::repeat_n(unit, len))
265 .collect();
266
267 let n = smiles_seq.len();
268 if n == 0 {
269 return Err(PolySimError::BuildStrategy(
270 "total block length must be ≥ 1".to_string(),
271 ));
272 }
273
274 let body = build_copolymer_smiles(&smiles_seq)?;
275 let smiles = self.with_end_groups(&body);
276 let chain = PolymerChain::new(smiles, n, 0.0);
277 let mn = average_mass(&chain);
278 Ok(PolymerChain::new(chain.smiles, n, mn))
279 }
280
281 pub fn gradient_copolymer(
286 &self,
287 profile: &GradientProfile,
288 ) -> Result<PolymerChain, PolySimError> {
289 let stoch = self
290 .bigsmiles
291 .first_stochastic()
292 .ok_or(PolySimError::NoStochasticObject)?;
293
294 if stoch.repeat_units.len() != 2 {
295 return Err(PolySimError::RepeatUnitCount {
296 architecture: "gradient copolymer",
297 got: stoch.repeat_units.len(),
298 need_min: 2,
299 });
300 }
301
302 let units: Vec<&str> = stoch
303 .repeat_units
304 .iter()
305 .map(|f| f.smiles_raw.as_str())
306 .collect();
307
308 let n = self.resolve_n(units[0])?;
310 if n == 0 {
311 return Err(PolySimError::BuildStrategy(
312 "repeat count must be >= 1".to_string(),
313 ));
314 }
315
316 let mut rng: Box<dyn RngCore> = match self.seed {
317 Some(s) => Box::new(StdRng::seed_from_u64(s)),
318 None => Box::new(rand::rng()),
319 };
320
321 let mut count_a: usize = 0;
322 let mut sequence = Vec::with_capacity(n);
323
324 for i in 0..n {
325 let f_a = gradient_fraction(profile, i, n);
326 let pick: f64 = rng.random();
327 let idx = if pick < f_a { 0 } else { 1 };
328 if idx == 0 {
329 count_a += 1;
330 }
331 sequence.push(idx);
332 }
333
334 let smiles_seq: Vec<&str> = sequence.iter().map(|&i| units[i]).collect();
335 let body = build_copolymer_smiles(&smiles_seq)?;
336 let smiles = self.with_end_groups(&body);
337 let chain = PolymerChain::new(smiles, n, 0.0);
338 let mn = average_mass(&chain);
339
340 let frac_a = count_a as f64 / n as f64;
341 let composition = vec![
342 MonomerUnit::new(units[0], frac_a),
343 MonomerUnit::new(units[1], 1.0 - frac_a),
344 ];
345
346 Ok(PolymerChain::new(chain.smiles, n, mn)
347 .with_composition(composition)
348 .with_architecture(Architecture::Gradient))
349 }
350
351 pub fn cyclic_homopolymer(&self) -> Result<PolymerChain, PolySimError> {
355 let stoch = self
356 .bigsmiles
357 .first_stochastic()
358 .ok_or(PolySimError::NoStochasticObject)?;
359
360 if stoch.repeat_units.len() != 1 {
361 return Err(PolySimError::RepeatUnitCount {
362 architecture: "cyclic homopolymer",
363 got: stoch.repeat_units.len(),
364 need_min: 1,
365 });
366 }
367
368 let fragment = &stoch.repeat_units[0];
369 let n = self.resolve_n(&fragment.smiles_raw)?;
370
371 if n == 0 {
372 return Err(PolySimError::BuildStrategy(
373 "repeat count must be >= 1".to_string(),
374 ));
375 }
376
377 let linear = build_linear_smiles(&fragment.smiles_raw, n)?;
378 let smiles = make_cyclic_smiles(&linear);
379 let chain = PolymerChain::new(smiles, n, 0.0);
380 let mn = average_mass(&chain);
381 Ok(PolymerChain::new(chain.smiles, n, mn).with_architecture(Architecture::Cyclic))
382 }
383
384 fn with_end_groups(&self, body: &str) -> String {
386 let prefix = collect_smiles_segments(self.bigsmiles.prefix_segments());
387 let suffix = collect_smiles_segments(self.bigsmiles.suffix_segments());
388 let mut result = String::with_capacity(prefix.len() + body.len() + suffix.len());
389 result.push_str(&prefix);
390 result.push_str(body);
391 result.push_str(&suffix);
392 result
393 }
394
395 fn resolve_n(&self, smiles_raw: &str) -> Result<usize, PolySimError> {
396 match &self.strategy {
397 BuildStrategy::ByRepeatCount(n) => Ok(*n),
398 BuildStrategy::ByTargetMn(target) => {
399 resolve_n_by_mass(smiles_raw, *target, average_mass)
400 }
401 BuildStrategy::ByExactMass(target) => {
402 resolve_n_by_mass(smiles_raw, *target, monoisotopic_mass)
403 }
404 }
405 }
406}
407
408pub(crate) fn resolve_n_by_mass(
419 smiles_raw: &str,
420 target: f64,
421 mass_fn: fn(&PolymerChain) -> f64,
422) -> Result<usize, PolySimError> {
423 let mw1 = mass_fn(&PolymerChain::new(
424 build_linear_smiles(smiles_raw, 1)?,
425 1,
426 0.0,
427 ));
428 let mw2 = mass_fn(&PolymerChain::new(
429 build_linear_smiles(smiles_raw, 2)?,
430 2,
431 0.0,
432 ));
433 let mw_per_unit = mw2 - mw1;
434 let mw_end = mw1 - mw_per_unit;
435 let n = ((target - mw_end) / mw_per_unit).round().max(1.0) as usize;
436 Ok(n)
437}
438
439fn calibrate_unit_masses(
445 units: &[&str],
446 mass_fn: fn(&PolymerChain) -> f64,
447) -> Result<(Vec<f64>, f64), PolySimError> {
448 let mut unit_masses = Vec::with_capacity(units.len());
449 let mut m_end_sum = 0.0;
450
451 for &unit in units {
452 let mw1 = mass_fn(&PolymerChain::new(build_linear_smiles(unit, 1)?, 1, 0.0));
453 let mw2 = mass_fn(&PolymerChain::new(build_linear_smiles(unit, 2)?, 2, 0.0));
454 let m0 = mw2 - mw1;
455 unit_masses.push(m0);
456 m_end_sum += mw1 - m0;
457 }
458
459 let m_end = m_end_sum / units.len() as f64;
461 Ok((unit_masses, m_end))
462}
463
464fn build_incremental_sequence(
469 units: &[&str],
470 target: f64,
471 mass_fn: fn(&PolymerChain) -> f64,
472 rng: &mut dyn RngCore,
473 dist: &WeightedIndex<f64>,
474) -> Result<Vec<usize>, PolySimError> {
475 let (unit_masses, m_end) = calibrate_unit_masses(units, mass_fn)?;
476
477 let mut sequence = Vec::new();
478 let mut running_mass = m_end;
479
480 loop {
481 let idx = dist.sample(rng);
482 running_mass += unit_masses[idx];
483 sequence.push(idx);
484
485 if running_mass >= target {
486 if sequence.len() > 1 {
488 let mass_without = running_mass - unit_masses[idx];
489 if (mass_without - target).abs() < (running_mass - target).abs() {
490 sequence.pop();
491 }
492 }
493 break;
494 }
495 }
496
497 Ok(sequence)
498}
499
500fn build_incremental_alternating(
502 units: &[&str],
503 target: f64,
504 mass_fn: fn(&PolymerChain) -> f64,
505) -> Result<Vec<usize>, PolySimError> {
506 let (unit_masses, m_end) = calibrate_unit_masses(units, mass_fn)?;
507 let k = units.len();
508
509 let mut sequence = Vec::new();
510 let mut running_mass = m_end;
511
512 loop {
513 let idx = sequence.len() % k;
514 running_mass += unit_masses[idx];
515 sequence.push(idx);
516
517 if running_mass >= target {
518 if sequence.len() > 1 {
519 let mass_without = running_mass - unit_masses[idx];
520 if (mass_without - target).abs() < (running_mass - target).abs() {
521 sequence.pop();
522 }
523 }
524 break;
525 }
526 }
527
528 Ok(sequence)
529}
530
531pub(crate) fn build_linear_smiles(smiles_raw: &str, n: usize) -> Result<String, PolySimError> {
542 let max_ring = max_ring_number(smiles_raw);
543
544 if max_ring > 99 {
546 return Err(PolySimError::RingNumberOverflow {
547 max_ring,
548 max_supported: 99,
549 });
550 }
551
552 let cycle_length: usize = if max_ring == 0 {
556 usize::MAX } else {
558 99 / max_ring as usize
559 };
560
561 let mut result = String::with_capacity(smiles_raw.len() * n);
562 for i in 0..n {
563 let slot = i % cycle_length;
564 let offset = slot as u32 * max_ring;
565 result.push_str(&renumber_ring_closures(smiles_raw, offset));
566 }
567 Ok(result)
568}
569
570pub(crate) fn build_copolymer_smiles(unit_sequence: &[&str]) -> Result<String, PolySimError> {
576 let global_max_ring = unit_sequence
578 .iter()
579 .map(|u| max_ring_number(u))
580 .max()
581 .unwrap_or(0);
582
583 if global_max_ring > 99 {
584 return Err(PolySimError::RingNumberOverflow {
585 max_ring: global_max_ring,
586 max_supported: 99,
587 });
588 }
589
590 let cycle_length: usize = if global_max_ring == 0 {
591 usize::MAX
592 } else {
593 99 / global_max_ring as usize
594 };
595
596 let total_len: usize = unit_sequence.iter().map(|u| u.len()).sum();
597 let mut result = String::with_capacity(total_len + unit_sequence.len() * 4);
598
599 for (i, &unit) in unit_sequence.iter().enumerate() {
600 let slot = i % cycle_length;
601 let offset = slot as u32 * global_max_ring;
602 result.push_str(&renumber_ring_closures(unit, offset));
603 }
604
605 Ok(result)
606}
607
608pub(crate) fn max_ring_number(smiles: &str) -> u32 {
613 let mut max = 0u32;
614 let mut in_bracket = false;
615 let mut chars = smiles.chars().peekable();
616
617 while let Some(c) = chars.next() {
618 match c {
619 '[' => in_bracket = true,
620 ']' => in_bracket = false,
621 _ if in_bracket => {}
622 '%' => {
623 let d1 = chars.next().unwrap_or('0');
625 let d2 = chars.next().unwrap_or('0');
626 if d1.is_ascii_digit() && d2.is_ascii_digit() {
627 let n = (d1 as u32 - '0' as u32) * 10 + (d2 as u32 - '0' as u32);
628 max = max.max(n);
629 }
630 }
631 c if c.is_ascii_digit() => {
632 max = max.max(c as u32 - '0' as u32);
633 }
634 _ => {}
635 }
636 }
637 max
638}
639
640pub(crate) fn renumber_ring_closures(smiles: &str, offset: u32) -> String {
645 if offset == 0 {
646 return smiles.to_string();
647 }
648 let mut result = String::with_capacity(smiles.len() + 4);
649 let mut in_bracket = false;
650 let mut chars = smiles.chars().peekable();
651
652 while let Some(c) = chars.next() {
653 match c {
654 '[' => {
655 in_bracket = true;
656 result.push(c);
657 }
658 ']' => {
659 in_bracket = false;
660 result.push(c);
661 }
662 _ if in_bracket => result.push(c),
663 '%' => {
664 let d1 = chars.next().unwrap_or('0');
665 let d2 = chars.next().unwrap_or('0');
666 if d1.is_ascii_digit() && d2.is_ascii_digit() {
667 let n = (d1 as u32 - '0' as u32) * 10 + (d2 as u32 - '0' as u32);
668 let new_n = n + offset;
669 result.push('%');
670 result.push_str(&format!("{new_n:02}"));
671 } else {
672 result.push('%');
673 result.push(d1);
674 result.push(d2);
675 }
676 }
677 c if c.is_ascii_digit() => {
678 let n = c as u32 - '0' as u32;
679 let new_n = n + offset;
680 if new_n <= 9 {
681 result.push(char::from_digit(new_n, 10).unwrap());
682 } else {
683 result.push('%');
684 result.push_str(&format!("{new_n:02}"));
685 }
686 }
687 _ => result.push(c),
688 }
689 }
690 result
691}
692
693pub(crate) fn collect_smiles_segments(segs: &[BigSmilesSegment]) -> String {
696 segs.iter()
697 .filter_map(|s| match s {
698 BigSmilesSegment::Smiles(mol) => Some(format!("{mol}")),
699 _ => None,
700 })
701 .collect()
702}
703
704pub(crate) fn gradient_fraction(profile: &GradientProfile, i: usize, n: usize) -> f64 {
706 match profile {
707 GradientProfile::Linear { f_start, f_end } => {
708 if n <= 1 {
709 *f_start
710 } else {
711 f_start + (f_end - f_start) * i as f64 / (n - 1) as f64
712 }
713 }
714 GradientProfile::Sigmoid { f_start, f_end } => {
715 if n <= 1 {
716 *f_start
717 } else {
718 let x = (i as f64 / n as f64 - 0.5) * 10.0;
719 let sigma = 1.0 / (1.0 + (-x).exp());
720 f_start + (f_end - f_start) * sigma
721 }
722 }
723 }
724}
725
726fn make_cyclic_smiles(linear: &str) -> String {
731 let mut result = String::with_capacity(linear.len() + 2);
732 let mut chars = linear.chars().peekable();
733
734 if let Some(c) = chars.next() {
736 if c == '[' {
737 result.push(c);
739 for ch in chars.by_ref() {
740 result.push(ch);
741 if ch == ']' {
742 break;
743 }
744 }
745 } else {
746 result.push(c);
747 if c.is_ascii_uppercase() {
749 if let Some(&next) = chars.peek() {
750 if next.is_ascii_lowercase() && next != 'c'
751 || matches!(
752 (c, next),
753 ('C', 'l')
754 | ('B', 'r')
755 | ('S', 'i')
756 | ('S', 'e')
757 | ('A', 'l')
758 | ('A', 's')
759 | ('A', 'r')
760 | ('A', 't')
761 | ('M', 'g')
762 | ('N', 'a')
763 | ('G', 'e')
764 )
765 {
766 result.push(chars.next().unwrap());
767 }
768 }
769 }
770 }
771 result.push('1');
772 }
773
774 for ch in chars {
776 result.push(ch);
777 }
778
779 result.push('1');
781 result
782}