use crate::writer::write;
use chematic_core::{AtomIdx, Molecule, MoleculeBuilder};
use std::collections::HashSet;
/// Writes `mol` as a SMILES string after renumbering its atoms pseudo-randomly.
///
/// The atom order is a permutation derived from `mol.atom_count()` and `seed`
/// by `generate_permutation`; the molecule is rebuilt in that order by
/// `apply_permutation` and the rebuilt molecule is handed to the crate's writer.
/// The same `seed` always yields the same permutation.
pub fn random_smiles(mol: &Molecule, seed: u64) -> String {
    let atom_order = generate_permutation(mol.atom_count(), seed);
    write(&apply_permutation(mol, &atom_order))
}
/// Collects up to `count` distinct randomized SMILES strings for `mol`.
///
/// Attempt number `k` (starting at 0) calls `random_smiles` with the seed
/// `seed.wrapping_add(k)`. Strings that were already produced are skipped, and
/// the results keep the order in which they were first seen.
///
/// At most `max(count * 3, 10)` attempts are made (the multiplication
/// saturates), so the returned vector may hold fewer than `count` entries when
/// not enough distinct strings turn up within that budget.
pub fn random_smiles_vect(mol: &Molecule, count: usize, seed: u64) -> Vec<String> {
    let attempt_budget = count.saturating_mul(3).max(10);
    let mut already_seen = HashSet::new();

    (0..attempt_budget)
        .map(|attempt| random_smiles(mol, seed.wrapping_add(attempt as u64)))
        // `insert` returns false for duplicates, which drops them here.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        // `take` stops pulling new attempts once `count` strings were yielded.
        .take(count)
        .collect()
}
/// Minimal shift/xor pseudo-random generator with a single 64-bit state word.
///
/// It is only used to derive reproducible orderings from a seed and makes no
/// attempt at being cryptographically secure.
struct Xorshift64 {
    state: u64,
}

impl Xorshift64 {
    /// Creates a generator from `seed`.
    ///
    /// A seed of 0 is replaced by 1: an all-zero state is a fixed point of the
    /// shift/xor update in [`Xorshift64::next`] and would only ever yield 0.
    fn new(seed: u64) -> Self {
        let state = if seed == 0 { 1 } else { seed };
        Self { state }
    }

    /// Advances the state by one `<< 13`, `>> 7`, `<< 17` xor round and
    /// returns the new state.
    fn next(&mut self) -> u64 {
        let mut x = self.state;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.state = x;
        x
    }

    /// Returns a value in `0..max`.
    ///
    /// When `max` is 0 the result is 0 and the generator is not advanced.
    /// The value is obtained by a plain modulo, so it is slightly biased when
    /// `max` does not divide 2^64.
    fn range(&mut self, max: usize) -> usize {
        if max == 0 {
            return 0;
        }
        // Reduce in u64 *before* narrowing. Casting the raw draw to `usize`
        // first would drop the upper bits on 32-bit targets and make the same
        // seed produce different sequences depending on the platform. The
        // remainder is < max, so the final cast cannot truncate.
        (self.next() % max as u64) as usize
    }
}
/// Builds a pseudo-random permutation of `0..n` from `seed`.
///
/// Starts from the identity ordering and applies a Fisher–Yates shuffle driven
/// by a `Xorshift64` seeded with `seed`, so equal `(n, seed)` pairs always give
/// the same result. For `n` of 0 or 1 no swap is performed.
fn generate_permutation(n: usize, seed: u64) -> Vec<usize> {
    let mut order = Vec::with_capacity(n);
    order.extend(0..n);

    let mut rng = Xorshift64::new(seed);

    // `unsettled` is the length of the prefix that is still being shuffled.
    // Each round picks one of its slots and moves it into the prefix's last
    // position, then shrinks the prefix by one.
    let mut unsettled = n;
    while unsettled > 1 {
        let pick = rng.range(unsettled);
        unsettled -= 1;
        order.swap(unsettled, pick);
    }
    order
}
/// Rebuilds `mol` with its atoms added in the order given by `permutation`.
///
/// `permutation[new] == old` means the `new`-th atom added to the builder is a
/// clone of atom `old` of `mol`. Every bond of `mol` is then re-added between
/// the renumbered endpoints with its original `order`.
///
/// Indexing panics if an entry of `permutation` is not below
/// `mol.atom_count()`.
fn apply_permutation(mol: &Molecule, permutation: &[usize]) -> Molecule {
    let mut builder = MoleculeBuilder::new();

    // Atoms first, in permuted order.
    // NOTE(review): this relies on the builder numbering atoms sequentially in
    // insertion order — confirm against `MoleculeBuilder::add_atom`.
    for &source in permutation {
        builder.add_atom(mol.atom(AtomIdx(source as u32)).clone());
    }

    // Inverse lookup: original atom index -> index in the rebuilt molecule.
    let mut new_index_of = vec![0usize; mol.atom_count()];
    for (target, &source) in permutation.iter().enumerate() {
        new_index_of[source] = target;
    }
    let remap = |old: u32| AtomIdx(new_index_of[old as usize] as u32);

    for (_, bond) in mol.bonds() {
        // The value returned by `add_bond` is discarded.
        // NOTE(review): if it reports failures they are silently dropped here.
        let _ = builder.add_bond(remap(bond.atom1.0), remap(bond.atom2.0), bond.order);
    }

    builder.build()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse;

    /// A single randomized SMILES for "CC" is a non-empty string.
    #[test]
    fn test_random_smiles_single() {
        let mol = parse("CC").unwrap();
        assert!(!random_smiles(&mol, 42).is_empty());
    }

    /// Requesting several variants yields at least one, and none is empty.
    #[test]
    fn test_random_smiles_vect_generates_multiple() {
        let mol = parse("CCCC").unwrap();
        let variants = random_smiles_vect(&mol, 3, 42);
        assert!(!variants.is_empty());
        assert!(variants.iter().all(|s| !s.is_empty()));
    }

    /// Requesting ten variants of "CCCC" produces more than one distinct string.
    #[test]
    fn test_random_smiles_vect_unique() {
        let mol = parse("CCCC").unwrap();
        let variants = random_smiles_vect(&mol, 10, 100);
        let distinct: HashSet<&String> = variants.iter().collect();
        assert!(distinct.len() > 1);
    }

    /// A randomized SMILES can be parsed again (only parse success is checked).
    #[test]
    fn test_random_smiles_roundtrip() {
        let original_smiles = "CC(C)O";
        let mol = parse(original_smiles).unwrap();
        let randomized = random_smiles(&mol, 99);
        assert!(parse(&randomized).is_ok());
    }

    /// The same seed gives the same output on repeated calls.
    #[test]
    fn test_permutation_deterministic() {
        let mol = parse("CCCC").unwrap();
        assert_eq!(random_smiles(&mol, 77), random_smiles(&mol, 77));
    }

    /// Two different seeds both produce non-empty output; the outputs are not
    /// compared with each other.
    #[test]
    fn test_permutation_different_seeds() {
        let mol = parse("CCCC").unwrap();
        for seed in [1u64, 2] {
            assert!(!random_smiles(&mol, seed).is_empty());
        }
    }
}