1#![forbid(unsafe_code)]
18
19use chematic_core::{AtomIdx, BondOrder, Molecule, implicit_hcount};
20use chematic_perception::find_sssr;
21use std::collections::{HashSet, VecDeque};
22
/// Failure modes of the naming entry point.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum IupacError {
    /// The molecule has no atoms at all.
    Empty,
    /// The structure is outside the set of patterns this namer recognises.
    NotSupported,
}

impl core::fmt::Display for IupacError {
    /// Writes a short, fixed, human-readable description of the error.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::Empty => "empty molecule",
            Self::NotSupported => "IUPAC name not supported for this structure",
        };
        f.write_str(message)
    }
}

impl std::error::Error for IupacError {}
46
47pub fn name(mol: &Molecule) -> Result<String, IupacError> {
56 if mol.atom_count() == 0 {
57 return Err(IupacError::Empty);
58 }
59 Namer::new(mol).name()
60}
61
62struct Namer<'a> {
67 mol: &'a Molecule,
68}
69
70impl<'a> Namer<'a> {
71 fn new(mol: &'a Molecule) -> Self {
72 Self { mol }
73 }
74
75 fn name(&self) -> Result<String, IupacError> {
76 let mol = self.mol;
77
78 if count_components(mol) != 1 {
79 return Err(IupacError::NotSupported);
80 }
81
82 let rings = find_sssr(mol);
83 let ring_atoms: HashSet<AtomIdx> = rings
84 .rings()
85 .iter()
86 .flat_map(|r| r.iter().copied())
87 .collect();
88
89 let carbons: Vec<AtomIdx> = atoms_of(mol, 6);
90 let o_atoms: Vec<AtomIdx> = atoms_of(mol, 8);
91 let n_atoms: Vec<AtomIdx> = atoms_of(mol, 7);
92 let s_atoms: Vec<AtomIdx> = atoms_of(mol, 16);
93 let halogens: Vec<AtomIdx> = mol
94 .atoms()
95 .filter(|(_, a)| matches!(a.element.atomic_number(), 9 | 17 | 35 | 53))
96 .map(|(i, _)| i)
97 .collect();
98
99 let het_elements: HashSet<u8> = mol
101 .atoms()
102 .filter(|(_, a)| { let an = a.element.atomic_number(); an != 6 && an != 1 })
103 .map(|(_, a)| a.element.atomic_number())
104 .collect();
105 if het_elements.iter().any(|&an| !matches!(an, 7 | 8 | 9 | 16 | 17 | 35 | 53)) {
106 return Err(IupacError::NotSupported);
107 }
108
109 let cyclic = !ring_atoms.is_empty();
110
111 if cyclic {
112 let any_aromatic = ring_atoms.iter().any(|&i| mol.atom(i).aromatic);
113 if any_aromatic {
114 return self.name_aromatic_ring(&ring_atoms);
115 }
116 let only_oxygen = het_elements.len() == 1 && het_elements.contains(&8);
118 if !het_elements.is_empty() && !only_oxygen {
119 return Err(IupacError::NotSupported);
120 }
121 if only_oxygen {
122 return self.name_cycloalkanol(&ring_atoms, &carbons, &o_atoms);
123 }
124 return self.name_cycloalkane(&ring_atoms, &carbons);
125 }
126
127 match (o_atoms.len(), n_atoms.len(), s_atoms.len(), halogens.len()) {
129 (0, 0, 0, 0) => self.name_acyclic_hydrocarbon(&carbons),
130 (1, 0, 0, 0) => self.name_one_oxygen(&carbons, o_atoms[0]),
131 (2, 0, 0, 0) => self.name_two_oxygens(&carbons, &o_atoms),
132 (1, 1, 0, 0) => self.name_amide(&carbons, o_atoms[0], n_atoms[0]),
133 (0, 1, 0, 0) => {
134 if self.is_nitrile(n_atoms[0]) {
136 self.name_nitrile(&carbons, n_atoms[0])
137 } else {
138 self.name_amine(&carbons, n_atoms[0])
139 }
140 }
141 (0, 0, 1, 0) => self.name_thiol(&carbons, s_atoms[0]),
142 (0, 0, 0, _) if !halogens.is_empty() => {
143 if het_elements.len() != 1 {
144 return Err(IupacError::NotSupported);
145 }
146 let prefix = match het_elements.iter().next().copied().unwrap() {
147 9 => "fluoro",
148 17 => "chloro",
149 35 => "bromo",
150 53 => "iodo",
151 _ => return Err(IupacError::NotSupported),
152 };
153 self.name_haloalkane(&carbons, &halogens, prefix)
154 }
155 _ => Err(IupacError::NotSupported),
156 }
157 }
158
159 fn name_aromatic_ring(&self, ring_atoms: &HashSet<AtomIdx>) -> Result<String, IupacError> {
164 let mol = self.mol;
165 if !ring_atoms.iter().all(|&i| mol.atom(i).aromatic) {
167 return Err(IupacError::NotSupported);
168 }
169
170 let n_n = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 7).count();
171 let n_o = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 8).count();
172 let n_s = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 16).count();
173 let sz = ring_atoms.len();
174
175 if ring_atoms.len() == mol.atom_count() {
177 return match (sz, n_n, n_o, n_s) {
178 (6, 0, 0, 0) => Ok("benzene".into()),
179 (6, 1, 0, 0) => Ok("pyridine".into()),
180 (6, 2, 0, 0) => Ok("pyrimidine".into()),
181 (5, 0, 1, 0) => Ok("furan".into()),
182 (5, 0, 0, 1) => Ok("thiophene".into()),
183 (5, 1, 0, 0) => Ok("pyrrole".into()),
184 (5, 2, 0, 0) => Ok("imidazole".into()),
185 _ => Err(IupacError::NotSupported),
186 };
187 }
188
189 if sz == 6 && n_n == 0 && n_o == 0 && n_s == 0 {
192 let sub_atoms: Vec<AtomIdx> = mol.atoms()
193 .filter(|(i, _)| !ring_atoms.contains(i))
194 .map(|(i, _)| i)
195 .collect();
196 return self.name_monosubstituted_benzene(ring_atoms, &sub_atoms);
197 }
198
199 Err(IupacError::NotSupported)
200 }
201
202 fn name_monosubstituted_benzene(
207 &self,
208 ring_atoms: &HashSet<AtomIdx>,
209 sub_atoms: &[AtomIdx],
210 ) -> Result<String, IupacError> {
211 let mol = self.mol;
212 let attach_count = ring_atoms.iter().filter(|&&r| {
214 mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb))
215 }).count();
216 if attach_count == 2 {
217 return self.name_disubstituted_benzene(ring_atoms, sub_atoms);
218 }
219 if attach_count == 3 {
220 return self.name_trisubstituted_benzene(ring_atoms);
221 }
222 if attach_count != 1 {
223 return Err(IupacError::NotSupported);
224 }
225
226 let mut n_c = 0usize; let mut n_n = 0usize;
228 let mut n_o = 0usize; let mut n_hal = 0usize;
229 let mut halogen_an = 0u8;
230 for &a in sub_atoms {
231 match mol.atom(a).element.atomic_number() {
232 6 => n_c += 1,
233 7 => n_n += 1,
234 8 => n_o += 1,
235 1 => {},
236 an @ (9 | 17 | 35 | 53) => { n_hal += 1; halogen_an = an; }
237 _ => return Err(IupacError::NotSupported),
238 }
239 }
240
241 let sub_set: HashSet<AtomIdx> = sub_atoms.iter().copied().collect();
242 let has_triple = mol.bonds().any(|(_, b)| {
243 b.order == BondOrder::Triple
244 && (sub_set.contains(&b.atom1) || sub_set.contains(&b.atom2))
245 });
246 let has_double = mol.bonds().any(|(_, b)| {
247 b.order == BondOrder::Double
248 && (sub_set.contains(&b.atom1) || sub_set.contains(&b.atom2))
249 });
250
251 match (n_c, n_n, n_o, n_hal, has_double, has_triple) {
252 (0, 0, 1, 0, false, false) => Ok("phenol".into()),
254 (0, 1, 0, 0, false, false) => Ok("aniline".into()),
256 (0, 0, 0, 1, false, false) => {
258 let prefix = match halogen_an {
259 9 => "fluoro", 17 => "chloro", 35 => "bromo", 53 => "iodo", _ => return Err(IupacError::NotSupported),
260 };
261 Ok(format!("{prefix}benzene"))
262 }
263 (1, 0, 0, 0, false, false) => Ok("toluene".into()),
265 (1, 0, 1, 0, true, false) => Ok("benzaldehyde".into()),
267 (1, 0, 2, 0, true, false) => Ok("benzoic acid".into()),
269 (1, 1, 0, 0, false, true) => Ok("benzonitrile".into()),
271 _ => Err(IupacError::NotSupported),
272 }
273 }
274
275 fn name_cycloalkane(
280 &self,
281 ring_atoms: &HashSet<AtomIdx>,
282 carbons: &[AtomIdx],
283 ) -> Result<String, IupacError> {
284 let mol = self.mol;
285 if carbons.iter().any(|&c| mol.atom(c).aromatic) {
286 return Err(IupacError::NotSupported);
287 }
288 if ring_atoms.len() == carbons.len() {
290 return Ok(format!("cyclo{}", alkane_suffix(ring_atoms.len())));
291 }
292 let outside: Vec<AtomIdx> = carbons.iter()
293 .filter(|&&c| !ring_atoms.contains(&c))
294 .copied()
295 .collect();
296
297 let is_terminal_methyl = |sub: AtomIdx| -> bool {
298 mol.neighbors(sub)
299 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6 && !ring_atoms.contains(nb))
300 .count() == 0
301 };
302
303 if outside.len() == 1 && is_terminal_methyl(outside[0]) {
304 return Ok(format!("methylcyclo{}", alkane_suffix(ring_atoms.len())));
305 }
306
307 if outside.len() == 2 && is_terminal_methyl(outside[0]) && is_terminal_methyl(outside[1]) {
308 let att_a = mol.neighbors(outside[0])
309 .find(|(nb, _)| ring_atoms.contains(nb))
310 .map(|(nb, _)| nb)
311 .ok_or(IupacError::NotSupported)?;
312 let att_b = mol.neighbors(outside[1])
313 .find(|(nb, _)| ring_atoms.contains(nb))
314 .map(|(nb, _)| nb)
315 .ok_or(IupacError::NotSupported)?;
316 let raw_dist = {
318 let mut dist = 0usize;
319 let mut queue = VecDeque::new();
320 let mut visited: HashSet<AtomIdx> = HashSet::new();
321 queue.push_back((att_a, 0usize));
322 visited.insert(att_a);
323 'bfs: while let Some((cur, d)) = queue.pop_front() {
324 if cur == att_b { dist = d; break 'bfs; }
325 for (nb, _) in mol.neighbors(cur) {
326 if ring_atoms.contains(&nb) && visited.insert(nb) {
327 queue.push_back((nb, d + 1));
328 }
329 }
330 }
331 dist
332 };
333 let ring_dist = raw_dist.min(ring_atoms.len() - raw_dist);
334 return Ok(format!("1,{}-dimethylcyclo{}", ring_dist + 1, alkane_suffix(ring_atoms.len())));
335 }
336
337 Err(IupacError::NotSupported)
338 }
339
340 fn name_cycloalkanol(
345 &self,
346 ring_atoms: &HashSet<AtomIdx>,
347 carbons: &[AtomIdx],
348 o_atoms: &[AtomIdx],
349 ) -> Result<String, IupacError> {
350 let mol = self.mol;
351 if o_atoms.len() != 1 { return Err(IupacError::NotSupported); }
353 let o_idx = o_atoms[0];
354 if mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) {
356 return Err(IupacError::NotSupported);
357 }
358 if implicit_hcount(mol, o_idx) == 0 {
360 return Err(IupacError::NotSupported);
361 }
362 let exo_c = carbons.iter().filter(|&&c| !ring_atoms.contains(&c)).count();
364 if exo_c > 0 { return Err(IupacError::NotSupported); }
365 Ok(format!("cyclo{}ol", alkane_base(ring_atoms.len())))
366 }
367
368 fn name_acyclic_hydrocarbon(&self, carbons: &[AtomIdx]) -> Result<String, IupacError> {
373 let mol = self.mol;
374 let n = carbons.len();
375
376 let double_bonds = mol.bonds().filter(|(_, b)| b.order == BondOrder::Double).count();
377 let triple_bonds = mol.bonds().filter(|(_, b)| b.order == BondOrder::Triple).count();
378 if double_bonds > 1 || triple_bonds > 1 || (double_bonds > 0 && triple_bonds > 0) {
379 return Err(IupacError::NotSupported);
380 }
381
382 let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
384 let is_branched = carbons.iter().any(|&c| {
385 mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
386 });
387
388 if is_branched {
389 if double_bonds > 0 || triple_bonds > 0 {
391 return Err(IupacError::NotSupported);
392 }
393 return self.name_branched_alkane(carbons);
394 }
395
396 if triple_bonds == 1 {
397 if n >= 4 {
398 let pos = unsaturation_locant(mol, carbons, BondOrder::Triple);
399 Ok(format!("{}-{}-yne", alkane_stem(n), pos))
400 } else {
401 Ok(alkyne_suffix(n))
402 }
403 } else if double_bonds == 1 {
404 if n >= 4 {
405 let pos = unsaturation_locant(mol, carbons, BondOrder::Double);
406 Ok(format!("{}-{}-ene", alkane_stem(n), pos))
407 } else {
408 Ok(alkene_suffix(n))
409 }
410 } else {
411 Ok(alkane_suffix(n))
412 }
413 }
414
415 fn name_one_oxygen(&self, carbons: &[AtomIdx], o_idx: AtomIdx) -> Result<String, IupacError> {
420 let mol = self.mol;
421 let is_double = mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
422
423 if !is_double {
424 let o_c_nb: Vec<AtomIdx> = mol.neighbors(o_idx)
426 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
427 .map(|(nb, _)| nb)
428 .collect();
429 if o_c_nb.len() == 2 && implicit_hcount(mol, o_idx) == 0 {
430 return self.name_ether(carbons, o_idx, o_c_nb[0], o_c_nb[1]);
431 }
432
433 let oh_c = mol.neighbors(o_idx)
435 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
436 .map(|(nb, _)| nb)
437 .next()
438 .ok_or(IupacError::NotSupported)?;
439
440 let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
442 let is_branched = carbons.iter().any(|&c| {
443 mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
444 });
445 if is_branched {
446 return self.name_branched_alcohol(carbons, oh_c);
447 }
448
449 let chain = find_longest_c_chain(mol, carbons);
451 let n = chain.len();
452 let pos_fwd = chain.iter().position(|&c| c == oh_c).map(|p| p + 1).unwrap_or(1);
453 let pos = pos_fwd.min(n + 1 - pos_fwd);
454 if pos == 1 && n <= 2 {
455 return Ok(format!("{}anol", alkane_stem(n)));
457 }
458 return Ok(format!("{}-{}-ol", alkane_base(n), pos));
459 }
460
461 let carbonyl_c = mol
463 .neighbors(o_idx)
464 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
465 .map(|(nb, _)| nb)
466 .next()
467 .ok_or(IupacError::NotSupported)?;
468
469 if implicit_hcount(mol, carbonyl_c) > 0 {
470 let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
472 let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
473 let n = chain.len();
474 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
475 let mut subs: Vec<(usize, usize)> = Vec::new();
476 for (pos0, &chain_c) in chain.iter().enumerate() {
477 if pos0 == 0 { continue; }
478 let position = pos0 + 1;
479 for (nb, _) in mol.neighbors(chain_c) {
480 if c_set.contains(&nb) && !chain_set.contains(&nb) {
481 let sub_len = count_c_chain(mol, nb, chain_c);
482 if sub_len > 4 { return Err(IupacError::NotSupported); }
483 subs.push((position, sub_len));
484 }
485 }
486 }
487 let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
488 return Ok(format!("{}{}anal", prefix, alkane_stem(n)));
489 }
490
491 let chain = find_longest_c_chain(mol, carbons);
493 let n = chain.len();
494 if n < 3 { return Err(IupacError::NotSupported); }
495 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
496 let all_c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
497 let pos_fwd = chain.iter().position(|&c| c == carbonyl_c)
498 .map(|p| p + 1).ok_or(IupacError::NotSupported)?;
499 let pos = pos_fwd.min(n + 1 - pos_fwd);
500 let reversed = pos_fwd > n + 1 - pos_fwd;
501 let mut subs: Vec<(usize, usize)> = Vec::new();
503 for (idx, &chain_c) in chain.iter().enumerate() {
504 let position = idx + 1;
505 for (nb, _) in mol.neighbors(chain_c) {
506 if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
507 let sub_len = count_c_chain(mol, nb, chain_c);
508 if sub_len > 4 { return Err(IupacError::NotSupported); }
509 let adj_pos = if reversed { n + 1 - position } else { position };
510 subs.push((adj_pos, sub_len));
511 }
512 }
513 }
514 let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
515 Ok(format!("{}{}-{}-one", prefix, alkane_base(n), pos))
516 }
517
518 fn name_ether(
523 &self,
524 carbons: &[AtomIdx],
525 o_idx: AtomIdx,
526 side_a: AtomIdx,
527 side_b: AtomIdx,
528 ) -> Result<String, IupacError> {
529 let mol = self.mol;
530 let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
532 if carbons.iter().any(|&c| {
533 mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
534 }) {
535 return Err(IupacError::NotSupported);
536 }
537 let len_a = count_c_chain(mol, side_a, o_idx);
538 let len_b = count_c_chain(mol, side_b, o_idx);
539 let (alkoxy_len, parent_len) = if len_a <= len_b { (len_a, len_b) } else { (len_b, len_a) };
540 let alkoxy = format!("{}oxy", alkane_stem(alkoxy_len));
541 let parent = alkane_suffix(parent_len);
542 if parent_len >= 3 && alkoxy_len != parent_len {
544 Ok(format!("1-{alkoxy}{parent}"))
545 } else {
546 Ok(format!("{alkoxy}{parent}"))
547 }
548 }
549
550 fn name_two_oxygens(&self, carbons: &[AtomIdx], o_atoms: &[AtomIdx]) -> Result<String, IupacError> {
555 let mol = self.mol;
556 let o1 = o_atoms[0];
557 let o2 = o_atoms[1];
558
559 let o1_dbl = mol.neighbors(o1).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
560 let o2_dbl = mol.neighbors(o2).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
561
562 let (carbonyl_o, ester_o) = match (o1_dbl, o2_dbl) {
563 (true, false) => (o1, o2),
564 (false, true) => (o2, o1),
565 _ => return Err(IupacError::NotSupported),
566 };
567
568 let carbonyl_c = mol
570 .neighbors(carbonyl_o)
571 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
572 .map(|(nb, _)| nb)
573 .next()
574 .ok_or(IupacError::NotSupported)?;
575
576 if !mol.neighbors(carbonyl_c).any(|(nb, _)| nb == ester_o) {
578 return Err(IupacError::NotSupported);
579 }
580
581 let alcohol_c = mol
583 .neighbors(ester_o)
584 .filter(|(nb, _)| *nb != carbonyl_c && mol.atom(*nb).element.atomic_number() == 6)
585 .map(|(nb, _)| nb)
586 .next();
587
588 let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
589 if let Some(alc_c) = alcohol_c {
590 let chain_acid = chain_from_anchor(mol, &c_set, carbonyl_c);
592 let acid_n = chain_acid.len();
593 let chain_acid_set: HashSet<AtomIdx> = chain_acid.iter().copied().collect();
594 let mut subs: Vec<(usize, usize)> = Vec::new();
595 for (pos0, &chain_c) in chain_acid.iter().enumerate() {
596 if pos0 == 0 { continue; }
597 let position = pos0 + 1;
598 for (nb, _) in mol.neighbors(chain_c) {
599 if c_set.contains(&nb) && !chain_acid_set.contains(&nb) {
600 let sub_len = count_c_chain(mol, nb, chain_c);
601 if sub_len > 4 { return Err(IupacError::NotSupported); }
602 subs.push((position, sub_len));
603 }
604 }
605 }
606 let alcohol_n = count_c_chain(mol, alc_c, ester_o);
607 let acid_part = if subs.is_empty() {
608 format!("{}anoate", alkane_stem(acid_n))
609 } else {
610 format!("{}{}anoate", format_substituents(&subs), alkane_stem(acid_n))
611 };
612 Ok(format!("{}yl {}", alkane_stem(alcohol_n), acid_part))
613 } else {
614 let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
616 let n = chain.len();
617 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
618 let mut subs: Vec<(usize, usize)> = Vec::new();
619 for (pos0, &chain_c) in chain.iter().enumerate() {
620 if pos0 == 0 { continue; }
621 let position = pos0 + 1;
622 for (nb, _) in mol.neighbors(chain_c) {
623 if c_set.contains(&nb) && !chain_set.contains(&nb) {
624 let sub_len = count_c_chain(mol, nb, chain_c);
625 if sub_len > 4 { return Err(IupacError::NotSupported); }
626 subs.push((position, sub_len));
627 }
628 }
629 }
630 if subs.is_empty() {
631 Ok(format!("{}anoic acid", alkane_stem(n)))
632 } else {
633 Ok(format!("{}{}anoic acid", format_substituents(&subs), alkane_stem(n)))
634 }
635 }
636 }
637
638 fn name_amide(
643 &self,
644 _carbons: &[AtomIdx],
645 o_idx: AtomIdx,
646 n_idx: AtomIdx,
647 ) -> Result<String, IupacError> {
648 let mol = self.mol;
649
650 if !mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) {
652 return Err(IupacError::NotSupported);
653 }
654
655 let carbonyl_c = mol
656 .neighbors(o_idx)
657 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
658 .map(|(nb, _)| nb)
659 .next()
660 .ok_or(IupacError::NotSupported)?;
661
662 if !mol.neighbors(carbonyl_c).any(|(nb, _)| nb == n_idx) {
664 return Err(IupacError::NotSupported);
665 }
666
667 if implicit_hcount(mol, n_idx) == 0 {
669 return Err(IupacError::NotSupported);
670 }
671
672 let c_set: HashSet<AtomIdx> = mol.atoms()
674 .filter(|(_, a)| a.element.atomic_number() == 6)
675 .map(|(i, _)| i)
676 .collect();
677 let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
678 let n = chain.len();
679 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
680 let mut subs: Vec<(usize, usize)> = Vec::new();
681 for (pos0, &chain_c) in chain.iter().enumerate() {
682 if pos0 == 0 { continue; }
683 let position = pos0 + 1;
684 for (nb, _) in mol.neighbors(chain_c) {
685 if c_set.contains(&nb) && !chain_set.contains(&nb) {
686 let sub_len = count_c_chain(mol, nb, chain_c);
687 if sub_len > 4 { return Err(IupacError::NotSupported); }
688 subs.push((position, sub_len));
689 }
690 }
691 }
692 let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
693 Ok(format!("{}{}anamide", prefix, alkane_stem(n)))
694 }
695
696 fn name_amine(&self, carbons: &[AtomIdx], n_idx: AtomIdx) -> Result<String, IupacError> {
701 let mol = self.mol;
702 let n_h = implicit_hcount(mol, n_idx);
703 let c_sides: Vec<AtomIdx> = mol.neighbors(n_idx)
704 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
705 .map(|(nb, _)| nb)
706 .collect();
707 let mut chain_lens: Vec<usize> = c_sides.iter()
708 .map(|&nb| count_c_chain(mol, nb, n_idx))
709 .collect();
710 chain_lens.sort_unstable_by(|a, b| b.cmp(a)); match n_h {
712 2 => {
713 let chain = find_longest_c_chain(mol, carbons);
715 let n_chain = chain.len();
716 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
717 let amine_c = mol.neighbors(n_idx)
718 .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6
719 && chain_set.contains(nb))
720 .map(|(nb, _)| nb)
721 .next()
722 .ok_or(IupacError::NotSupported)?;
723 let pos_fwd = chain.iter().position(|&c| c == amine_c)
724 .map(|p| p + 1).unwrap_or(1);
725 let pos = pos_fwd.min(n_chain + 1 - pos_fwd);
726 Ok(format!("{}an-{}-amine", alkane_stem(n_chain), pos))
727 }
728 1 => {
729 if chain_lens.len() != 2 { return Err(IupacError::NotSupported); }
730 let parent_len = chain_lens[0];
731 let sub_len = chain_lens[1];
732 Ok(format!("N-{}yl{}anamine", alkane_stem(sub_len), alkane_stem(parent_len)))
733 }
734 0 => {
735 if chain_lens.len() != 3 { return Err(IupacError::NotSupported); }
736 let parent_len = chain_lens[0];
737 let sub1 = chain_lens[1];
738 let sub2 = chain_lens[2];
739 if sub1 == sub2 {
740 Ok(format!("N,N-di{}yl{}anamine", alkane_stem(sub1), alkane_stem(parent_len)))
741 } else {
742 let (lo, hi) = (sub1.min(sub2), sub1.max(sub2));
743 Ok(format!("N-{}yl-N-{}yl{}anamine", alkane_stem(lo), alkane_stem(hi), alkane_stem(parent_len)))
744 }
745 }
746 _ => Err(IupacError::NotSupported),
747 }
748 }
749
750 fn name_haloalkane(
755 &self,
756 carbons: &[AtomIdx],
757 halogen_atoms: &[AtomIdx],
758 prefix: &str,
759 ) -> Result<String, IupacError> {
760 let mol = self.mol;
761 let chain = find_longest_c_chain(mol, carbons);
762 let n = chain.len();
763 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
764
765 let mut locants: Vec<usize> = Vec::new();
767 for &hal in halogen_atoms {
768 let hal_c = mol.neighbors(hal)
769 .filter(|(nb, _)| chain_set.contains(nb))
770 .map(|(nb, _)| nb)
771 .next()
772 .ok_or(IupacError::NotSupported)?;
773 let pos = chain.iter().position(|&c| c == hal_c).map(|p| p + 1)
774 .ok_or(IupacError::NotSupported)?;
775 locants.push(pos);
776 }
777
778 let locants_rev: Vec<usize> = locants.iter().map(|&p| n + 1 - p).collect();
780 let best = if locants.iter().min() <= locants_rev.iter().min() {
781 locants
782 } else {
783 locants_rev
784 };
785
786 let count = halogen_atoms.len();
787 let mult = match count {
788 1 => prefix.to_string(),
789 2 => format!("di{prefix}"),
790 3 => format!("tri{prefix}"),
791 _ => return Err(IupacError::NotSupported),
792 };
793
794 let mut sorted_locs = best;
795 sorted_locs.sort_unstable();
796 let locant_str = sorted_locs.iter().map(|l| l.to_string()).collect::<Vec<_>>().join(",");
797
798 if n <= 2 && count == 1 {
800 Ok(format!("{mult}{}", alkane_suffix(n)))
801 } else {
802 Ok(format!("{locant_str}-{mult}{}", alkane_suffix(n)))
803 }
804 }
805
806 fn name_thiol(&self, carbons: &[AtomIdx], s_idx: AtomIdx) -> Result<String, IupacError> {
811 let mol = self.mol;
812 if implicit_hcount(mol, s_idx) == 0 {
813 return Err(IupacError::NotSupported);
814 }
815 let chain = find_longest_c_chain(mol, carbons);
816 let n = chain.len();
817 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
818 let thiol_c = mol.neighbors(s_idx)
819 .filter(|(nb, _)| chain_set.contains(nb))
820 .map(|(nb, _)| nb)
821 .next()
822 .ok_or(IupacError::NotSupported)?;
823 let pos_fwd = chain.iter().position(|&c| c == thiol_c).map(|p| p + 1).unwrap_or(1);
824 let pos = pos_fwd.min(n + 1 - pos_fwd);
825 if pos == 1 {
827 Ok(format!("{}anethiol", alkane_stem(n)))
828 } else {
829 Ok(format!("{}ane-{}-thiol", alkane_stem(n), pos))
830 }
831 }
832
833 fn name_branched_alcohol(
838 &self,
839 carbons: &[AtomIdx],
840 oh_c: AtomIdx,
841 ) -> Result<String, IupacError> {
842 let chain = find_longest_c_chain(self.mol, carbons);
844 let n = chain.len();
845 if n < 2 { return Err(IupacError::NotSupported); }
846
847 let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
848 let all_c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
849
850 let pos_on_chain = if chain_set.contains(&oh_c) {
852 chain.iter().position(|&c| c == oh_c).map(|p| p + 1)
853 } else {
854 None
855 };
856
857 let pos_fwd = pos_on_chain.ok_or(IupacError::NotSupported)?;
858 let pos = pos_fwd.min(n + 1 - pos_fwd);
859
860 let mut subs: Vec<(usize, usize)> = Vec::new();
862 for (pos0, &chain_c) in chain.iter().enumerate() {
863 let position = pos0 + 1;
864 for (nb, _) in self.mol.neighbors(chain_c) {
865 if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
866 let sub_len = count_c_chain(self.mol, nb, chain_c);
867 if sub_len > 4 { return Err(IupacError::NotSupported); }
868 subs.push((position, sub_len));
869 }
870 }
871 }
872
873 if pos_fwd > n + 1 - pos_fwd {
875 subs = subs.iter().map(|&(p, l)| (n + 1 - p, l)).collect();
877 }
878
879 let prefix = if subs.is_empty() {
880 String::new()
881 } else {
882 subs.sort_unstable();
883 let subs_rev: Vec<(usize, usize)> = subs.iter()
884 .map(|&(p, l)| (n + 1 - p, l))
885 .collect();
886 let first_fwd = subs.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
887 let first_rev = subs_rev.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
888 let best = if first_fwd <= first_rev { subs.clone() } else { subs_rev };
889 format!("{}-", format_substituents(&best))
890 };
891
892 Ok(format!("{}{}-{}-ol", prefix, alkane_base(n), pos))
893 }
894
895 fn name_disubstituted_benzene(
900 &self,
901 ring_atoms: &HashSet<AtomIdx>,
902 _sub_atoms: &[AtomIdx],
903 ) -> Result<String, IupacError> {
904 let mol = self.mol;
905
906 let attach_points: Vec<AtomIdx> = ring_atoms.iter()
908 .filter(|&&r| mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb)))
909 .copied()
910 .collect();
911 if attach_points.len() != 2 {
912 return Err(IupacError::NotSupported);
913 }
914
915 let ring_dist = {
917 let ring_vec: Vec<AtomIdx> = ring_atoms.iter().copied().collect();
918 let mut dist = usize::MAX;
919 let mut queue = VecDeque::new();
921 let mut visited: HashSet<AtomIdx> = HashSet::new();
922 queue.push_back((attach_points[0], 0usize));
923 visited.insert(attach_points[0]);
924 while let Some((cur, d)) = queue.pop_front() {
925 if cur == attach_points[1] { dist = d; break; }
926 for (nb, _) in mol.neighbors(cur) {
927 if ring_atoms.contains(&nb) && visited.insert(nb) {
928 queue.push_back((nb, d + 1));
929 }
930 }
931 }
932 dist.min(ring_vec.len() - dist)
934 };
935
936 let classify_sub = |attach: AtomIdx| -> Option<(&str, bool)> {
938 let direct: Vec<AtomIdx> = mol.neighbors(attach)
943 .filter(|(nb, _)| !ring_atoms.contains(nb))
944 .map(|(nb, _)| nb)
945 .collect();
946 if direct.is_empty() { return None; }
947 let first = direct[0];
948 let an = mol.atom(first).element.atomic_number();
949 match an {
950 8 if !mol.neighbors(first).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) => {
951 Some(("hydroxy", true)) }
953 7 if implicit_hcount(mol, first) > 0 => Some(("amino", true)), 6 => Some(("methyl", false)), 17 => Some(("chloro", false)),
956 35 => Some(("bromo", false)),
957 9 => Some(("fluoro", false)),
958 53 => Some(("iodo", false)),
959 _ => None,
960 }
961 };
962
963 let sub_a = classify_sub(attach_points[0]);
964 let sub_b = classify_sub(attach_points[1]);
965
966 let (sub_a, sub_b) = match (sub_a, sub_b) {
967 (Some(a), Some(b)) => (a, b),
968 _ => return Err(IupacError::NotSupported),
969 };
970
971 let pos2 = ring_dist + 1; let (prefix_sub, root_name) = if sub_a.1 {
976 let root = match sub_a.0 {
978 "hydroxy" => "phenol",
979 "amino" => "aniline",
980 _ => return Err(IupacError::NotSupported),
981 };
982 (sub_b.0, root)
983 } else if sub_b.1 {
984 let root = match sub_b.0 {
985 "hydroxy" => "phenol",
986 "amino" => "aniline",
987 _ => return Err(IupacError::NotSupported),
988 };
989 (sub_a.0, root)
990 } else {
991 let (s1, s2) = if sub_a.0 <= sub_b.0 {
994 (sub_a.0, sub_b.0)
995 } else {
996 (sub_b.0, sub_a.0)
997 };
998 return if s1 == s2 {
999 Ok(format!("1,{}-di{}benzene", pos2, s1))
1000 } else {
1001 Ok(format!("1-{}-{}-{}benzene", s1, pos2, s2))
1002 };
1003 };
1004
1005 Ok(format!("{}-{}{}", pos2, prefix_sub, root_name))
1006 }
1007
1008 fn name_trisubstituted_benzene(
1013 &self,
1014 ring_atoms: &HashSet<AtomIdx>,
1015 ) -> Result<String, IupacError> {
1016 let mol = self.mol;
1017 let attach_points: Vec<AtomIdx> = ring_atoms.iter()
1018 .filter(|&&r| mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb)))
1019 .copied()
1020 .collect();
1021 if attach_points.len() != 3 {
1022 return Err(IupacError::NotSupported);
1023 }
1024
1025 let locant_map = best_benzene_locants(mol, ring_atoms, &attach_points);
1026
1027 let mut sub_list: Vec<(usize, String)> = Vec::new();
1029 for &(locant, attach) in &locant_map {
1030 let sub = self.classify_benzene_sub_simple(attach, ring_atoms)
1031 .ok_or(IupacError::NotSupported)?;
1032 sub_list.push((locant, sub));
1033 }
1034
1035 sub_list.sort_by(|a, b| a.1.cmp(&b.1).then(a.0.cmp(&b.0)));
1037
1038 let mut groups: Vec<(String, Vec<usize>)> = Vec::new();
1040 for (locant, name) in sub_list {
1041 if let Some(last) = groups.last_mut() {
1042 if last.0 == name {
1043 last.1.push(locant);
1044 continue;
1045 }
1046 }
1047 groups.push((name, vec![locant]));
1048 }
1049
1050 let mut parts: Vec<String> = Vec::new();
1051 for (name, mut locs) in groups {
1052 locs.sort_unstable();
1053 let locant_str = locs.iter().map(|l| l.to_string()).collect::<Vec<_>>().join(",");
1054 let mult = match locs.len() {
1055 1 => String::new(),
1056 2 => "di".to_string(),
1057 3 => "tri".to_string(),
1058 _ => return Err(IupacError::NotSupported),
1059 };
1060 parts.push(format!("{}-{}{}", locant_str, mult, name));
1061 }
1062
1063 Ok(format!("{}benzene", parts.join("-")))
1064 }
1065
1066 fn classify_benzene_sub_simple(
1068 &self,
1069 attach: AtomIdx,
1070 ring_atoms: &HashSet<AtomIdx>,
1071 ) -> Option<String> {
1072 let mol = self.mol;
1073 let direct: Vec<AtomIdx> = mol.neighbors(attach)
1074 .filter(|(nb, _)| !ring_atoms.contains(nb))
1075 .map(|(nb, _)| nb)
1076 .collect();
1077 if direct.is_empty() { return None; }
1078 let first = direct[0];
1079 match mol.atom(first).element.atomic_number() {
1080 6 => Some("methyl".to_string()),
1081 7 => Some("amino".to_string()),
1082 8 => Some("hydroxy".to_string()),
1083 9 => Some("fluoro".to_string()),
1084 17 => Some("chloro".to_string()),
1085 35 => Some("bromo".to_string()),
1086 53 => Some("iodo".to_string()),
1087 _ => None,
1088 }
1089 }
1090
1091 fn is_nitrile(&self, n_idx: AtomIdx) -> bool {
1096 self.mol.neighbors(n_idx)
1097 .any(|(_, bi)| self.mol.bond(bi).order == BondOrder::Triple)
1098 }
1099
1100 fn name_nitrile(&self, carbons: &[AtomIdx], n_idx: AtomIdx) -> Result<String, IupacError> {
1101 let mol = self.mol;
1102 let nitrile_c = mol.neighbors(n_idx)
1104 .filter(|(_, bi)| mol.bond(*bi).order == BondOrder::Triple)
1105 .map(|(nb, _)| nb)
1106 .next()
1107 .ok_or(IupacError::NotSupported)?;
1108 let n_carbons = count_c_chain(mol, nitrile_c, n_idx);
1111 if n_carbons == 0 { return Err(IupacError::NotSupported); }
1113 let c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1115 for &c in carbons {
1116 if mol.neighbors(c)
1117 .filter(|(nb, _)| c_set.contains(nb))
1118 .count() > 2
1119 {
1120 return Err(IupacError::NotSupported); }
1122 }
1123 Ok(format!("{}enitrile", alkane_base(n_carbons)))
1124 }
1125
1126 fn name_branched_alkane(&self, carbons: &[AtomIdx]) -> Result<String, IupacError> {
1131 let mol = self.mol;
1132
1133 let chain = find_longest_c_chain(mol, carbons);
1135 let n = chain.len();
1136 if n < 2 {
1137 return Err(IupacError::NotSupported);
1138 }
1139
1140 let chain_set: std::collections::HashSet<AtomIdx> = chain.iter().copied().collect();
1141 let all_c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1142
1143 let mut subs: Vec<(usize, usize)> = Vec::new();
1145 for (pos0, &chain_c) in chain.iter().enumerate() {
1146 let position = pos0 + 1;
1147 for (nb, _) in mol.neighbors(chain_c) {
1148 if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
1149 let sub_len = count_c_chain(mol, nb, chain_c);
1151 if sub_len > 4 {
1152 return Err(IupacError::NotSupported);
1153 }
1154 subs.push((position, sub_len));
1155 }
1156 }
1157 }
1158
1159 if subs.is_empty() {
1160 return Err(IupacError::NotSupported);
1161 }
1162
1163 let subs_rev: Vec<(usize, usize)> = subs.iter()
1165 .map(|&(pos, len)| (n + 1 - pos, len))
1166 .collect();
1167
1168 let first_fwd = subs.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
1169 let first_rev = subs_rev.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
1170 let best_subs = if first_fwd <= first_rev { subs } else { subs_rev };
1171
1172 Ok(format!(
1173 "{}{}",
1174 format_substituents(&best_subs),
1175 alkane_suffix(n)
1176 ))
1177 }
1178}
1179
1180fn atoms_of(mol: &Molecule, atomic_num: u8) -> Vec<AtomIdx> {
1185 mol.atoms()
1186 .filter(|(_, a)| a.element.atomic_number() == atomic_num)
1187 .map(|(i, _)| i)
1188 .collect()
1189}
1190
1191fn count_c_chain(mol: &Molecule, start: AtomIdx, blocked: AtomIdx) -> usize {
1193 let mut visited = HashSet::new();
1194 let mut queue = VecDeque::new();
1195 visited.insert(start);
1196 queue.push_back(start);
1197 while let Some(cur) = queue.pop_front() {
1198 for (nb, _) in mol.neighbors(cur) {
1199 if nb == blocked { continue; }
1200 if mol.atom(nb).element.atomic_number() == 6 && visited.insert(nb) {
1201 queue.push_back(nb);
1202 }
1203 }
1204 }
1205 visited.len()
1206}
1207
1208fn find_longest_c_chain(mol: &Molecule, carbons: &[AtomIdx]) -> Vec<AtomIdx> {
1213 if carbons.is_empty() { return Vec::new(); }
1214
1215 let c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1216
1217 let bfs_far = |start: AtomIdx| -> (AtomIdx, std::collections::HashMap<AtomIdx, AtomIdx>) {
1219 let mut parent: std::collections::HashMap<AtomIdx, AtomIdx> = std::collections::HashMap::new();
1220 let mut visited: std::collections::HashSet<AtomIdx> = std::collections::HashSet::new();
1221 let mut queue = VecDeque::new();
1222 let mut farthest = start;
1223 visited.insert(start);
1224 queue.push_back(start);
1225 while let Some(cur) = queue.pop_front() {
1226 farthest = cur;
1227 for (nb, _) in mol.neighbors(cur) {
1228 if c_set.contains(&nb) && visited.insert(nb) {
1229 parent.insert(nb, cur);
1230 queue.push_back(nb);
1231 }
1232 }
1233 }
1234 (farthest, parent)
1235 };
1236
1237 let reconstruct = |end: AtomIdx, start: AtomIdx,
1238 parents: &std::collections::HashMap<AtomIdx, AtomIdx>| -> Vec<AtomIdx> {
1239 let mut path = vec![end];
1240 let mut cur = end;
1241 while cur != start {
1242 cur = parents[&cur];
1243 path.push(cur);
1244 }
1245 path.reverse();
1246 path
1247 };
1248
1249 let (end1, _) = bfs_far(carbons[0]);
1251 let (end2, parents) = bfs_far(end1);
1253
1254 reconstruct(end2, end1, &parents)
1255}
1256
/// Builds the substituent prefix of a name from `(locant, carbon_count)`
/// pairs, e.g. `[(2, 1), (2, 1)]` becomes `"2,2-dimethyl"`.
///
/// * Carbon counts 1 to 4 map to methyl, ethyl, propyl and butyl; entries
///   with any other count are skipped.
/// * Groups are emitted in alphabetical order of the alkyl name and joined
///   with `-`; locants inside a group are sorted ascending and joined with
///   `,`.
/// * Two to ten identical substituents get the multiplying prefix `di`
///   through `deca`. Larger groups fall back to the `"?"` marker.
///
/// Returns an empty string when no entry can be named.
fn format_substituents(subs: &[(usize, usize)]) -> String {
    // Multiplying prefixes for 2, 3, ... identical substituents.
    const MULTIPLIERS: [&str; 9] = [
        "di", "tri", "tetra", "penta", "hexa", "hepta", "octa", "nona", "deca",
    ];

    // BTreeMap keeps the alkyl names in alphabetical order.
    let mut groups: std::collections::BTreeMap<&'static str, Vec<usize>> =
        std::collections::BTreeMap::new();
    for &(pos, len) in subs {
        let alkyl = match len {
            1 => "methyl",
            2 => "ethyl",
            3 => "propyl",
            4 => "butyl",
            _ => continue,
        };
        groups.entry(alkyl).or_default().push(pos);
    }

    let parts: Vec<String> = groups
        .into_iter()
        .map(|(alkyl, mut positions)| {
            positions.sort_unstable();
            let locants = positions
                .iter()
                .map(|p| p.to_string())
                .collect::<Vec<_>>()
                .join(",");
            let mult = match positions.len() {
                1 => "",
                count => count
                    .checked_sub(2)
                    .and_then(|i| MULTIPLIERS.get(i).copied())
                    .unwrap_or("?"),
            };
            format!("{locants}-{mult}{alkyl}")
        })
        .collect();

    parts.join("-")
}
1287
1288fn chain_from_anchor(
1290 mol: &Molecule,
1291 c_set: &HashSet<AtomIdx>,
1292 anchor: AtomIdx,
1293) -> Vec<AtomIdx> {
1294 let mut parent: std::collections::HashMap<AtomIdx, AtomIdx> =
1295 std::collections::HashMap::new();
1296 let mut visited: HashSet<AtomIdx> = HashSet::new();
1297 let mut queue = VecDeque::new();
1298 let mut farthest = anchor;
1299 visited.insert(anchor);
1300 queue.push_back(anchor);
1301 while let Some(cur) = queue.pop_front() {
1302 farthest = cur;
1303 for (nb, _) in mol.neighbors(cur) {
1304 if c_set.contains(&nb) && visited.insert(nb) {
1305 parent.insert(nb, cur);
1306 queue.push_back(nb);
1307 }
1308 }
1309 }
1310 let mut path = vec![farthest];
1311 let mut cur = farthest;
1312 while cur != anchor { cur = parent[&cur]; path.push(cur); }
1313 path.reverse();
1314 path
1315}
1316
1317fn unsaturation_locant(mol: &Molecule, carbons: &[AtomIdx], order: BondOrder) -> usize {
1319 let chain = find_longest_c_chain(mol, carbons);
1320 let n = chain.len();
1321 for (_, b) in mol.bonds() {
1322 if b.order == order {
1323 if let (Some(p1), Some(p2)) = (
1324 chain.iter().position(|&c| c == b.atom1),
1325 chain.iter().position(|&c| c == b.atom2),
1326 ) {
1327 let fwd = p1.min(p2) + 1; let rev = n - p1.max(p2); return fwd.min(rev);
1330 }
1331 }
1332 }
1333 1
1334}
1335
1336fn ring_order_traversal(mol: &Molecule, ring_atoms: &HashSet<AtomIdx>) -> Vec<AtomIdx> {
1338 if ring_atoms.is_empty() { return Vec::new(); }
1339 let start = *ring_atoms.iter().next().unwrap();
1340 let mut order = vec![start];
1341 let first_nb = mol.neighbors(start).find(|(nb, _)| ring_atoms.contains(nb)).map(|(nb, _)| nb);
1342 let mut cur = match first_nb { Some(nb) => nb, None => return order };
1343 let mut prev = start;
1344 while cur != start {
1345 order.push(cur);
1346 let next = mol.neighbors(cur)
1347 .find(|(nb, _)| ring_atoms.contains(nb) && *nb != prev)
1348 .map(|(nb, _)| nb);
1349 prev = cur;
1350 match next { Some(nb) => cur = nb, None => break }
1351 }
1352 order
1353}
1354
1355fn best_benzene_locants(
1358 mol: &Molecule,
1359 ring_atoms: &HashSet<AtomIdx>,
1360 attach_points: &[AtomIdx],
1361) -> Vec<(usize, AtomIdx)> {
1362 let ring_order = ring_order_traversal(mol, ring_atoms);
1363 let ring_n = ring_order.len();
1364 if ring_n == 0 { return Vec::new(); }
1365 let n = attach_points.len();
1366 let pos_of: Vec<usize> = attach_points.iter()
1367 .map(|a| ring_order.iter().position(|r| r == a).unwrap_or(0))
1368 .collect();
1369 let mut best_locs: Option<Vec<usize>> = None;
1370 let mut best_assignment: Vec<(usize, AtomIdx)> = Vec::new();
1371 for start in 0..n {
1372 for &reverse in &[false, true] {
1373 let mut assignment: Vec<(usize, AtomIdx)> = Vec::new();
1374 for k in 0..n {
1375 let idx = (start + k) % n;
1376 let pos = if !reverse {
1377 (pos_of[idx] + ring_n - pos_of[start]) % ring_n
1378 } else {
1379 (pos_of[start] + ring_n - pos_of[idx]) % ring_n
1380 };
1381 assignment.push((pos + 1, attach_points[idx]));
1382 }
1383 assignment.sort_by_key(|&(l, _)| l);
1384 let locs: Vec<usize> = assignment.iter().map(|&(l, _)| l).collect();
1385 let is_better = best_locs.as_ref().map_or(true, |b| locs < *b);
1386 if is_better {
1387 best_locs = Some(locs);
1388 best_assignment = assignment;
1389 }
1390 }
1391 }
1392 best_assignment
1393}
1394
1395fn count_components(mol: &Molecule) -> usize {
1396 let n = mol.atom_count();
1397 if n == 0 { return 0; }
1398 let mut visited = vec![false; n];
1399 let mut count = 0;
1400 for start in 0..n {
1401 if visited[start] { continue; }
1402 count += 1;
1403 let mut queue = VecDeque::new();
1404 queue.push_back(AtomIdx(start as u32));
1405 visited[start] = true;
1406 while let Some(cur) = queue.pop_front() {
1407 for (nb, _) in mol.neighbors(cur) {
1408 if !visited[nb.0 as usize] {
1409 visited[nb.0 as usize] = true;
1410 queue.push_back(nb);
1411 }
1412 }
1413 }
1414 }
1415 count
1416}
1417
/// Returns the stem for a chain of `n` carbons (`"meth"`, `"eth"`, ...,
/// `"icos"`), i.e. the alkane name without its `ane` ending.
///
/// Stems are available for 1 to 20 carbons, the same range `alkane_suffix`
/// has names for. Any other `n` (including 0) yields the placeholder
/// `"long"`.
fn alkane_stem(n: usize) -> &'static str {
    match n {
        1 => "meth",
        2 => "eth",
        3 => "prop",
        4 => "but",
        5 => "pent",
        6 => "hex",
        7 => "hept",
        8 => "oct",
        9 => "non",
        10 => "dec",
        11 => "undec",
        12 => "dodec",
        13 => "tridec",
        14 => "tetradec",
        15 => "pentadec",
        16 => "hexadec",
        17 => "heptadec",
        18 => "octadec",
        19 => "nonadec",
        20 => "icos",
        _ => "long",
    }
}
1429
1430fn alkane_base(n: usize) -> String {
1432 format!("{}an", alkane_stem(n))
1433}
1434
/// Returns the name of the unbranched alkane with `n` carbons.
///
/// Names exist for 1 to 20 carbons; any other `n` yields the placeholder
/// `"<n>alkane"`.
fn alkane_suffix(n: usize) -> String {
    const NAMES: [&str; 20] = [
        "methane",
        "ethane",
        "propane",
        "butane",
        "pentane",
        "hexane",
        "heptane",
        "octane",
        "nonane",
        "decane",
        "undecane",
        "dodecane",
        "tridecane",
        "tetradecane",
        "pentadecane",
        "hexadecane",
        "heptadecane",
        "octadecane",
        "nonadecane",
        "icosane",
    ];

    match n {
        1..=20 => NAMES[n - 1].to_owned(),
        _ => format!("{n}alkane"),
    }
}
1450
1451fn alkene_suffix(n: usize) -> String { alkane_suffix(n).replace("ane", "ene") }
1452fn alkyne_suffix(n: usize) -> String { alkane_suffix(n).replace("ane", "yne") }
1453
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parses a SMILES string, panicking if it is invalid.
    fn mol(s: &str) -> Molecule {
        parse(s).unwrap()
    }

    /// Asserts, in order, that every `(SMILES, expected name)` pair is named
    /// as expected.
    fn check(cases: &[(&str, &str)]) {
        for &(smiles, expected) in cases {
            assert_eq!(name(&mol(smiles)).unwrap(), expected);
        }
    }

    #[test]
    fn test_alkanes() {
        check(&[
            ("C", "methane"),
            ("CC", "ethane"),
            ("CCC", "propane"),
            ("CCCC", "butane"),
            ("CCCCC", "pentane"),
            ("CCCCCC", "hexane"),
        ]);
    }

    #[test]
    fn test_alkenes_alkynes() {
        check(&[
            ("C=C", "ethene"),
            ("CC=C", "propene"),
            ("C#C", "ethyne"),
            ("CC#C", "propyne"),
        ]);
    }

    #[test]
    fn test_cycloalkanes() {
        check(&[
            ("C1CC1", "cyclopropane"),
            ("C1CCC1", "cyclobutane"),
            ("C1CCCC1", "cyclopentane"),
            ("C1CCCCC1", "cyclohexane"),
        ]);
    }

    #[test]
    fn test_alcohol() {
        check(&[
            ("CO", "methanol"),
            ("CCO", "ethanol"),
            ("CCCO", "propan-1-ol"),
        ]);
    }

    #[test]
    fn test_amine() {
        check(&[("CN", "methan-1-amine"), ("CCN", "ethan-1-amine")]);
    }

    #[test]
    fn test_haloalkane() {
        check(&[
            ("CCCl", "chloroethane"),
            ("CCBr", "bromoethane"),
            ("CF", "fluoromethane"),
            ("CI", "iodomethane"),
        ]);
    }

    #[test]
    fn test_not_supported() {
        // Two disconnected fragments.
        let result = name(&mol("CC.CC"));
        assert!(result.is_err());
    }

    #[test]
    fn test_empty() {
        use chematic_core::MoleculeBuilder;
        let mol = MoleculeBuilder::new().build();
        assert_eq!(name(&mol), Err(IupacError::Empty));
    }

    #[test]
    fn test_benzene() {
        check(&[("c1ccccc1", "benzene")]);
    }

    #[test]
    fn test_aromatic_heterocycles() {
        check(&[
            ("c1ccncc1", "pyridine"),
            ("c1ccoc1", "furan"),
            ("c1ccsc1", "thiophene"),
            ("c1cc[nH]c1", "pyrrole"),
            ("c1cnc[nH]1", "imidazole"),
        ]);
    }

    #[test]
    fn test_ketones() {
        check(&[
            ("CC(=O)C", "propan-2-one"),
            ("CC(=O)CC", "butan-2-one"),
            ("CCC(=O)CC", "pentan-3-one"),
            ("CCCC(=O)C", "pentan-2-one"),
        ]);
    }

    #[test]
    fn test_carboxylic_acids() {
        check(&[
            ("CC(=O)O", "ethanoic acid"),
            ("CCC(=O)O", "propanoic acid"),
            ("C(=O)O", "methanoic acid"),
        ]);
    }

    #[test]
    fn test_esters() {
        check(&[
            ("CC(=O)OC", "methyl ethanoate"),
            ("C(=O)OC", "methyl methanoate"),
            ("CC(=O)OCC", "ethyl ethanoate"),
        ]);
    }

    #[test]
    fn test_amides() {
        check(&[
            ("CC(=O)N", "ethanamide"),
            ("C(=O)N", "methanamide"),
            ("CCC(=O)N", "propanamide"),
        ]);
    }

    #[test]
    fn test_branched_alkanes() {
        check(&[
            ("CC(C)C", "2-methylpropane"),
            ("CC(C)CC", "2-methylbutane"),
            ("CC(C)(C)C", "2,2-dimethylpropane"),
            ("CCCC(C)CC", "3-methylhexane"),
        ]);
    }

    #[test]
    fn test_branched_alkane_lowest_locant() {
        check(&[("CCC(C)C", "2-methylbutane")]);
    }

    #[test]
    fn test_substituted_benzenes() {
        check(&[
            ("c1ccccc1O", "phenol"),
            ("c1ccccc1N", "aniline"),
            ("c1ccccc1Cl", "chlorobenzene"),
            ("c1ccccc1Br", "bromobenzene"),
        ]);
    }

    #[test]
    fn test_substituted_benzene_carbonyl() {
        check(&[
            ("c1ccccc1C=O", "benzaldehyde"),
            ("c1ccccc1C(=O)O", "benzoic acid"),
        ]);
    }

    #[test]
    fn test_nitriles() {
        check(&[("CC#N", "ethanenitrile"), ("CCC#N", "propanenitrile")]);
    }

    #[test]
    fn test_thiols() {
        check(&[
            ("CS", "methanethiol"),
            ("CCS", "ethanethiol"),
            ("CCCS", "propanethiol"),
        ]);
    }

    #[test]
    fn test_alcohol_locants() {
        check(&[
            ("CCCCO", "butan-1-ol"),
            ("CC(O)C", "propan-2-ol"),
            ("CCC(O)C", "butan-2-ol"),
        ]);
    }

    #[test]
    fn test_disubstituted_benzene() {
        check(&[
            ("Oc1ccc(Cl)cc1", "4-chlorophenol"),
            ("c1ccc(O)cc1Cl", "3-chlorophenol"),
        ]);
    }

    #[test]
    fn test_methylcycloalkane() {
        check(&[
            ("CC1CCCCC1", "methylcyclohexane"),
            ("CC1CCCC1", "methylcyclopentane"),
            ("CC1CCC1", "methylcyclobutane"),
        ]);
    }

    #[test]
    fn test_ethers() {
        check(&[
            ("COC", "methoxymethane"),
            ("COCC", "methoxyethane"),
            ("CCOCC", "ethoxyethane"),
            ("COCCC", "1-methoxypropane"),
        ]);
    }

    #[test]
    fn test_trimethylbenzene() {
        check(&[
            ("Cc1cccc(C)c1C", "1,2,3-trimethylbenzene"),
            ("Cc1ccc(C)cc1C", "1,2,4-trimethylbenzene"),
            ("Cc1cc(C)cc(C)c1", "1,3,5-trimethylbenzene"),
        ]);
    }

    #[test]
    fn test_secondary_amine() {
        check(&[
            ("CCNCC", "N-ethylethanamine"),
            ("CNCC", "N-methylethanamine"),
            ("CN(C)C", "N,N-dimethylmethanamine"),
        ]);
    }

    #[test]
    fn test_branched_aldehyde() {
        check(&[
            ("CC(C)C=O", "2-methylpropanal"),
            ("CCC(C)C=O", "2-methylbutanal"),
        ]);
    }

    #[test]
    fn test_branched_amide() {
        check(&[
            ("CC(C)C(=O)N", "2-methylpropanamide"),
            ("CCC(C)C(=O)N", "2-methylbutanamide"),
        ]);
    }

    #[test]
    fn test_branched_ester() {
        check(&[
            ("CC(C)C(=O)OC", "methyl 2-methylpropanoate"),
            ("CC(C)C(=O)OCC", "ethyl 2-methylpropanoate"),
        ]);
    }

    #[test]
    fn test_branched_ketone() {
        check(&[
            ("CC(=O)C(C)C", "3-methylbutan-2-one"),
            ("CC(=O)C(C)(C)C", "3,3-dimethylbutan-2-one"),
        ]);
    }

    #[test]
    fn test_secondary_thiol() {
        check(&[
            ("CCC(S)C", "butane-2-thiol"),
            ("CCCC(S)C", "pentane-2-thiol"),
        ]);
    }

    #[test]
    fn test_branched_carboxylic_acid() {
        check(&[
            ("CC(C)C(=O)O", "2-methylpropanoic acid"),
            ("CCC(C)C(=O)O", "2-methylbutanoic acid"),
            ("CC(C)(C)C(=O)O", "2,2-dimethylpropanoic acid"),
        ]);
    }

    #[test]
    fn test_alkene_locants() {
        check(&[
            ("CC=CC", "but-2-ene"),
            ("C=CCC", "but-1-ene"),
            ("CC=CCC", "pent-2-ene"),
            ("C=CCCC", "pent-1-ene"),
        ]);
    }

    #[test]
    fn test_alkyne_locants() {
        check(&[("CC#CC", "but-2-yne"), ("C#CCC", "but-1-yne")]);
    }

    #[test]
    fn test_amine_locants() {
        check(&[
            ("CCCN", "propan-1-amine"),
            ("CCC(N)C", "butan-2-amine"),
            ("CC(N)CCC", "pentan-2-amine"),
        ]);
    }

    #[test]
    fn test_haloalkane_locants() {
        check(&[
            ("CCCCl", "1-chloropropane"),
            ("CCCCCl", "1-chlorobutane"),
            ("CCC(Cl)C", "2-chlorobutane"),
            ("CCCC(Cl)C", "2-chloropentane"),
            ("ClCCCl", "1,2-dichloroethane"),
            ("ClCCCCl", "1,3-dichloropropane"),
        ]);
    }

    #[test]
    fn test_cycloalkanol() {
        check(&[
            ("OC1CCC1", "cyclobutanol"),
            ("OC1CCCC1", "cyclopentanol"),
            ("OC1CCCCC1", "cyclohexanol"),
        ]);
    }

    #[test]
    fn test_disubstituted_benzene_non_principal() {
        check(&[
            ("Clc1ccc(Br)cc1", "1-bromo-4-chlorobenzene"),
            ("Clc1ccc(F)cc1", "1-chloro-4-fluorobenzene"),
            ("Cc1ccccc1C", "1,2-dimethylbenzene"),
            ("Cc1ccc(C)cc1", "1,4-dimethylbenzene"),
            ("Cc1ccc(Cl)cc1", "1-chloro-4-methylbenzene"),
        ]);
    }

    #[test]
    fn test_propyl_substituent() {
        check(&[("CCCC(CCC)CCCC", "4-propyloctane")]);
    }

    #[test]
    fn test_dimethylcycloalkane() {
        check(&[
            ("CC1CCC(C)CC1", "1,4-dimethylcyclohexane"),
            ("CC1CCCC1C", "1,2-dimethylcyclopentane"),
            ("CC1CCC(C)C1", "1,3-dimethylcyclopentane"),
        ]);
    }
}