Skip to main content

chematic_iupac/
lib.rs

1//! `chematic-iupac` — local IUPAC name generation, no network required.
2//!
3//! Supports:
4//! - Linear alkanes and cycloalkanes
5//! - Alkenes (`-ene`) and alkynes (`-yne`) with one unsaturation
6//! - Simple derivatives: alcohols (`-ol`), amines (`-amine`), aldehydes (`-al`),
7//!   ketones (`-one` with position locant), carboxylic acids (`-oic acid`)
8//! - Esters (`alkyl alkanoate`) — linear, primary esters
9//! - Primary/secondary amides (`-anamide`)
10//! - Halogen substituents: fluoro-, chloro-, bromo-, iodo-
11//! - Common aromatic heterocycles: benzene, pyridine, furan, thiophene,
12//!   pyrrole, imidazole, pyrimidine
13//!
14//! Complex polycyclic systems, stereo descriptors, and structures outside
15//! the above scope return [`IupacError::NotSupported`].
16
17#![forbid(unsafe_code)]
18
19use chematic_core::{AtomIdx, BondOrder, Molecule, implicit_hcount};
20use chematic_perception::find_sssr;
21use std::collections::{HashSet, VecDeque};
22
23// ---------------------------------------------------------------------------
24// Public error type
25// ---------------------------------------------------------------------------
26
/// Error returned by [`name`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IupacError {
    /// The molecule contains no atoms.
    Empty,
    /// The molecule is outside the supported naming scope.
    NotSupported,
}

impl core::fmt::Display for IupacError {
    /// Write the fixed, human-readable message that belongs to the variant.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let message = match self {
            Self::Empty => "empty molecule",
            Self::NotSupported => "IUPAC name not supported for this structure",
        };
        f.write_str(message)
    }
}

impl std::error::Error for IupacError {}
46
47// ---------------------------------------------------------------------------
48// Public entry point
49// ---------------------------------------------------------------------------
50
51/// Generate a local IUPAC name for `mol`.
52///
53/// Returns `Err(IupacError::NotSupported)` for structures outside the current
54/// scope (polycyclic systems, multi-functional groups, stereocenters, etc.).
55pub fn name(mol: &Molecule) -> Result<String, IupacError> {
56    if mol.atom_count() == 0 {
57        return Err(IupacError::Empty);
58    }
59    Namer::new(mol).name()
60}
61
62// ---------------------------------------------------------------------------
63// Internal namer
64// ---------------------------------------------------------------------------
65
/// Internal naming context: borrows the molecule for the duration of one
/// naming run.
struct Namer<'a> {
    /// The molecule being named.
    mol: &'a Molecule,
}
69
70impl<'a> Namer<'a> {
71    fn new(mol: &'a Molecule) -> Self {
72        Self { mol }
73    }
74
75    fn name(&self) -> Result<String, IupacError> {
76        let mol = self.mol;
77
78        if count_components(mol) != 1 {
79            return Err(IupacError::NotSupported);
80        }
81
82        let rings = find_sssr(mol);
83        let ring_atoms: HashSet<AtomIdx> = rings
84            .rings()
85            .iter()
86            .flat_map(|r| r.iter().copied())
87            .collect();
88
89        let carbons:  Vec<AtomIdx> = atoms_of(mol, 6);
90        let o_atoms:  Vec<AtomIdx> = atoms_of(mol, 8);
91        let n_atoms:  Vec<AtomIdx> = atoms_of(mol, 7);
92        let s_atoms:  Vec<AtomIdx> = atoms_of(mol, 16);
93        let halogens: Vec<AtomIdx> = mol
94            .atoms()
95            .filter(|(_, a)| matches!(a.element.atomic_number(), 9 | 17 | 35 | 53))
96            .map(|(i, _)| i)
97            .collect();
98
99        // Reject elements outside C, H, N, O, S, halogens.
100        let het_elements: HashSet<u8> = mol
101            .atoms()
102            .filter(|(_, a)| { let an = a.element.atomic_number(); an != 6 && an != 1 })
103            .map(|(_, a)| a.element.atomic_number())
104            .collect();
105        if het_elements.iter().any(|&an| !matches!(an, 7 | 8 | 9 | 16 | 17 | 35 | 53)) {
106            return Err(IupacError::NotSupported);
107        }
108
109        let cyclic = !ring_atoms.is_empty();
110
111        if cyclic {
112            let any_aromatic = ring_atoms.iter().any(|&i| mol.atom(i).aromatic);
113            if any_aromatic {
114                return self.name_aromatic_ring(&ring_atoms);
115            }
116            // Non-aromatic ring: allow OH substituent (cycloalkanol), block others.
117            let only_oxygen = het_elements.len() == 1 && het_elements.contains(&8);
118            if !het_elements.is_empty() && !only_oxygen {
119                return Err(IupacError::NotSupported);
120            }
121            if only_oxygen {
122                return self.name_cycloalkanol(&ring_atoms, &carbons, &o_atoms);
123            }
124            return self.name_cycloalkane(&ring_atoms, &carbons);
125        }
126
127        // Acyclic dispatch on heteroatom composition.
128        match (o_atoms.len(), n_atoms.len(), s_atoms.len(), halogens.len()) {
129            (0, 0, 0, 0) => self.name_acyclic_hydrocarbon(&carbons),
130            (1, 0, 0, 0) => self.name_one_oxygen(&carbons, o_atoms[0]),
131            (2, 0, 0, 0) => self.name_two_oxygens(&carbons, &o_atoms),
132            (1, 1, 0, 0) => self.name_amide(&carbons, o_atoms[0], n_atoms[0]),
133            (0, 1, 0, 0) => {
134                // Nitrile (C≡N) takes priority over amine.
135                if self.is_nitrile(n_atoms[0]) {
136                    self.name_nitrile(&carbons, n_atoms[0])
137                } else {
138                    self.name_amine(&carbons, n_atoms[0])
139                }
140            }
141            (0, 0, 1, 0) => self.name_thiol(&carbons, s_atoms[0]),
142            (0, 0, 0, _) if !halogens.is_empty() => {
143                if het_elements.len() != 1 {
144                    return Err(IupacError::NotSupported);
145                }
146                let prefix = match het_elements.iter().next().copied().unwrap() {
147                    9  => "fluoro",
148                    17 => "chloro",
149                    35 => "bromo",
150                    53 => "iodo",
151                    _  => return Err(IupacError::NotSupported),
152                };
153                self.name_haloalkane(&carbons, &halogens, prefix)
154            }
155            _ => Err(IupacError::NotSupported),
156        }
157    }
158
159    // -----------------------------------------------------------------------
160    // Aromatic ring naming
161    // -----------------------------------------------------------------------
162
163    fn name_aromatic_ring(&self, ring_atoms: &HashSet<AtomIdx>) -> Result<String, IupacError> {
164        let mol = self.mol;
165        // All ring atoms must be aromatic.
166        if !ring_atoms.iter().all(|&i| mol.atom(i).aromatic) {
167            return Err(IupacError::NotSupported);
168        }
169
170        let n_n = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 7).count();
171        let n_o = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 8).count();
172        let n_s = ring_atoms.iter().filter(|&&i| mol.atom(i).element.atomic_number() == 16).count();
173        let sz  = ring_atoms.len();
174
175        // Case 1: Pure aromatic ring (no substituents).
176        if ring_atoms.len() == mol.atom_count() {
177            return match (sz, n_n, n_o, n_s) {
178                (6, 0, 0, 0) => Ok("benzene".into()),
179                (6, 1, 0, 0) => Ok("pyridine".into()),
180                (6, 2, 0, 0) => Ok("pyrimidine".into()),
181                (5, 0, 1, 0) => Ok("furan".into()),
182                (5, 0, 0, 1) => Ok("thiophene".into()),
183                (5, 1, 0, 0) => Ok("pyrrole".into()),
184                (5, 2, 0, 0) => Ok("imidazole".into()),
185                _            => Err(IupacError::NotSupported),
186            };
187        }
188
189        // Case 2: Monosubstituted benzene (phenol, toluene, aniline, etc.)
190        // Only support pure benzene ring (6 C, no N/O/S in ring).
191        if sz == 6 && n_n == 0 && n_o == 0 && n_s == 0 {
192            let sub_atoms: Vec<AtomIdx> = mol.atoms()
193                .filter(|(i, _)| !ring_atoms.contains(i))
194                .map(|(i, _)| i)
195                .collect();
196            return self.name_monosubstituted_benzene(ring_atoms, &sub_atoms);
197        }
198
199        Err(IupacError::NotSupported)
200    }
201
202    // -----------------------------------------------------------------------
203    // Monosubstituted benzene naming
204    // -----------------------------------------------------------------------
205
206    fn name_monosubstituted_benzene(
207        &self,
208        ring_atoms: &HashSet<AtomIdx>,
209        sub_atoms: &[AtomIdx],
210    ) -> Result<String, IupacError> {
211        let mol = self.mol;
212        // Count how many ring C have substituents.
213        let attach_count = ring_atoms.iter().filter(|&&r| {
214            mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb))
215        }).count();
216        if attach_count == 2 {
217            return self.name_disubstituted_benzene(ring_atoms, sub_atoms);
218        }
219        if attach_count == 3 {
220            return self.name_trisubstituted_benzene(ring_atoms);
221        }
222        if attach_count != 1 {
223            return Err(IupacError::NotSupported);
224        }
225
226        // Classify substituent by element counts + bond types.
227        let mut n_c = 0usize; let mut n_n = 0usize;
228        let mut n_o = 0usize; let mut n_hal = 0usize;
229        let mut halogen_an = 0u8;
230        for &a in sub_atoms {
231            match mol.atom(a).element.atomic_number() {
232                6  => n_c += 1,
233                7  => n_n += 1,
234                8  => n_o += 1,
235                1  => {},
236                an @ (9 | 17 | 35 | 53) => { n_hal += 1; halogen_an = an; }
237                _  => return Err(IupacError::NotSupported),
238            }
239        }
240
241        let sub_set: HashSet<AtomIdx> = sub_atoms.iter().copied().collect();
242        let has_triple = mol.bonds().any(|(_, b)| {
243            b.order == BondOrder::Triple
244                && (sub_set.contains(&b.atom1) || sub_set.contains(&b.atom2))
245        });
246        let has_double = mol.bonds().any(|(_, b)| {
247            b.order == BondOrder::Double
248                && (sub_set.contains(&b.atom1) || sub_set.contains(&b.atom2))
249        });
250
251        match (n_c, n_n, n_o, n_hal, has_double, has_triple) {
252            // Phenol: c1ccccc1O
253            (0, 0, 1, 0, false, false) => Ok("phenol".into()),
254            // Aniline: c1ccccc1N
255            (0, 1, 0, 0, false, false) => Ok("aniline".into()),
256            // Halo-benzenes
257            (0, 0, 0, 1, false, false) => {
258                let prefix = match halogen_an {
259                    9 => "fluoro", 17 => "chloro", 35 => "bromo", 53 => "iodo", _ => return Err(IupacError::NotSupported),
260                };
261                Ok(format!("{prefix}benzene"))
262            }
263            // Toluene: c1ccccc1C (one CH3)
264            (1, 0, 0, 0, false, false) => Ok("toluene".into()),
265            // Benzaldehyde: c1ccccc1C=O (n_c=1, n_o=1, has_double)
266            (1, 0, 1, 0, true, false) => Ok("benzaldehyde".into()),
267            // Benzoic acid: c1ccccc1C(=O)O (n_c=1, n_o=2, has_double)
268            (1, 0, 2, 0, true, false) => Ok("benzoic acid".into()),
269            // Benzonitrile: c1ccccc1C#N (n_c=1, n_n=1, has_triple)
270            (1, 1, 0, 0, false, true) => Ok("benzonitrile".into()),
271            _ => Err(IupacError::NotSupported),
272        }
273    }
274
275    // -----------------------------------------------------------------------
276    // Cycloalkane naming
277    // -----------------------------------------------------------------------
278
279    fn name_cycloalkane(
280        &self,
281        ring_atoms: &HashSet<AtomIdx>,
282        carbons: &[AtomIdx],
283    ) -> Result<String, IupacError> {
284        let mol = self.mol;
285        if carbons.iter().any(|&c| mol.atom(c).aromatic) {
286            return Err(IupacError::NotSupported);
287        }
288        // All carbons in ring: unsubstituted cycloalkane.
289        if ring_atoms.len() == carbons.len() {
290            return Ok(format!("cyclo{}", alkane_suffix(ring_atoms.len())));
291        }
292        let outside: Vec<AtomIdx> = carbons.iter()
293            .filter(|&&c| !ring_atoms.contains(&c))
294            .copied()
295            .collect();
296
297        let is_terminal_methyl = |sub: AtomIdx| -> bool {
298            mol.neighbors(sub)
299                .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6 && !ring_atoms.contains(nb))
300                .count() == 0
301        };
302
303        if outside.len() == 1 && is_terminal_methyl(outside[0]) {
304            return Ok(format!("methylcyclo{}", alkane_suffix(ring_atoms.len())));
305        }
306
307        if outside.len() == 2 && is_terminal_methyl(outside[0]) && is_terminal_methyl(outside[1]) {
308            let att_a = mol.neighbors(outside[0])
309                .find(|(nb, _)| ring_atoms.contains(nb))
310                .map(|(nb, _)| nb)
311                .ok_or(IupacError::NotSupported)?;
312            let att_b = mol.neighbors(outside[1])
313                .find(|(nb, _)| ring_atoms.contains(nb))
314                .map(|(nb, _)| nb)
315                .ok_or(IupacError::NotSupported)?;
316            // BFS shortest path within ring.
317            let raw_dist = {
318                let mut dist = 0usize;
319                let mut queue = VecDeque::new();
320                let mut visited: HashSet<AtomIdx> = HashSet::new();
321                queue.push_back((att_a, 0usize));
322                visited.insert(att_a);
323                'bfs: while let Some((cur, d)) = queue.pop_front() {
324                    if cur == att_b { dist = d; break 'bfs; }
325                    for (nb, _) in mol.neighbors(cur) {
326                        if ring_atoms.contains(&nb) && visited.insert(nb) {
327                            queue.push_back((nb, d + 1));
328                        }
329                    }
330                }
331                dist
332            };
333            let ring_dist = raw_dist.min(ring_atoms.len() - raw_dist);
334            return Ok(format!("1,{}-dimethylcyclo{}", ring_dist + 1, alkane_suffix(ring_atoms.len())));
335        }
336
337        Err(IupacError::NotSupported)
338    }
339
340    // -----------------------------------------------------------------------
341    // Cycloalkanol naming (cyclopentanol, cyclohexanol, ...)
342    // -----------------------------------------------------------------------
343
344    fn name_cycloalkanol(
345        &self,
346        ring_atoms: &HashSet<AtomIdx>,
347        carbons: &[AtomIdx],
348        o_atoms: &[AtomIdx],
349    ) -> Result<String, IupacError> {
350        let mol = self.mol;
351        // Only one OH substituent.
352        if o_atoms.len() != 1 { return Err(IupacError::NotSupported); }
353        let o_idx = o_atoms[0];
354        // O must be single-bond –OH (not carbonyl).
355        if mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) {
356            return Err(IupacError::NotSupported);
357        }
358        // O must have implicit H.
359        if implicit_hcount(mol, o_idx) == 0 {
360            return Err(IupacError::NotSupported);
361        }
362        // No exocyclic carbons (unsubstituted ring + OH only).
363        let exo_c = carbons.iter().filter(|&&c| !ring_atoms.contains(&c)).count();
364        if exo_c > 0 { return Err(IupacError::NotSupported); }
365        Ok(format!("cyclo{}ol", alkane_base(ring_atoms.len())))
366    }
367
368    // -----------------------------------------------------------------------
369    // Acyclic hydrocarbon naming
370    // -----------------------------------------------------------------------
371
372    fn name_acyclic_hydrocarbon(&self, carbons: &[AtomIdx]) -> Result<String, IupacError> {
373        let mol = self.mol;
374        let n = carbons.len();
375
376        let double_bonds = mol.bonds().filter(|(_, b)| b.order == BondOrder::Double).count();
377        let triple_bonds = mol.bonds().filter(|(_, b)| b.order == BondOrder::Triple).count();
378        if double_bonds > 1 || triple_bonds > 1 || (double_bonds > 0 && triple_bonds > 0) {
379            return Err(IupacError::NotSupported);
380        }
381
382        // Check for branching.
383        let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
384        let is_branched = carbons.iter().any(|&c| {
385            mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
386        });
387
388        if is_branched {
389            // Only saturated branched alkanes supported for now.
390            if double_bonds > 0 || triple_bonds > 0 {
391                return Err(IupacError::NotSupported);
392            }
393            return self.name_branched_alkane(carbons);
394        }
395
396        if triple_bonds == 1 {
397            if n >= 4 {
398                let pos = unsaturation_locant(mol, carbons, BondOrder::Triple);
399                Ok(format!("{}-{}-yne", alkane_stem(n), pos))
400            } else {
401                Ok(alkyne_suffix(n))
402            }
403        } else if double_bonds == 1 {
404            if n >= 4 {
405                let pos = unsaturation_locant(mol, carbons, BondOrder::Double);
406                Ok(format!("{}-{}-ene", alkane_stem(n), pos))
407            } else {
408                Ok(alkene_suffix(n))
409            }
410        } else {
411            Ok(alkane_suffix(n))
412        }
413    }
414
415    // -----------------------------------------------------------------------
416    // One-oxygen compound: alcohol / aldehyde / ketone
417    // -----------------------------------------------------------------------
418
419    fn name_one_oxygen(&self, carbons: &[AtomIdx], o_idx: AtomIdx) -> Result<String, IupacError> {
420        let mol = self.mol;
421        let is_double = mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
422
423        if !is_double {
424            // Ether check: O with 2 C neighbors and no implicit H → R-O-R
425            let o_c_nb: Vec<AtomIdx> = mol.neighbors(o_idx)
426                .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
427                .map(|(nb, _)| nb)
428                .collect();
429            if o_c_nb.len() == 2 && implicit_hcount(mol, o_idx) == 0 {
430                return self.name_ether(carbons, o_idx, o_c_nb[0], o_c_nb[1]);
431            }
432
433            // Find the OH carbon.
434            let oh_c = mol.neighbors(o_idx)
435                .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
436                .map(|(nb, _)| nb)
437                .next()
438                .ok_or(IupacError::NotSupported)?;
439
440            // Check for branching.
441            let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
442            let is_branched = carbons.iter().any(|&c| {
443                mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
444            });
445            if is_branched {
446                return self.name_branched_alcohol(carbons, oh_c);
447            }
448
449            // Straight-chain: determine OH position using longest chain.
450            let chain = find_longest_c_chain(mol, carbons);
451            let n = chain.len();
452            let pos_fwd = chain.iter().position(|&c| c == oh_c).map(|p| p + 1).unwrap_or(1);
453            let pos = pos_fwd.min(n + 1 - pos_fwd);
454            if pos == 1 && n <= 2 {
455                // Short common names without locant: methanol, ethanol.
456                return Ok(format!("{}anol", alkane_stem(n)));
457            }
458            return Ok(format!("{}-{}-ol", alkane_base(n), pos));
459        }
460
461        // Carbonyl: find the C=O carbon.
462        let carbonyl_c = mol
463            .neighbors(o_idx)
464            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
465            .map(|(nb, _)| nb)
466            .next()
467            .ok_or(IupacError::NotSupported)?;
468
469        if implicit_hcount(mol, carbonyl_c) > 0 {
470            // Aldehyde: CHO is position 1; find chain from carbonyl_c.
471            let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
472            let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
473            let n = chain.len();
474            let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
475            let mut subs: Vec<(usize, usize)> = Vec::new();
476            for (pos0, &chain_c) in chain.iter().enumerate() {
477                if pos0 == 0 { continue; }
478                let position = pos0 + 1;
479                for (nb, _) in mol.neighbors(chain_c) {
480                    if c_set.contains(&nb) && !chain_set.contains(&nb) {
481                        let sub_len = count_c_chain(mol, nb, chain_c);
482                        if sub_len > 4 { return Err(IupacError::NotSupported); }
483                        subs.push((position, sub_len));
484                    }
485                }
486            }
487            let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
488            return Ok(format!("{}{}anal", prefix, alkane_stem(n)));
489        }
490
491        // Ketone: internal C=O — find principal chain and position.
492        let chain = find_longest_c_chain(mol, carbons);
493        let n = chain.len();
494        if n < 3 { return Err(IupacError::NotSupported); }
495        let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
496        let all_c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
497        let pos_fwd = chain.iter().position(|&c| c == carbonyl_c)
498            .map(|p| p + 1).ok_or(IupacError::NotSupported)?;
499        let pos = pos_fwd.min(n + 1 - pos_fwd);
500        let reversed = pos_fwd > n + 1 - pos_fwd;
501        // Collect alkyl substituents on the chain.
502        let mut subs: Vec<(usize, usize)> = Vec::new();
503        for (idx, &chain_c) in chain.iter().enumerate() {
504            let position = idx + 1;
505            for (nb, _) in mol.neighbors(chain_c) {
506                if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
507                    let sub_len = count_c_chain(mol, nb, chain_c);
508                    if sub_len > 4 { return Err(IupacError::NotSupported); }
509                    let adj_pos = if reversed { n + 1 - position } else { position };
510                    subs.push((adj_pos, sub_len));
511                }
512            }
513        }
514        let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
515        Ok(format!("{}{}-{}-one", prefix, alkane_base(n), pos))
516    }
517
518    // -----------------------------------------------------------------------
519    // Ether naming (R-O-R → "alkoxyalkane")
520    // -----------------------------------------------------------------------
521
522    fn name_ether(
523        &self,
524        carbons: &[AtomIdx],
525        o_idx: AtomIdx,
526        side_a: AtomIdx,
527        side_b: AtomIdx,
528    ) -> Result<String, IupacError> {
529        let mol = self.mol;
530        // Only unbranched ethers.
531        let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
532        if carbons.iter().any(|&c| {
533            mol.neighbors(c).filter(|(nb, _)| c_set.contains(nb)).count() > 2
534        }) {
535            return Err(IupacError::NotSupported);
536        }
537        let len_a = count_c_chain(mol, side_a, o_idx);
538        let len_b = count_c_chain(mol, side_b, o_idx);
539        let (alkoxy_len, parent_len) = if len_a <= len_b { (len_a, len_b) } else { (len_b, len_a) };
540        let alkoxy = format!("{}oxy", alkane_stem(alkoxy_len));
541        let parent = alkane_suffix(parent_len);
542        // Add locant "1-" when parent ≥ 3 C and chains differ (O position is ambiguous).
543        if parent_len >= 3 && alkoxy_len != parent_len {
544            Ok(format!("1-{alkoxy}{parent}"))
545        } else {
546            Ok(format!("{alkoxy}{parent}"))
547        }
548    }
549
550    // -----------------------------------------------------------------------
551    // Two-oxygen compound: carboxylic acid or ester
552    // -----------------------------------------------------------------------
553
554    fn name_two_oxygens(&self, carbons: &[AtomIdx], o_atoms: &[AtomIdx]) -> Result<String, IupacError> {
555        let mol = self.mol;
556        let o1 = o_atoms[0];
557        let o2 = o_atoms[1];
558
559        let o1_dbl = mol.neighbors(o1).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
560        let o2_dbl = mol.neighbors(o2).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double);
561
562        let (carbonyl_o, ester_o) = match (o1_dbl, o2_dbl) {
563            (true, false) => (o1, o2),
564            (false, true) => (o2, o1),
565            _ => return Err(IupacError::NotSupported),
566        };
567
568        // Carbonyl C is bonded to the =O oxygen.
569        let carbonyl_c = mol
570            .neighbors(carbonyl_o)
571            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
572            .map(|(nb, _)| nb)
573            .next()
574            .ok_or(IupacError::NotSupported)?;
575
576        // Carbonyl C must also be bonded to the single-bond O.
577        if !mol.neighbors(carbonyl_c).any(|(nb, _)| nb == ester_o) {
578            return Err(IupacError::NotSupported);
579        }
580
581        // Is the single-bond O also bonded to another C (→ ester) or only H (→ acid)?
582        let alcohol_c = mol
583            .neighbors(ester_o)
584            .filter(|(nb, _)| *nb != carbonyl_c && mol.atom(*nb).element.atomic_number() == 6)
585            .map(|(nb, _)| nb)
586            .next();
587
588        let c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
589        if let Some(alc_c) = alcohol_c {
590            // Ester: find acid chain from carbonyl_c (handles branched acid parts).
591            let chain_acid = chain_from_anchor(mol, &c_set, carbonyl_c);
592            let acid_n = chain_acid.len();
593            let chain_acid_set: HashSet<AtomIdx> = chain_acid.iter().copied().collect();
594            let mut subs: Vec<(usize, usize)> = Vec::new();
595            for (pos0, &chain_c) in chain_acid.iter().enumerate() {
596                if pos0 == 0 { continue; }
597                let position = pos0 + 1;
598                for (nb, _) in mol.neighbors(chain_c) {
599                    if c_set.contains(&nb) && !chain_acid_set.contains(&nb) {
600                        let sub_len = count_c_chain(mol, nb, chain_c);
601                        if sub_len > 4 { return Err(IupacError::NotSupported); }
602                        subs.push((position, sub_len));
603                    }
604                }
605            }
606            let alcohol_n = count_c_chain(mol, alc_c, ester_o);
607            let acid_part = if subs.is_empty() {
608                format!("{}anoate", alkane_stem(acid_n))
609            } else {
610                format!("{}{}anoate", format_substituents(&subs), alkane_stem(acid_n))
611            };
612            Ok(format!("{}yl {}", alkane_stem(alcohol_n), acid_part))
613        } else {
614            // Carboxylic acid — find principal chain from carboxyl C (always position 1).
615            let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
616            let n = chain.len();
617            let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
618            let mut subs: Vec<(usize, usize)> = Vec::new();
619            for (pos0, &chain_c) in chain.iter().enumerate() {
620                if pos0 == 0 { continue; }
621                let position = pos0 + 1;
622                for (nb, _) in mol.neighbors(chain_c) {
623                    if c_set.contains(&nb) && !chain_set.contains(&nb) {
624                        let sub_len = count_c_chain(mol, nb, chain_c);
625                        if sub_len > 4 { return Err(IupacError::NotSupported); }
626                        subs.push((position, sub_len));
627                    }
628                }
629            }
630            if subs.is_empty() {
631                Ok(format!("{}anoic acid", alkane_stem(n)))
632            } else {
633                Ok(format!("{}{}anoic acid", format_substituents(&subs), alkane_stem(n)))
634            }
635        }
636    }
637
638    // -----------------------------------------------------------------------
639    // Amide: C(=O)–N
640    // -----------------------------------------------------------------------
641
642    fn name_amide(
643        &self,
644        _carbons: &[AtomIdx],
645        o_idx: AtomIdx,
646        n_idx: AtomIdx,
647    ) -> Result<String, IupacError> {
648        let mol = self.mol;
649
650        // O must be a carbonyl (C=O).
651        if !mol.neighbors(o_idx).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) {
652            return Err(IupacError::NotSupported);
653        }
654
655        let carbonyl_c = mol
656            .neighbors(o_idx)
657            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
658            .map(|(nb, _)| nb)
659            .next()
660            .ok_or(IupacError::NotSupported)?;
661
662        // Carbonyl C must be bonded to N.
663        if !mol.neighbors(carbonyl_c).any(|(nb, _)| nb == n_idx) {
664            return Err(IupacError::NotSupported);
665        }
666
667        // Only primary/secondary amides (N has ≥ 1 H).
668        if implicit_hcount(mol, n_idx) == 0 {
669            return Err(IupacError::NotSupported);
670        }
671
672        // Amide chain from carbonyl_c (handles branched structures).
673        let c_set: HashSet<AtomIdx> = mol.atoms()
674            .filter(|(_, a)| a.element.atomic_number() == 6)
675            .map(|(i, _)| i)
676            .collect();
677        let chain = chain_from_anchor(mol, &c_set, carbonyl_c);
678        let n = chain.len();
679        let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
680        let mut subs: Vec<(usize, usize)> = Vec::new();
681        for (pos0, &chain_c) in chain.iter().enumerate() {
682            if pos0 == 0 { continue; }
683            let position = pos0 + 1;
684            for (nb, _) in mol.neighbors(chain_c) {
685                if c_set.contains(&nb) && !chain_set.contains(&nb) {
686                    let sub_len = count_c_chain(mol, nb, chain_c);
687                    if sub_len > 4 { return Err(IupacError::NotSupported); }
688                    subs.push((position, sub_len));
689                }
690            }
691        }
692        let prefix = if subs.is_empty() { String::new() } else { format_substituents(&subs) };
693        Ok(format!("{}{}anamide", prefix, alkane_stem(n)))
694    }
695
696    // -----------------------------------------------------------------------
697    // Amine naming
698    // -----------------------------------------------------------------------
699
700    fn name_amine(&self, carbons: &[AtomIdx], n_idx: AtomIdx) -> Result<String, IupacError> {
701        let mol = self.mol;
702        let n_h = implicit_hcount(mol, n_idx);
703        let c_sides: Vec<AtomIdx> = mol.neighbors(n_idx)
704            .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6)
705            .map(|(nb, _)| nb)
706            .collect();
707        let mut chain_lens: Vec<usize> = c_sides.iter()
708            .map(|&nb| count_c_chain(mol, nb, n_idx))
709            .collect();
710        chain_lens.sort_unstable_by(|a, b| b.cmp(a)); // descending
711        match n_h {
712            2 => {
713                // Find N-bearing C's position on the principal chain.
714                let chain = find_longest_c_chain(mol, carbons);
715                let n_chain = chain.len();
716                let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
717                let amine_c = mol.neighbors(n_idx)
718                    .filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 6
719                                   && chain_set.contains(nb))
720                    .map(|(nb, _)| nb)
721                    .next()
722                    .ok_or(IupacError::NotSupported)?;
723                let pos_fwd = chain.iter().position(|&c| c == amine_c)
724                    .map(|p| p + 1).unwrap_or(1);
725                let pos = pos_fwd.min(n_chain + 1 - pos_fwd);
726                Ok(format!("{}an-{}-amine", alkane_stem(n_chain), pos))
727            }
728            1 => {
729                if chain_lens.len() != 2 { return Err(IupacError::NotSupported); }
730                let parent_len = chain_lens[0];
731                let sub_len    = chain_lens[1];
732                Ok(format!("N-{}yl{}anamine", alkane_stem(sub_len), alkane_stem(parent_len)))
733            }
734            0 => {
735                if chain_lens.len() != 3 { return Err(IupacError::NotSupported); }
736                let parent_len = chain_lens[0];
737                let sub1 = chain_lens[1];
738                let sub2 = chain_lens[2];
739                if sub1 == sub2 {
740                    Ok(format!("N,N-di{}yl{}anamine", alkane_stem(sub1), alkane_stem(parent_len)))
741                } else {
742                    let (lo, hi) = (sub1.min(sub2), sub1.max(sub2));
743                    Ok(format!("N-{}yl-N-{}yl{}anamine", alkane_stem(lo), alkane_stem(hi), alkane_stem(parent_len)))
744                }
745            }
746            _ => Err(IupacError::NotSupported),
747        }
748    }
749
750    // -----------------------------------------------------------------------
751    // Haloalkane naming
752    // -----------------------------------------------------------------------
753
754    fn name_haloalkane(
755        &self,
756        carbons: &[AtomIdx],
757        halogen_atoms: &[AtomIdx],
758        prefix: &str,
759    ) -> Result<String, IupacError> {
760        let mol = self.mol;
761        let chain = find_longest_c_chain(mol, carbons);
762        let n = chain.len();
763        let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
764
765        // Find the locant of each halogen on the chain.
766        let mut locants: Vec<usize> = Vec::new();
767        for &hal in halogen_atoms {
768            let hal_c = mol.neighbors(hal)
769                .filter(|(nb, _)| chain_set.contains(nb))
770                .map(|(nb, _)| nb)
771                .next()
772                .ok_or(IupacError::NotSupported)?;
773            let pos = chain.iter().position(|&c| c == hal_c).map(|p| p + 1)
774                .ok_or(IupacError::NotSupported)?;
775            locants.push(pos);
776        }
777
778        // Apply lowest-locant rule (compare forward vs reversed numbering).
779        let locants_rev: Vec<usize> = locants.iter().map(|&p| n + 1 - p).collect();
780        let best = if locants.iter().min() <= locants_rev.iter().min() {
781            locants
782        } else {
783            locants_rev
784        };
785
786        let count = halogen_atoms.len();
787        let mult = match count {
788            1 => prefix.to_string(),
789            2 => format!("di{prefix}"),
790            3 => format!("tri{prefix}"),
791            _ => return Err(IupacError::NotSupported),
792        };
793
794        let mut sorted_locs = best;
795        sorted_locs.sort_unstable();
796        let locant_str = sorted_locs.iter().map(|l| l.to_string()).collect::<Vec<_>>().join(",");
797
798        // Omit locant for short unambiguous cases (n≤2, single halogen at terminal).
799        if n <= 2 && count == 1 {
800            Ok(format!("{mult}{}", alkane_suffix(n)))
801        } else {
802            Ok(format!("{locant_str}-{mult}{}", alkane_suffix(n)))
803        }
804    }
805
806    // -----------------------------------------------------------------------
807    // Thiol naming (R-SH → "...anethiol")
808    // -----------------------------------------------------------------------
809
810    fn name_thiol(&self, carbons: &[AtomIdx], s_idx: AtomIdx) -> Result<String, IupacError> {
811        let mol = self.mol;
812        if implicit_hcount(mol, s_idx) == 0 {
813            return Err(IupacError::NotSupported);
814        }
815        let chain = find_longest_c_chain(mol, carbons);
816        let n = chain.len();
817        let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
818        let thiol_c = mol.neighbors(s_idx)
819            .filter(|(nb, _)| chain_set.contains(nb))
820            .map(|(nb, _)| nb)
821            .next()
822            .ok_or(IupacError::NotSupported)?;
823        let pos_fwd = chain.iter().position(|&c| c == thiol_c).map(|p| p + 1).unwrap_or(1);
824        let pos = pos_fwd.min(n + 1 - pos_fwd);
825        // Terminal SH (pos=1): no locant; internal: add locant.
826        if pos == 1 {
827            Ok(format!("{}anethiol", alkane_stem(n)))
828        } else {
829            Ok(format!("{}ane-{}-thiol", alkane_stem(n), pos))
830        }
831    }
832
833    // -----------------------------------------------------------------------
834    // Branched alcohol naming (e.g., "propan-2-ol")
835    // -----------------------------------------------------------------------
836
837    fn name_branched_alcohol(
838        &self,
839        carbons: &[AtomIdx],
840        oh_c: AtomIdx,
841    ) -> Result<String, IupacError> {
842        // Find principal chain.
843        let chain = find_longest_c_chain(self.mol, carbons);
844        let n = chain.len();
845        if n < 2 { return Err(IupacError::NotSupported); }
846
847        let chain_set: HashSet<AtomIdx> = chain.iter().copied().collect();
848        let all_c_set: HashSet<AtomIdx> = carbons.iter().copied().collect();
849
850        // The OH carbon must be on the principal chain.
851        let pos_on_chain = if chain_set.contains(&oh_c) {
852            chain.iter().position(|&c| c == oh_c).map(|p| p + 1)
853        } else {
854            None
855        };
856
857        let pos_fwd = pos_on_chain.ok_or(IupacError::NotSupported)?;
858        let pos = pos_fwd.min(n + 1 - pos_fwd);
859
860        // Also collect any alkyl substituents on the chain.
861        let mut subs: Vec<(usize, usize)> = Vec::new();
862        for (pos0, &chain_c) in chain.iter().enumerate() {
863            let position = pos0 + 1;
864            for (nb, _) in self.mol.neighbors(chain_c) {
865                if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
866                    let sub_len = count_c_chain(self.mol, nb, chain_c);
867                    if sub_len > 4 { return Err(IupacError::NotSupported); }
868                    subs.push((position, sub_len));
869                }
870            }
871        }
872
873        // Re-number subs with the same locant direction as OH position.
874        if pos_fwd > n + 1 - pos_fwd {
875            // Reverse direction was chosen for OH; re-number subs accordingly.
876            subs = subs.iter().map(|&(p, l)| (n + 1 - p, l)).collect();
877        }
878
879        let prefix = if subs.is_empty() {
880            String::new()
881        } else {
882            subs.sort_unstable();
883            let subs_rev: Vec<(usize, usize)> = subs.iter()
884                .map(|&(p, l)| (n + 1 - p, l))
885                .collect();
886            let first_fwd = subs.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
887            let first_rev = subs_rev.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
888            let best = if first_fwd <= first_rev { subs.clone() } else { subs_rev };
889            format!("{}-", format_substituents(&best))
890        };
891
892        Ok(format!("{}{}-{}-ol", prefix, alkane_base(n), pos))
893    }
894
895    // -----------------------------------------------------------------------
896    // Disubstituted benzene naming (e.g., "4-chlorophenol")
897    // -----------------------------------------------------------------------
898
899    fn name_disubstituted_benzene(
900        &self,
901        ring_atoms: &HashSet<AtomIdx>,
902        _sub_atoms: &[AtomIdx],
903    ) -> Result<String, IupacError> {
904        let mol = self.mol;
905
906        // Identify the two ring C attachment points and their substituent sets.
907        let attach_points: Vec<AtomIdx> = ring_atoms.iter()
908            .filter(|&&r| mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb)))
909            .copied()
910            .collect();
911        if attach_points.len() != 2 {
912            return Err(IupacError::NotSupported);
913        }
914
915        // Compute ring distance (shortest path within ring) between the two attachment points.
916        let ring_dist = {
917            let ring_vec: Vec<AtomIdx> = ring_atoms.iter().copied().collect();
918            let mut dist = usize::MAX;
919            // BFS within the ring
920            let mut queue = VecDeque::new();
921            let mut visited: HashSet<AtomIdx> = HashSet::new();
922            queue.push_back((attach_points[0], 0usize));
923            visited.insert(attach_points[0]);
924            while let Some((cur, d)) = queue.pop_front() {
925                if cur == attach_points[1] { dist = d; break; }
926                for (nb, _) in mol.neighbors(cur) {
927                    if ring_atoms.contains(&nb) && visited.insert(nb) {
928                        queue.push_back((nb, d + 1));
929                    }
930                }
931            }
932            // Take minimum of this and the longer path
933            dist.min(ring_vec.len() - dist)
934        };
935
936        // Classify each substituent group.
937        let classify_sub = |attach: AtomIdx| -> Option<(&str, bool)> {
938            // Returns (substituent_name, is_principal)
939            // is_principal: true if this substituent determines the compound root name
940            // Collect sub atoms for this attachment (unused for now, just for documentation)
941            // Simple: just look at atoms directly bonded to attach that are not in ring
942            let direct: Vec<AtomIdx> = mol.neighbors(attach)
943                .filter(|(nb, _)| !ring_atoms.contains(nb))
944                .map(|(nb, _)| nb)
945                .collect();
946            if direct.is_empty() { return None; }
947            let first = direct[0];
948            let an = mol.atom(first).element.atomic_number();
949            match an {
950                8 if !mol.neighbors(first).any(|(_, bi)| mol.bond(bi).order == BondOrder::Double) => {
951                    Some(("hydroxy", true)) // -OH → phenol as principal
952                }
953                7 if implicit_hcount(mol, first) > 0 => Some(("amino", true)), // -NH2 → aniline
954                6 => Some(("methyl", false)), // -CH3 → toluene substituent
955                17 => Some(("chloro", false)),
956                35 => Some(("bromo", false)),
957                9  => Some(("fluoro", false)),
958                53 => Some(("iodo", false)),
959                _ => None,
960            }
961        };
962
963        let sub_a = classify_sub(attach_points[0]);
964        let sub_b = classify_sub(attach_points[1]);
965
966        let (sub_a, sub_b) = match (sub_a, sub_b) {
967            (Some(a), Some(b)) => (a, b),
968            _ => return Err(IupacError::NotSupported),
969        };
970
971        // Determine locant prefix (1,2= ortho, 1,3= meta, 1,4= para for 6-ring).
972        let pos2 = ring_dist + 1; // position of the second substituent from first
973
974        // Build name: principal group determines root, non-principal is prefix.
975        let (prefix_sub, root_name) = if sub_a.1 {
976            // sub_a is principal (phenol/aniline): prefix comes from sub_b
977            let root = match sub_a.0 {
978                "hydroxy" => "phenol",
979                "amino" => "aniline",
980                _ => return Err(IupacError::NotSupported),
981            };
982            (sub_b.0, root)
983        } else if sub_b.1 {
984            let root = match sub_b.0 {
985                "hydroxy" => "phenol",
986                "amino" => "aniline",
987                _ => return Err(IupacError::NotSupported),
988            };
989            (sub_a.0, root)
990        } else {
991            // Neither is principal — both are substituents on benzene.
992            // Alphabetically first substituent gets locant 1.
993            let (s1, s2) = if sub_a.0 <= sub_b.0 {
994                (sub_a.0, sub_b.0)
995            } else {
996                (sub_b.0, sub_a.0)
997            };
998            return if s1 == s2 {
999                Ok(format!("1,{}-di{}benzene", pos2, s1))
1000            } else {
1001                Ok(format!("1-{}-{}-{}benzene", s1, pos2, s2))
1002            };
1003        };
1004
1005        Ok(format!("{}-{}{}", pos2, prefix_sub, root_name))
1006    }
1007
1008    // -----------------------------------------------------------------------
1009    // Trisubstituted benzene naming
1010    // -----------------------------------------------------------------------
1011
1012    fn name_trisubstituted_benzene(
1013        &self,
1014        ring_atoms: &HashSet<AtomIdx>,
1015    ) -> Result<String, IupacError> {
1016        let mol = self.mol;
1017        let attach_points: Vec<AtomIdx> = ring_atoms.iter()
1018            .filter(|&&r| mol.neighbors(r).any(|(nb, _)| !ring_atoms.contains(&nb)))
1019            .copied()
1020            .collect();
1021        if attach_points.len() != 3 {
1022            return Err(IupacError::NotSupported);
1023        }
1024
1025        let locant_map = best_benzene_locants(mol, ring_atoms, &attach_points);
1026
1027        // Classify each substituent.
1028        let mut sub_list: Vec<(usize, String)> = Vec::new();
1029        for &(locant, attach) in &locant_map {
1030            let sub = self.classify_benzene_sub_simple(attach, ring_atoms)
1031                .ok_or(IupacError::NotSupported)?;
1032            sub_list.push((locant, sub));
1033        }
1034
1035        // Sort alphabetically by substituent name, then numerically by locant.
1036        sub_list.sort_by(|a, b| a.1.cmp(&b.1).then(a.0.cmp(&b.0)));
1037
1038        // Group identical substituents for di/tri multiplier.
1039        let mut groups: Vec<(String, Vec<usize>)> = Vec::new();
1040        for (locant, name) in sub_list {
1041            if let Some(last) = groups.last_mut() {
1042                if last.0 == name {
1043                    last.1.push(locant);
1044                    continue;
1045                }
1046            }
1047            groups.push((name, vec![locant]));
1048        }
1049
1050        let mut parts: Vec<String> = Vec::new();
1051        for (name, mut locs) in groups {
1052            locs.sort_unstable();
1053            let locant_str = locs.iter().map(|l| l.to_string()).collect::<Vec<_>>().join(",");
1054            let mult = match locs.len() {
1055                1 => String::new(),
1056                2 => "di".to_string(),
1057                3 => "tri".to_string(),
1058                _ => return Err(IupacError::NotSupported),
1059            };
1060            parts.push(format!("{}-{}{}", locant_str, mult, name));
1061        }
1062
1063        Ok(format!("{}benzene", parts.join("-")))
1064    }
1065
1066    /// Classify a single benzene substituent by element type (for trisubstituted naming).
1067    fn classify_benzene_sub_simple(
1068        &self,
1069        attach: AtomIdx,
1070        ring_atoms: &HashSet<AtomIdx>,
1071    ) -> Option<String> {
1072        let mol = self.mol;
1073        let direct: Vec<AtomIdx> = mol.neighbors(attach)
1074            .filter(|(nb, _)| !ring_atoms.contains(nb))
1075            .map(|(nb, _)| nb)
1076            .collect();
1077        if direct.is_empty() { return None; }
1078        let first = direct[0];
1079        match mol.atom(first).element.atomic_number() {
1080            6  => Some("methyl".to_string()),
1081            7  => Some("amino".to_string()),
1082            8  => Some("hydroxy".to_string()),
1083            9  => Some("fluoro".to_string()),
1084            17 => Some("chloro".to_string()),
1085            35 => Some("bromo".to_string()),
1086            53 => Some("iodo".to_string()),
1087            _  => None,
1088        }
1089    }
1090
1091    // -----------------------------------------------------------------------
1092    // Nitrile naming (R-C≡N → "...nitrile")
1093    // -----------------------------------------------------------------------
1094
1095    fn is_nitrile(&self, n_idx: AtomIdx) -> bool {
1096        self.mol.neighbors(n_idx)
1097            .any(|(_, bi)| self.mol.bond(bi).order == BondOrder::Triple)
1098    }
1099
1100    fn name_nitrile(&self, carbons: &[AtomIdx], n_idx: AtomIdx) -> Result<String, IupacError> {
1101        let mol = self.mol;
1102        // Find the C≡N carbon.
1103        let nitrile_c = mol.neighbors(n_idx)
1104            .filter(|(_, bi)| mol.bond(*bi).order == BondOrder::Triple)
1105            .map(|(nb, _)| nb)
1106            .next()
1107            .ok_or(IupacError::NotSupported)?;
1108        // Count the total C chain (nitrile C + alkyl chain).
1109        // count_c_chain gives all C reachable from nitrile_c without crossing N.
1110        let n_carbons = count_c_chain(mol, nitrile_c, n_idx);
1111        // n_carbons already includes the nitrile carbon itself.
1112        if n_carbons == 0 { return Err(IupacError::NotSupported); }
1113        // Verify no branching on the C chain
1114        let c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1115        for &c in carbons {
1116            if mol.neighbors(c)
1117                .filter(|(nb, _)| c_set.contains(nb))
1118                .count() > 2
1119            {
1120                return Err(IupacError::NotSupported); // branched nitrile not supported
1121            }
1122        }
1123        Ok(format!("{}enitrile", alkane_base(n_carbons)))
1124    }
1125
1126    // -----------------------------------------------------------------------
1127    // Branched alkane naming (e.g., "2-methylpropane", "2,2-dimethylpropane")
1128    // -----------------------------------------------------------------------
1129
1130    fn name_branched_alkane(&self, carbons: &[AtomIdx]) -> Result<String, IupacError> {
1131        let mol = self.mol;
1132
1133        // Find the principal chain (longest C–C path).
1134        let chain = find_longest_c_chain(mol, carbons);
1135        let n = chain.len();
1136        if n < 2 {
1137            return Err(IupacError::NotSupported);
1138        }
1139
1140        let chain_set: std::collections::HashSet<AtomIdx> = chain.iter().copied().collect();
1141        let all_c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1142
1143        // Collect substituents: (chain_position_1based, alkyl_length).
1144        let mut subs: Vec<(usize, usize)> = Vec::new();
1145        for (pos0, &chain_c) in chain.iter().enumerate() {
1146            let position = pos0 + 1;
1147            for (nb, _) in mol.neighbors(chain_c) {
1148                if all_c_set.contains(&nb) && !chain_set.contains(&nb) {
1149                    // Substituent rooted at `nb`, blocked by chain_c.
1150                    let sub_len = count_c_chain(mol, nb, chain_c);
1151                    if sub_len > 4 {
1152                        return Err(IupacError::NotSupported);
1153                    }
1154                    subs.push((position, sub_len));
1155                }
1156            }
1157        }
1158
1159        if subs.is_empty() {
1160            return Err(IupacError::NotSupported);
1161        }
1162
1163        // Apply IUPAC lowest-locant rule: try forward and reverse numbering.
1164        let subs_rev: Vec<(usize, usize)> = subs.iter()
1165            .map(|&(pos, len)| (n + 1 - pos, len))
1166            .collect();
1167
1168        let first_fwd = subs.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
1169        let first_rev = subs_rev.iter().map(|&(p, _)| p).min().unwrap_or(usize::MAX);
1170        let best_subs = if first_fwd <= first_rev { subs } else { subs_rev };
1171
1172        Ok(format!(
1173            "{}{}",
1174            format_substituents(&best_subs),
1175            alkane_suffix(n)
1176        ))
1177    }
1178}
1179
1180// ---------------------------------------------------------------------------
1181// Graph helpers
1182// ---------------------------------------------------------------------------
1183
1184fn atoms_of(mol: &Molecule, atomic_num: u8) -> Vec<AtomIdx> {
1185    mol.atoms()
1186        .filter(|(_, a)| a.element.atomic_number() == atomic_num)
1187        .map(|(i, _)| i)
1188        .collect()
1189}
1190
1191/// BFS count of C atoms reachable from `start` without crossing `blocked`.
1192fn count_c_chain(mol: &Molecule, start: AtomIdx, blocked: AtomIdx) -> usize {
1193    let mut visited = HashSet::new();
1194    let mut queue = VecDeque::new();
1195    visited.insert(start);
1196    queue.push_back(start);
1197    while let Some(cur) = queue.pop_front() {
1198        for (nb, _) in mol.neighbors(cur) {
1199            if nb == blocked { continue; }
1200            if mol.atom(nb).element.atomic_number() == 6 && visited.insert(nb) {
1201                queue.push_back(nb);
1202            }
1203        }
1204    }
1205    visited.len()
1206}
1207
1208/// Find the longest carbon chain in a C-subgraph using two-pass BFS.
1209///
1210/// Returns the sequence of AtomIdx forming the longest simple path.
1211/// For branched alkanes this gives the principal chain (IUPAC rule: longest chain).
1212fn find_longest_c_chain(mol: &Molecule, carbons: &[AtomIdx]) -> Vec<AtomIdx> {
1213    if carbons.is_empty() { return Vec::new(); }
1214
1215    let c_set: std::collections::HashSet<AtomIdx> = carbons.iter().copied().collect();
1216
1217    // BFS to find the farthest atom from a given start, returning (farthest, parents).
1218    let bfs_far = |start: AtomIdx| -> (AtomIdx, std::collections::HashMap<AtomIdx, AtomIdx>) {
1219        let mut parent: std::collections::HashMap<AtomIdx, AtomIdx> = std::collections::HashMap::new();
1220        let mut visited: std::collections::HashSet<AtomIdx> = std::collections::HashSet::new();
1221        let mut queue = VecDeque::new();
1222        let mut farthest = start;
1223        visited.insert(start);
1224        queue.push_back(start);
1225        while let Some(cur) = queue.pop_front() {
1226            farthest = cur;
1227            for (nb, _) in mol.neighbors(cur) {
1228                if c_set.contains(&nb) && visited.insert(nb) {
1229                    parent.insert(nb, cur);
1230                    queue.push_back(nb);
1231                }
1232            }
1233        }
1234        (farthest, parent)
1235    };
1236
1237    let reconstruct = |end: AtomIdx, start: AtomIdx,
1238                        parents: &std::collections::HashMap<AtomIdx, AtomIdx>| -> Vec<AtomIdx> {
1239        let mut path = vec![end];
1240        let mut cur = end;
1241        while cur != start {
1242            cur = parents[&cur];
1243            path.push(cur);
1244        }
1245        path.reverse();
1246        path
1247    };
1248
1249    // Pass 1: BFS from first carbon to find one endpoint of the longest chain.
1250    let (end1, _) = bfs_far(carbons[0]);
1251    // Pass 2: BFS from end1 to find the other endpoint.
1252    let (end2, parents) = bfs_far(end1);
1253
1254    reconstruct(end2, end1, &parents)
1255}
1256
/// Format alkyl substituents as an IUPAC prefix string
/// ("2-methyl", "2,2-dimethyl", "3-ethyl-2-methyl", "2,2,3,3-tetramethyl").
///
/// Each entry of `subs` is `(locant, carbon_count)`. Carbon counts 1–4 map to
/// methyl, ethyl, propyl and butyl; entries with any other count are skipped.
/// Groups are emitted in alphabetical order of the alkyl name, each with its
/// locants sorted ascending and a multiplying prefix for repeated groups
/// (di, tri, tetra, … deca). More than ten identical groups fall back to the
/// placeholder multiplier `?`.
///
/// Returns an empty string when no entry is usable.
fn format_substituents(subs: &[(usize, usize)]) -> String {
    // Ordered map: iteration yields alkyl names alphabetically.
    let mut groups: std::collections::BTreeMap<&'static str, Vec<usize>> =
        std::collections::BTreeMap::new();
    for &(pos, len) in subs {
        let alkyl = match len {
            1 => "methyl",
            2 => "ethyl",
            3 => "propyl",
            4 => "butyl",
            _ => continue,
        };
        groups.entry(alkyl).or_default().push(pos);
    }

    groups
        .into_iter()
        .map(|(alkyl, mut positions)| {
            positions.sort_unstable();
            let locants = positions
                .iter()
                .map(|p| p.to_string())
                .collect::<Vec<_>>()
                .join(",");
            let mult = match positions.len() {
                1 => "",
                2 => "di",
                3 => "tri",
                4 => "tetra",
                5 => "penta",
                6 => "hexa",
                7 => "hepta",
                8 => "octa",
                9 => "nona",
                10 => "deca",
                _ => "?",
            };
            format!("{locants}-{mult}{alkyl}")
        })
        .collect::<Vec<_>>()
        .join("-")
}
1287
1288/// BFS chain anchored at `anchor` (always at index 0 = IUPAC position 1 in result).
1289fn chain_from_anchor(
1290    mol: &Molecule,
1291    c_set: &HashSet<AtomIdx>,
1292    anchor: AtomIdx,
1293) -> Vec<AtomIdx> {
1294    let mut parent: std::collections::HashMap<AtomIdx, AtomIdx> =
1295        std::collections::HashMap::new();
1296    let mut visited: HashSet<AtomIdx> = HashSet::new();
1297    let mut queue = VecDeque::new();
1298    let mut farthest = anchor;
1299    visited.insert(anchor);
1300    queue.push_back(anchor);
1301    while let Some(cur) = queue.pop_front() {
1302        farthest = cur;
1303        for (nb, _) in mol.neighbors(cur) {
1304            if c_set.contains(&nb) && visited.insert(nb) {
1305                parent.insert(nb, cur);
1306                queue.push_back(nb);
1307            }
1308        }
1309    }
1310    let mut path = vec![farthest];
1311    let mut cur = farthest;
1312    while cur != anchor { cur = parent[&cur]; path.push(cur); }
1313    path.reverse();
1314    path
1315}
1316
1317/// Return the IUPAC locant (1-based, lowest) of a double or triple bond on the chain.
1318fn unsaturation_locant(mol: &Molecule, carbons: &[AtomIdx], order: BondOrder) -> usize {
1319    let chain = find_longest_c_chain(mol, carbons);
1320    let n = chain.len();
1321    for (_, b) in mol.bonds() {
1322        if b.order == order {
1323            if let (Some(p1), Some(p2)) = (
1324                chain.iter().position(|&c| c == b.atom1),
1325                chain.iter().position(|&c| c == b.atom2),
1326            ) {
1327                let fwd = p1.min(p2) + 1;  // 1-based lower position in forward direction
1328                let rev = n - p1.max(p2);  // 1-based lower position in reversed direction
1329                return fwd.min(rev);
1330            }
1331        }
1332    }
1333    1
1334}
1335
1336/// Return ring atoms in cyclic traversal order.
1337fn ring_order_traversal(mol: &Molecule, ring_atoms: &HashSet<AtomIdx>) -> Vec<AtomIdx> {
1338    if ring_atoms.is_empty() { return Vec::new(); }
1339    let start = *ring_atoms.iter().next().unwrap();
1340    let mut order = vec![start];
1341    let first_nb = mol.neighbors(start).find(|(nb, _)| ring_atoms.contains(nb)).map(|(nb, _)| nb);
1342    let mut cur = match first_nb { Some(nb) => nb, None => return order };
1343    let mut prev = start;
1344    while cur != start {
1345        order.push(cur);
1346        let next = mol.neighbors(cur)
1347            .find(|(nb, _)| ring_atoms.contains(nb) && *nb != prev)
1348            .map(|(nb, _)| nb);
1349        prev = cur;
1350        match next { Some(nb) => cur = nb, None => break }
1351    }
1352    order
1353}
1354
1355/// Find the minimum IUPAC locant assignment for `attach_points` on a ring.
1356/// Returns sorted `(locant, attach_atom)` pairs.
1357fn best_benzene_locants(
1358    mol: &Molecule,
1359    ring_atoms: &HashSet<AtomIdx>,
1360    attach_points: &[AtomIdx],
1361) -> Vec<(usize, AtomIdx)> {
1362    let ring_order = ring_order_traversal(mol, ring_atoms);
1363    let ring_n = ring_order.len();
1364    if ring_n == 0 { return Vec::new(); }
1365    let n = attach_points.len();
1366    let pos_of: Vec<usize> = attach_points.iter()
1367        .map(|a| ring_order.iter().position(|r| r == a).unwrap_or(0))
1368        .collect();
1369    let mut best_locs: Option<Vec<usize>> = None;
1370    let mut best_assignment: Vec<(usize, AtomIdx)> = Vec::new();
1371    for start in 0..n {
1372        for &reverse in &[false, true] {
1373            let mut assignment: Vec<(usize, AtomIdx)> = Vec::new();
1374            for k in 0..n {
1375                let idx = (start + k) % n;
1376                let pos = if !reverse {
1377                    (pos_of[idx] + ring_n - pos_of[start]) % ring_n
1378                } else {
1379                    (pos_of[start] + ring_n - pos_of[idx]) % ring_n
1380                };
1381                assignment.push((pos + 1, attach_points[idx]));
1382            }
1383            assignment.sort_by_key(|&(l, _)| l);
1384            let locs: Vec<usize> = assignment.iter().map(|&(l, _)| l).collect();
1385            let is_better = best_locs.as_ref().map_or(true, |b| locs < *b);
1386            if is_better {
1387                best_locs = Some(locs);
1388                best_assignment = assignment;
1389            }
1390        }
1391    }
1392    best_assignment
1393}
1394
1395fn count_components(mol: &Molecule) -> usize {
1396    let n = mol.atom_count();
1397    if n == 0 { return 0; }
1398    let mut visited = vec![false; n];
1399    let mut count = 0;
1400    for start in 0..n {
1401        if visited[start] { continue; }
1402        count += 1;
1403        let mut queue = VecDeque::new();
1404        queue.push_back(AtomIdx(start as u32));
1405        visited[start] = true;
1406        while let Some(cur) = queue.pop_front() {
1407            for (nb, _) in mol.neighbors(cur) {
1408                if !visited[nb.0 as usize] {
1409                    visited[nb.0 as usize] = true;
1410                    queue.push_back(nb);
1411                }
1412            }
1413        }
1414    }
1415    count
1416}
1417
1418// ---------------------------------------------------------------------------
1419// Naming helpers
1420// ---------------------------------------------------------------------------
1421
/// Return the numerical stem for a chain of `n` carbons ("meth", "eth", …).
///
/// Covers 1–20 carbons, the same range as `alkane_suffix`, so that suffix
/// compounds built from the stem (e.g. "undecan-1-ol") are available for every
/// chain length that plain alkanes are. Any other `n` yields the placeholder
/// stem "long".
fn alkane_stem(n: usize) -> &'static str {
    match n {
        1 => "meth",      2 => "eth",       3 => "prop",     4 => "but",
        5 => "pent",      6 => "hex",       7 => "hept",     8 => "oct",
        9 => "non",       10 => "dec",      11 => "undec",   12 => "dodec",
        13 => "tridec",   14 => "tetradec", 15 => "pentadec", 16 => "hexadec",
        17 => "heptadec", 18 => "octadec",  19 => "nonadec", 20 => "icos",
        _ => "long",
    }
}
1429
1430/// Stem with "an" appended — base for most suffix compounds.
1431fn alkane_base(n: usize) -> String {
1432    format!("{}an", alkane_stem(n))
1433}
1434
/// Full alkane name for a chain of `n` carbons ("methane" … "icosane").
///
/// Chain lengths outside 1–20 produce the placeholder `"<n>alkane"`.
fn alkane_suffix(n: usize) -> String {
    // Index i holds the name for i + 1 carbons.
    const NAMES: [&str; 20] = [
        "methane", "ethane", "propane", "butane", "pentane",
        "hexane", "heptane", "octane", "nonane", "decane",
        "undecane", "dodecane", "tridecane", "tetradecane", "pentadecane",
        "hexadecane", "heptadecane", "octadecane", "nonadecane", "icosane",
    ];
    match n.checked_sub(1).and_then(|i| NAMES.get(i)) {
        Some(name) => (*name).to_string(),
        None => format!("{n}alkane"),
    }
}
1450
1451fn alkene_suffix(n: usize) -> String { alkane_suffix(n).replace("ane", "ene") }
1452fn alkyne_suffix(n: usize) -> String { alkane_suffix(n).replace("ane", "yne") }
1453
1454// ---------------------------------------------------------------------------
1455// Tests
1456// ---------------------------------------------------------------------------
1457
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parses a SMILES string into a molecule, panicking if parsing fails.
    fn mol(s: &str) -> Molecule {
        parse(s).unwrap()
    }

    /// Checks that each `(smiles, expected)` pair is named as expected.
    #[track_caller]
    fn expect_names(cases: &[(&str, &str)]) {
        for &(smiles, expected) in cases {
            assert_eq!(name(&mol(smiles)).unwrap(), expected, "SMILES: {smiles}");
        }
    }

    // --- Existing tests (must remain green) ---------------------------------

    #[test]
    fn test_alkanes() {
        expect_names(&[
            ("C", "methane"),
            ("CC", "ethane"),
            ("CCC", "propane"),
            ("CCCC", "butane"),
            ("CCCCC", "pentane"),
            ("CCCCCC", "hexane"),
        ]);
    }

    #[test]
    fn test_alkenes_alkynes() {
        expect_names(&[
            ("C=C", "ethene"),
            ("CC=C", "propene"),
            ("C#C", "ethyne"),
            ("CC#C", "propyne"),
        ]);
    }

    #[test]
    fn test_cycloalkanes() {
        expect_names(&[
            ("C1CC1", "cyclopropane"),
            ("C1CCC1", "cyclobutane"),
            ("C1CCCC1", "cyclopentane"),
            ("C1CCCCC1", "cyclohexane"),
        ]);
    }

    #[test]
    fn test_alcohol() {
        expect_names(&[
            ("CO", "methanol"),
            ("CCO", "ethanol"),
            ("CCCO", "propan-1-ol"),
        ]);
    }

    #[test]
    fn test_amine() {
        expect_names(&[
            ("CN", "methan-1-amine"),
            ("CCN", "ethan-1-amine"),
        ]);
    }

    #[test]
    fn test_haloalkane() {
        expect_names(&[
            ("CCCl", "chloroethane"),
            ("CCBr", "bromoethane"),
            ("CF", "fluoromethane"),
            ("CI", "iodomethane"),
        ]);
    }

    #[test]
    fn test_not_supported() {
        // Two disconnected fragments must be rejected.
        let outcome = name(&mol("CC.CC"));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_empty() {
        use chematic_core::MoleculeBuilder;
        let empty = MoleculeBuilder::new().build();
        assert_eq!(name(&empty), Err(IupacError::Empty));
    }

    // --- New: benzene & aromatic heterocycles --------------------------------

    #[test]
    fn test_benzene() {
        expect_names(&[("c1ccccc1", "benzene")]);
    }

    #[test]
    fn test_aromatic_heterocycles() {
        expect_names(&[
            ("c1ccncc1", "pyridine"),
            ("c1ccoc1", "furan"),
            ("c1ccsc1", "thiophene"),
            ("c1cc[nH]c1", "pyrrole"),
            ("c1cnc[nH]1", "imidazole"),
        ]);
    }

    // --- New: ketones with position locant -----------------------------------

    #[test]
    fn test_ketones() {
        expect_names(&[
            ("CC(=O)C", "propan-2-one"),
            ("CC(=O)CC", "butan-2-one"),
            ("CCC(=O)CC", "pentan-3-one"),
            ("CCCC(=O)C", "pentan-2-one"),
        ]);
    }

    // --- New: carboxylic acids -----------------------------------------------

    #[test]
    fn test_carboxylic_acids() {
        expect_names(&[
            ("CC(=O)O", "ethanoic acid"),
            ("CCC(=O)O", "propanoic acid"),
            ("C(=O)O", "methanoic acid"),
        ]);
    }

    // --- New: esters ---------------------------------------------------------

    #[test]
    fn test_esters() {
        expect_names(&[
            ("CC(=O)OC", "methyl ethanoate"),
            ("C(=O)OC", "methyl methanoate"),
            ("CC(=O)OCC", "ethyl ethanoate"),
        ]);
    }

    // --- New: amides ---------------------------------------------------------

    #[test]
    fn test_amides() {
        expect_names(&[
            ("CC(=O)N", "ethanamide"),
            ("C(=O)N", "methanamide"),
            ("CCC(=O)N", "propanamide"),
        ]);
    }

    // ---- New: branched alkanes (v0.1.101) ------------------------------------

    #[test]
    fn test_branched_alkanes() {
        expect_names(&[
            ("CC(C)C", "2-methylpropane"),
            ("CC(C)CC", "2-methylbutane"),
            ("CC(C)(C)C", "2,2-dimethylpropane"),
            ("CCCC(C)CC", "3-methylhexane"),
        ]);
    }

    #[test]
    fn test_branched_alkane_lowest_locant() {
        // CCC(C)C = 2-methylbutane (not 3-methylbutane — lower locant wins).
        expect_names(&[("CCC(C)C", "2-methylbutane")]);
    }

    // ---- New: substituted benzenes (v0.1.101) --------------------------------

    #[test]
    fn test_substituted_benzenes() {
        expect_names(&[
            ("c1ccccc1O", "phenol"),
            ("c1ccccc1N", "aniline"),
            ("c1ccccc1Cl", "chlorobenzene"),
            ("c1ccccc1Br", "bromobenzene"),
        ]);
    }

    #[test]
    fn test_substituted_benzene_carbonyl() {
        expect_names(&[
            ("c1ccccc1C=O", "benzaldehyde"),
            ("c1ccccc1C(=O)O", "benzoic acid"),
        ]);
    }

    // ---- New: nitriles (v0.1.101) -------------------------------------------

    #[test]
    fn test_nitriles() {
        expect_names(&[
            ("CC#N", "ethanenitrile"),
            ("CCC#N", "propanenitrile"),
        ]);
    }

    // ---- New Round 2 tests (v0.1.102) ---------------------------------------

    #[test]
    fn test_thiols() {
        expect_names(&[
            ("CS", "methanethiol"),
            ("CCS", "ethanethiol"),
            ("CCCS", "propanethiol"),
        ]);
    }

    #[test]
    fn test_alcohol_locants() {
        expect_names(&[
            ("CCCCO", "butan-1-ol"),
            ("CC(O)C", "propan-2-ol"),
            ("CCC(O)C", "butan-2-ol"),
        ]);
    }

    #[test]
    fn test_disubstituted_benzene() {
        expect_names(&[
            // Para-chlorophenol: OH and Cl are 3 bonds apart in the ring (positions 1 and 4).
            ("Oc1ccc(Cl)cc1", "4-chlorophenol"),
            // Meta-chlorophenol: OH and Cl are 2 bonds apart (positions 1 and 3).
            ("c1ccc(O)cc1Cl", "3-chlorophenol"),
        ]);
    }

    #[test]
    fn test_methylcycloalkane() {
        expect_names(&[
            ("CC1CCCCC1", "methylcyclohexane"),
            ("CC1CCCC1", "methylcyclopentane"),
            ("CC1CCC1", "methylcyclobutane"),
        ]);
    }

    // ---- New Round 3 tests (v0.1.103) ----------------------------------------

    #[test]
    fn test_ethers() {
        expect_names(&[
            ("COC", "methoxymethane"),
            ("COCC", "methoxyethane"),
            ("CCOCC", "ethoxyethane"),
            ("COCCC", "1-methoxypropane"),
        ]);
    }

    #[test]
    fn test_trimethylbenzene() {
        expect_names(&[
            ("Cc1cccc(C)c1C", "1,2,3-trimethylbenzene"),
            ("Cc1ccc(C)cc1C", "1,2,4-trimethylbenzene"),
            ("Cc1cc(C)cc(C)c1", "1,3,5-trimethylbenzene"),
        ]);
    }

    #[test]
    fn test_secondary_amine() {
        expect_names(&[
            ("CCNCC", "N-ethylethanamine"),
            ("CNCC", "N-methylethanamine"),
            ("CN(C)C", "N,N-dimethylmethanamine"),
        ]);
    }

    // ---- New Round 9 tests (v0.1.109) ----------------------------------------

    #[test]
    fn test_branched_aldehyde() {
        expect_names(&[
            ("CC(C)C=O", "2-methylpropanal"),
            ("CCC(C)C=O", "2-methylbutanal"),
        ]);
    }

    #[test]
    fn test_branched_amide() {
        expect_names(&[
            ("CC(C)C(=O)N", "2-methylpropanamide"),
            ("CCC(C)C(=O)N", "2-methylbutanamide"),
        ]);
    }

    // ---- New Round 8 tests (v0.1.108) ----------------------------------------

    #[test]
    fn test_branched_ester() {
        expect_names(&[
            ("CC(C)C(=O)OC", "methyl 2-methylpropanoate"),
            ("CC(C)C(=O)OCC", "ethyl 2-methylpropanoate"),
        ]);
    }

    #[test]
    fn test_branched_ketone() {
        expect_names(&[
            ("CC(=O)C(C)C", "3-methylbutan-2-one"),
            ("CC(=O)C(C)(C)C", "3,3-dimethylbutan-2-one"),
        ]);
    }

    // ---- New Round 7 tests (v0.1.107) ----------------------------------------

    #[test]
    fn test_secondary_thiol() {
        expect_names(&[
            ("CCC(S)C", "butane-2-thiol"),
            ("CCCC(S)C", "pentane-2-thiol"),
        ]);
    }

    #[test]
    fn test_branched_carboxylic_acid() {
        expect_names(&[
            ("CC(C)C(=O)O", "2-methylpropanoic acid"),
            ("CCC(C)C(=O)O", "2-methylbutanoic acid"),
            ("CC(C)(C)C(=O)O", "2,2-dimethylpropanoic acid"),
        ]);
    }

    // ---- New Round 6 tests (v0.1.106) ----------------------------------------

    #[test]
    fn test_alkene_locants() {
        expect_names(&[
            ("CC=CC", "but-2-ene"),
            ("C=CCC", "but-1-ene"),
            ("CC=CCC", "pent-2-ene"),
            ("C=CCCC", "pent-1-ene"),
        ]);
    }

    #[test]
    fn test_alkyne_locants() {
        expect_names(&[
            ("CC#CC", "but-2-yne"),
            ("C#CCC", "but-1-yne"),
        ]);
    }

    #[test]
    fn test_amine_locants() {
        expect_names(&[
            ("CCCN", "propan-1-amine"),
            ("CCC(N)C", "butan-2-amine"),
            ("CC(N)CCC", "pentan-2-amine"),
        ]);
    }

    // ---- New Round 5 tests (v0.1.105) ----------------------------------------

    #[test]
    fn test_haloalkane_locants() {
        expect_names(&[
            // n=3: terminal → "1-chloropropane"
            ("CCCCl", "1-chloropropane"),
            // n=4: terminal → "1-chlorobutane"
            ("CCCCCl", "1-chlorobutane"),
            // n=4: internal → "2-chlorobutane"
            ("CCC(Cl)C", "2-chlorobutane"),
            // n=5: internal → "2-chloropentane"
            ("CCCC(Cl)C", "2-chloropentane"),
            // di-halo: ClCCCl = 2C → "1,2-dichloroethane"; ClCCCCl = 3C → "1,3-dichloropropane"
            ("ClCCCl", "1,2-dichloroethane"),
            ("ClCCCCl", "1,3-dichloropropane"),
        ]);
    }

    #[test]
    fn test_cycloalkanol() {
        expect_names(&[
            ("OC1CCC1", "cyclobutanol"),
            ("OC1CCCC1", "cyclopentanol"),
            ("OC1CCCCC1", "cyclohexanol"),
        ]);
    }

    // ---- New Round 4 tests (v0.1.104) ----------------------------------------

    #[test]
    fn test_disubstituted_benzene_non_principal() {
        expect_names(&[
            // Two halogens (para)
            ("Clc1ccc(Br)cc1", "1-bromo-4-chlorobenzene"),
            ("Clc1ccc(F)cc1", "1-chloro-4-fluorobenzene"),
            // Two methyls: ortho (Cc1ccccc1C) and para (Cc1ccc(C)cc1)
            ("Cc1ccccc1C", "1,2-dimethylbenzene"),
            ("Cc1ccc(C)cc1", "1,4-dimethylbenzene"),
            // Methyl + halogen (para)
            ("Cc1ccc(Cl)cc1", "1-chloro-4-methylbenzene"),
        ]);
    }

    #[test]
    fn test_propyl_substituent() {
        // 11C: longest chain = octane (8C), propyl substituent at C4
        expect_names(&[("CCCC(CCC)CCCC", "4-propyloctane")]);
    }

    #[test]
    fn test_dimethylcycloalkane() {
        expect_names(&[
            ("CC1CCC(C)CC1", "1,4-dimethylcyclohexane"),
            ("CC1CCCC1C", "1,2-dimethylcyclopentane"),
            ("CC1CCC(C)C1", "1,3-dimethylcyclopentane"),
        ]);
    }
}