1pub mod formula;
2pub mod scanner;
3pub mod smiles;
4
5mod alkane;
6mod locant;
7mod substituent;
8mod suffix;
9
10use crate::error::ResolveError;
11use suffix::{Suffix, SuffixGroup};
12use substituent::parse_substituents;
13
/// Chemical elements that can appear as atoms in a [`MolGraph`].
#[derive(Debug, Clone, PartialEq)]
pub enum Element {
    C,
    H,
    O,
    N,
    S,
    P,
    F,
    Cl,
    Br,
    I,
}

impl Element {
    /// Returns the element's chemical symbol (for example `"Cl"` for chlorine).
    pub fn symbol(&self) -> &'static str {
        match self {
            Element::C => "C",
            Element::H => "H",
            Element::O => "O",
            Element::N => "N",
            Element::S => "S",
            Element::P => "P",
            Element::F => "F",
            Element::Cl => "Cl",
            Element::Br => "Br",
            Element::I => "I",
        }
    }

    /// Returns the default (lowest normal) valence of the element.
    ///
    /// `MolGraph::fill_implicit_h` subtracts the bond orders already used by
    /// an atom from this value to decide how many implicit hydrogens it gets.
    pub fn valence(&self) -> u8 {
        match self {
            Element::C => 4,
            // Phosphorus is a group-15 element like nitrogen: its lowest normal
            // valence is 3, not 1 as for the halogens.
            Element::N | Element::P => 3,
            Element::O | Element::S => 2,
            Element::H | Element::F | Element::Cl | Element::Br | Element::I => 1,
        }
    }
}
55
/// Multiplicity of a covalent bond between two atoms.
#[derive(Debug, Clone, PartialEq)]
pub enum BondOrder {
    Single,
    Double,
    Triple,
}

impl BondOrder {
    /// Returns how many valence units the bond consumes on each of its atoms:
    /// 1 for a single bond, 2 for a double bond, 3 for a triple bond.
    pub fn degree(&self) -> u8 {
        match self {
            Self::Single => 1,
            Self::Double => 2,
            Self::Triple => 3,
        }
    }
}
72
73#[derive(Debug, Clone)]
74pub struct Atom {
75 pub element: Element,
76 pub charge: i8,
77 pub implicit_h: u8,
78}
79
80#[derive(Debug, Clone)]
81pub struct Bond {
82 pub to: usize,
83 pub order: BondOrder,
84}
85
86#[derive(Debug, Clone, Default)]
88pub struct MolGraph {
89 pub atoms: Vec<Atom>,
90 pub bonds: Vec<Vec<Bond>>,
91}
92
93impl MolGraph {
94 fn add_atom(&mut self, element: Element) -> usize {
95 let idx = self.atoms.len();
96 self.atoms.push(Atom { element, charge: 0, implicit_h: 0 });
97 self.bonds.push(Vec::new());
98 idx
99 }
100
101 fn add_bond(&mut self, a: usize, b: usize, order: BondOrder) {
102 self.bonds[a].push(Bond { to: b, order: order.clone() });
103 self.bonds[b].push(Bond { to: a, order });
104 }
105
106 fn fill_implicit_h(&mut self) {
107 for i in 0..self.atoms.len() {
108 let used: u8 = self.bonds[i].iter().map(|b| b.order.degree()).sum();
109 let valence = self.atoms[i].element.valence();
110 self.atoms[i].implicit_h = valence.saturating_sub(used);
111 }
112 }
113}
114
115pub fn parse_iupac(name: &str) -> Result<MolGraph, ResolveError> {
120 let name = name.trim().strip_prefix("n-").unwrap_or(name.trim());
122
123 let (is_cyclic, name) = if let Some(rest) = name.strip_prefix("cyclo") {
125 (true, rest)
126 } else {
127 (false, name)
128 };
129
130 let (substituents, rest) = parse_substituents(name);
132
133 let (prefix_suffix_locants, rest) = extract_prefix_suffix_locants(rest);
136
137 let (chain_len, rest) = alkane::parse_stem(rest).ok_or_else(|| ResolveError::ParseError {
139 pos: 0,
140 msg: format!("unrecognized chain stem in: {name:?}"),
141 })?;
142
143 let (suffix_groups, remaining) =
145 parse_suffix_groups(rest).map_err(|_| ResolveError::ParseError {
146 pos: name.len() - rest.len(),
147 msg: format!("unrecognized suffix in: {name:?}"),
148 })?;
149
150 if !remaining.is_empty() {
151 return Err(ResolveError::ParseError {
152 pos: name.len() - remaining.len(),
153 msg: format!("unexpected trailing text: {remaining:?}"),
154 });
155 }
156
157 build_graph(chain_len, &suffix_groups, &prefix_suffix_locants, &substituents, name, is_cyclic)
158}
159
160fn extract_prefix_suffix_locants<'a>(input: &'a str) -> (Vec<u8>, &'a str) {
162 if let Some((locs, rest)) = locant::parse_locant_list(input) {
163 if alkane::parse_stem(rest).is_some() {
164 return (locs, rest);
165 }
166 }
167 (vec![], input)
168}
169
170fn parse_suffix_groups(input: &str) -> Result<(Vec<SuffixGroup>, &str), ()> {
172 let mut groups = Vec::new();
173 let mut rest = input;
174
175 loop {
176 if let Some(r) = rest.strip_prefix("an") {
181 let r = strip_elision_e(r);
182 if let Some((sg, r2)) = suffix::parse_suffix(r) {
183 groups.push(sg);
184 rest = r2;
185 if rest.is_empty() {
186 break;
187 }
188 continue;
189 }
190 }
191
192 if let Some((sg, r)) = suffix::parse_suffix(rest) {
194 groups.push(sg);
195 rest = r;
196 if rest.is_empty() {
197 break;
198 }
199 continue;
200 }
201
202 break;
203 }
204
205 if groups.is_empty() {
206 Err(())
207 } else {
208 Ok((groups, rest))
209 }
210}
211
/// Drops a single leading `e` from `input`, unless that `e` begins `ene` or
/// `en-`, in which case `input` is returned untouched.
///
/// Input that does not start with `e` is also returned unchanged.
fn strip_elision_e(input: &str) -> &str {
    match input.strip_prefix('e') {
        Some(tail) if !(tail.starts_with("ne") || tail.starts_with("n-")) => tail,
        _ => input,
    }
}
224
225fn build_graph(
228 chain_len: u8,
229 suffix_groups: &[SuffixGroup],
230 prefix_suffix_locants: &[u8],
231 substituents: &[substituent::Substituent],
232 name: &str,
233 is_cyclic: bool,
234) -> Result<MolGraph, ResolveError> {
235 let mut g = MolGraph::default();
236
237 let carbon_indices: Vec<usize> =
239 (0..chain_len as usize).map(|_| g.add_atom(Element::C)).collect();
240
241 for i in 0..carbon_indices.len().saturating_sub(1) {
242 g.add_bond(carbon_indices[i], carbon_indices[i + 1], BondOrder::Single);
243 }
244
245 if is_cyclic {
247 let n = carbon_indices.len();
248 if n >= 3 {
249 g.add_bond(carbon_indices[0], carbon_indices[n - 1], BondOrder::Single);
250 }
251 }
252
253 for (sg_idx, sg) in suffix_groups.iter().enumerate() {
255 let effective_locants: Vec<u8> = if sg.locants.is_empty() && sg_idx == 0 && !prefix_suffix_locants.is_empty() {
257 prefix_suffix_locants.to_vec()
258 } else {
259 sg.locants.clone()
260 };
261 apply_suffix(&mut g, &carbon_indices, sg, &effective_locants, name)?;
262 }
263
264 for sub in substituents {
266 apply_substituent(&mut g, &carbon_indices, sub, name)?;
267 }
268
269 g.fill_implicit_h();
270 Ok(g)
271}
272
273fn apply_suffix(
274 g: &mut MolGraph,
275 carbons: &[usize],
276 sg: &SuffixGroup,
277 effective_locants: &[u8],
278 name: &str,
279) -> Result<(), ResolveError> {
280 let count = sg.multiplier.as_ref().map(|m| m.count()).unwrap_or(1) as usize;
281
282 let locants_0: Vec<usize> = if effective_locants.is_empty() {
284 match sg.suffix {
285 Suffix::Ane => vec![],
286 Suffix::Ene | Suffix::Yne => (0..count).map(|i| i * 2).collect(),
287 Suffix::Ol => (0..count).map(|i| i).collect(),
288 Suffix::One => {
289 if carbons.len() >= 3 {
290 (0..count).map(|i| i + 1).collect()
291 } else {
292 vec![0]
293 }
294 }
295 Suffix::Al | Suffix::OicAcid => {
296 if count == 1 {
297 vec![0]
298 } else {
299 vec![0, carbons.len() - 1]
301 }
302 }
303 Suffix::Amine | Suffix::Thiol | Suffix::Nitrile | Suffix::Amide => vec![],
305 }
306 } else {
307 effective_locants.iter().map(|&l| l as usize - 1).collect()
308 };
309
310 match sg.suffix {
311 Suffix::Ane => {}
312 Suffix::Ene => {
313 for &ci in &locants_0 {
314 validate_bond_locant(ci, carbons.len(), "ene", name)?;
315 upgrade_bond(g, carbons[ci], carbons[ci + 1], BondOrder::Double);
316 }
317 }
318 Suffix::Yne => {
319 for &ci in &locants_0 {
320 validate_bond_locant(ci, carbons.len(), "yne", name)?;
321 upgrade_bond(g, carbons[ci], carbons[ci + 1], BondOrder::Triple);
322 }
323 }
324 Suffix::Ol => {
325 let indices: Vec<usize> = if locants_0.is_empty() {
326 vec![0]
327 } else {
328 locants_0
329 };
330 for ci in indices {
331 validate_atom_locant(ci, carbons.len(), "ol", name)?;
332 let oidx = g.add_atom(Element::O);
333 g.add_bond(carbons[ci], oidx, BondOrder::Single);
334 }
335 }
336 Suffix::One => {
337 for &ci in &locants_0 {
338 validate_atom_locant(ci, carbons.len(), "one", name)?;
339 let oidx = g.add_atom(Element::O);
340 g.add_bond(carbons[ci], oidx, BondOrder::Double);
341 }
342 }
343 Suffix::Al => {
344 let oidx = g.add_atom(Element::O);
345 g.add_bond(carbons[0], oidx, BondOrder::Double);
346 }
347 Suffix::OicAcid => {
348 let positions: Vec<usize> = if locants_0.is_empty() {
349 if count == 1 {
350 vec![0]
351 } else {
352 vec![0, carbons.len() - 1]
353 }
354 } else {
355 locants_0
356 };
357 for ci in positions {
358 validate_atom_locant(ci, carbons.len(), "oic acid", name)?;
359 let oidx = g.add_atom(Element::O);
360 let ohidx = g.add_atom(Element::O);
361 g.add_bond(carbons[ci], oidx, BondOrder::Double);
362 g.add_bond(carbons[ci], ohidx, BondOrder::Single);
363 }
364 }
365 Suffix::Amine => {
366 let indices: Vec<usize> = if locants_0.is_empty() {
367 vec![carbons.len() - 1]
368 } else {
369 locants_0
370 };
371 for ci in indices {
372 validate_atom_locant(ci, carbons.len(), "amine", name)?;
373 let nidx = g.add_atom(Element::N);
374 g.add_bond(carbons[ci], nidx, BondOrder::Single);
375 }
376 }
377 Suffix::Thiol => {
378 let indices: Vec<usize> = if locants_0.is_empty() {
379 vec![carbons.len() - 1]
380 } else {
381 locants_0
382 };
383 for ci in indices {
384 validate_atom_locant(ci, carbons.len(), "thiol", name)?;
385 let sidx = g.add_atom(Element::S);
386 g.add_bond(carbons[ci], sidx, BondOrder::Single);
387 }
388 }
389 Suffix::Nitrile => {
390 let cidx = carbons[0];
392 let nidx = g.add_atom(Element::N);
393 g.add_bond(cidx, nidx, BondOrder::Triple);
394 }
395 Suffix::Amide => {
396 let ci = if locants_0.is_empty() { 0 } else { locants_0[0] };
399 validate_atom_locant(ci, carbons.len(), "amide", name)?;
400 let nidx = g.add_atom(Element::N);
401 let oidx = g.add_atom(Element::O);
402 g.add_bond(carbons[ci], nidx, BondOrder::Single);
403 g.add_bond(carbons[ci], oidx, BondOrder::Double);
404 }
405 }
406 Ok(())
407}
408
409fn apply_substituent(
410 g: &mut MolGraph,
411 carbons: &[usize],
412 sub: &substituent::Substituent,
413 name: &str,
414) -> Result<(), ResolveError> {
415 use substituent::SubstituentKind;
416 for &loc in &sub.locants {
417 let ci = loc as usize - 1;
418 validate_atom_locant(ci, carbons.len(), "substituent", name)?;
419 let cidx = carbons[ci];
420 match &sub.kind {
421 SubstituentKind::Oxo => {
422 let oidx = g.add_atom(Element::O);
423 g.add_bond(cidx, oidx, BondOrder::Double);
424 }
425 SubstituentKind::Hydroxy => {
426 let oidx = g.add_atom(Element::O);
427 g.add_bond(cidx, oidx, BondOrder::Single);
428 }
429 SubstituentKind::Chloro => {
430 let x = g.add_atom(Element::Cl);
431 g.add_bond(cidx, x, BondOrder::Single);
432 }
433 SubstituentKind::Bromo => {
434 let x = g.add_atom(Element::Br);
435 g.add_bond(cidx, x, BondOrder::Single);
436 }
437 SubstituentKind::Fluoro => {
438 let x = g.add_atom(Element::F);
439 g.add_bond(cidx, x, BondOrder::Single);
440 }
441 SubstituentKind::Iodo => {
442 let x = g.add_atom(Element::I);
443 g.add_bond(cidx, x, BondOrder::Single);
444 }
445 SubstituentKind::Methyl => {
446 let m = g.add_atom(Element::C);
447 g.add_bond(cidx, m, BondOrder::Single);
448 }
449 SubstituentKind::Ethyl => {
450 let m1 = g.add_atom(Element::C);
451 let m2 = g.add_atom(Element::C);
452 g.add_bond(cidx, m1, BondOrder::Single);
453 g.add_bond(m1, m2, BondOrder::Single);
454 }
455 SubstituentKind::Propyl
456 | SubstituentKind::Butyl
457 | SubstituentKind::Pentyl
458 | SubstituentKind::Hexyl => {
459 let chain_len = match &sub.kind {
460 SubstituentKind::Propyl => 3,
461 SubstituentKind::Butyl => 4,
462 SubstituentKind::Pentyl => 5,
463 SubstituentKind::Hexyl => 6,
464 _ => unreachable!(),
465 };
466 let mut prev = cidx;
467 for _ in 0..chain_len {
468 let m = g.add_atom(Element::C);
469 g.add_bond(prev, m, BondOrder::Single);
470 prev = m;
471 }
472 }
473 SubstituentKind::Isopropyl => {
475 let branch = g.add_atom(Element::C);
476 let me1 = g.add_atom(Element::C);
477 let me2 = g.add_atom(Element::C);
478 g.add_bond(cidx, branch, BondOrder::Single);
479 g.add_bond(branch, me1, BondOrder::Single);
480 g.add_bond(branch, me2, BondOrder::Single);
481 }
482 SubstituentKind::TertButyl => {
484 let branch = g.add_atom(Element::C);
485 let me1 = g.add_atom(Element::C);
486 let me2 = g.add_atom(Element::C);
487 let me3 = g.add_atom(Element::C);
488 g.add_bond(cidx, branch, BondOrder::Single);
489 g.add_bond(branch, me1, BondOrder::Single);
490 g.add_bond(branch, me2, BondOrder::Single);
491 g.add_bond(branch, me3, BondOrder::Single);
492 }
493 SubstituentKind::SecButyl => {
495 let branch = g.add_atom(Element::C);
496 let me = g.add_atom(Element::C);
497 let et1 = g.add_atom(Element::C);
498 let et2 = g.add_atom(Element::C);
499 g.add_bond(cidx, branch, BondOrder::Single);
500 g.add_bond(branch, me, BondOrder::Single);
501 g.add_bond(branch, et1, BondOrder::Single);
502 g.add_bond(et1, et2, BondOrder::Single);
503 }
504 SubstituentKind::IsoButyl => {
506 let ch2 = g.add_atom(Element::C);
507 let branch = g.add_atom(Element::C);
508 let me1 = g.add_atom(Element::C);
509 let me2 = g.add_atom(Element::C);
510 g.add_bond(cidx, ch2, BondOrder::Single);
511 g.add_bond(ch2, branch, BondOrder::Single);
512 g.add_bond(branch, me1, BondOrder::Single);
513 g.add_bond(branch, me2, BondOrder::Single);
514 }
515 SubstituentKind::Amino => {
516 let nidx = g.add_atom(Element::N);
517 g.add_bond(cidx, nidx, BondOrder::Single);
518 }
519 SubstituentKind::Mercapto => {
520 let sidx = g.add_atom(Element::S);
521 g.add_bond(cidx, sidx, BondOrder::Single);
522 }
523 SubstituentKind::Cyano => {
524 let cbranch = g.add_atom(Element::C);
526 let nidx = g.add_atom(Element::N);
527 g.add_bond(cidx, cbranch, BondOrder::Single);
528 g.add_bond(cbranch, nidx, BondOrder::Triple);
529 }
530 SubstituentKind::Acetyl => {
531 let carbonyl = g.add_atom(Element::C);
533 let methyl = g.add_atom(Element::C);
534 let o = g.add_atom(Element::O);
535 g.add_bond(cidx, carbonyl, BondOrder::Single);
536 g.add_bond(carbonyl, methyl, BondOrder::Single);
537 g.add_bond(carbonyl, o, BondOrder::Double);
538 }
539 SubstituentKind::Formyl => {
540 let carbonyl = g.add_atom(Element::C);
542 let o = g.add_atom(Element::O);
543 g.add_bond(cidx, carbonyl, BondOrder::Single);
544 g.add_bond(carbonyl, o, BondOrder::Double);
545 }
546 }
547 }
548 Ok(())
549}
550
551fn validate_bond_locant(ci: usize, len: usize, tag: &str, name: &str) -> Result<(), ResolveError> {
552 if ci + 1 >= len {
553 Err(ResolveError::ParseError {
554 pos: 0,
555 msg: format!("{tag} locant {ci} out of range for {len}-carbon chain in {name:?}"),
556 })
557 } else {
558 Ok(())
559 }
560}
561
562fn validate_atom_locant(ci: usize, len: usize, tag: &str, name: &str) -> Result<(), ResolveError> {
563 if ci >= len {
564 Err(ResolveError::ParseError {
565 pos: 0,
566 msg: format!("{tag} locant {ci} out of range for {len}-carbon chain in {name:?}"),
567 })
568 } else {
569 Ok(())
570 }
571}
572
573fn upgrade_bond(g: &mut MolGraph, a: usize, b: usize, new_order: BondOrder) {
574 for bond in &mut g.bonds[a] {
575 if bond.to == b {
576 bond.order = new_order.clone();
577 }
578 }
579 for bond in &mut g.bonds[b] {
580 if bond.to == a {
581 bond.order = new_order.clone();
582 }
583 }
584}
585
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::smiles::to_smiles;

    /// Parses `name` and renders the graph as SMILES, panicking with the name
    /// and the error if parsing fails.
    fn smiles(name: &str) -> String {
        match parse_iupac(name) {
            Ok(graph) => to_smiles(&graph),
            Err(e) => panic!("{name}: {e}"),
        }
    }

    // Plain alkanes.

    #[test]
    fn methane() {
        assert_eq!(smiles("methane"), "C");
    }

    #[test]
    fn ethane() {
        assert_eq!(smiles("ethane"), "CC");
    }

    #[test]
    fn propane() {
        assert_eq!(smiles("propane"), "CCC");
    }

    // Suffixes without locants, with infix locants, and with prefix locants.

    #[test]
    fn ethanol() {
        assert_eq!(smiles("ethanol"), "CCO");
    }

    #[test]
    fn propan_2_one() {
        assert_eq!(smiles("propan-2-one"), "CC(=O)C");
    }

    #[test]
    fn but_2_yne() {
        assert_eq!(smiles("but-2-yne"), "CC#CC");
    }

    #[test]
    fn two_four_pentanedione() {
        assert_eq!(smiles("2,4-pentanedione"), "CC(=O)CC(=O)C");
    }
}
631}