1use chematic_core::{
4 Atom, AtomIdx, BondIdx, BondOrder, CipCode, Element, Molecule, MoleculeBuilder,
5};
6use std::collections::HashMap;
7
/// Errors reported while parsing an InChI string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InchiParseError {
    /// The input lacks the overall InChI shape (for example, it contains no `/` separator).
    InvalidFormat,
    /// The formula layer is empty, or holds an unknown element symbol or an unparsable count.
    InvalidFormula,
    /// The connectivity layer references an atom number that has no corresponding atom.
    InvalidConnectivity,
    /// The hydrogen layer contains a malformed group, atom number, range or count.
    InvalidHydrogen,
    /// A layer could not be interpreted; the payload is a short human-readable reason.
    Unsupported(String),
}
22
23impl core::fmt::Display for InchiParseError {
24 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
25 match self {
26 Self::InvalidFormat => write!(f, "invalid InChI format"),
27 Self::InvalidFormula => write!(f, "invalid formula layer"),
28 Self::InvalidConnectivity => write!(f, "invalid connectivity layer"),
29 Self::InvalidHydrogen => write!(f, "invalid hydrogen layer"),
30 Self::Unsupported(msg) => write!(f, "unsupported InChI feature: {msg}"),
31 }
32 }
33}
34
// No trait methods are overridden here; the error text comes from the `Display` impl.
impl std::error::Error for InchiParseError {}
36
37pub fn parse_inchi(inchi_str: &str) -> Result<Molecule, InchiParseError> {
59 let content = if let Some(pos) = inchi_str.find("/") {
61 &inchi_str[pos + 1..] } else {
63 return Err(InchiParseError::InvalidFormat);
64 };
65
66 let parts: Vec<&str> = content.split('/').collect();
67 if parts.is_empty() {
68 return Err(InchiParseError::InvalidFormat);
69 }
70
71 let element_counts = parse_formula(parts[0])?;
73
74 let mut builder = MoleculeBuilder::new();
76 let mut atom_idx_map: HashMap<usize, AtomIdx> = HashMap::new();
77
78 let mut atom_num = 0;
80 for (element, count) in &element_counts {
81 if element.atomic_number() == 1 {
83 continue;
84 }
85 for _ in 0..*count {
86 let atom = Atom::new(*element);
87 let idx = builder.add_atom(atom);
88 atom_num += 1;
89 atom_idx_map.insert(atom_num, idx);
90 }
91 }
92
93 let mut connectivity_str = "";
95 for part in parts.iter().skip(1) {
96 if let Some(layer) = part.strip_prefix('c') {
97 connectivity_str = layer;
98 break;
99 }
100 }
101
102 if !connectivity_str.is_empty() {
103 parse_connectivity(connectivity_str, &atom_idx_map, &mut builder)?;
104 }
105
106 let mut h_counts: HashMap<usize, u8> = HashMap::new();
108 for part in parts.iter().skip(1) {
109 if let Some(hydrogen_str) = part.strip_prefix('h') {
110 h_counts = parse_hydrogen_layer_to_map(hydrogen_str)?;
111 break;
112 }
113 }
114
115 let mut charges: HashMap<usize, i8> = HashMap::new();
117 for part in parts.iter().skip(1) {
118 if let Some(charge_str) = part.strip_prefix('q') {
119 charges = parse_charge_layer(charge_str)?;
120 break;
121 }
122 }
123
124 let mut isotopes: HashMap<usize, u8> = HashMap::new();
126 for part in parts.iter().skip(1) {
127 if let Some(isotope_str) = part.strip_prefix('i') {
128 isotopes = parse_isotope_layer(isotope_str)?;
129 break;
130 }
131 }
132
133 let mut ez_stereo: HashMap<(usize, usize), char> = HashMap::new();
135 for part in parts.iter().skip(1) {
136 if let Some(b_str) = part.strip_prefix('b') {
137 ez_stereo = parse_ez_stereo_layer(b_str)?;
138 break;
139 }
140 }
141
142 let mut tet_stereo: HashMap<usize, char> = HashMap::new();
144 for part in parts.iter().skip(1) {
145 if let Some(t_str) = part.strip_prefix('t') {
146 tet_stereo = parse_tetrahedral_stereo_layer(t_str)?;
147 break;
148 }
149 }
150
151 for part in parts.iter().skip(1) {
153 if let Some(m_str) = part.strip_prefix('m') {
154 let _ = parse_relative_stereo_layer(m_str)?;
155 break;
156 }
157 }
158
159 for part in parts.iter().skip(1) {
161 if let Some(s_str) = part.strip_prefix('s') {
162 let _ = parse_stereo_type_layer(s_str)?;
163 break;
164 }
165 }
166
167 let mut mol = builder.build();
169
170 if !h_counts.is_empty() {
172 mol = apply_hydrogen_counts(mol, &atom_idx_map, &h_counts);
173 }
174
175 if !charges.is_empty() {
177 mol = apply_charges(mol, &atom_idx_map, &charges);
178 }
179
180 if !isotopes.is_empty() {
182 mol = apply_isotopes(mol, &atom_idx_map, &isotopes);
183 }
184
185 if !ez_stereo.is_empty() {
187 mol = apply_ez_stereo(mol, &atom_idx_map, &ez_stereo);
188 }
189
190 if !tet_stereo.is_empty() {
192 mol = apply_tetrahedral_stereo(mol, &atom_idx_map, &tet_stereo);
193 }
194
195 Ok(mol)
196}
197
198fn parse_formula(formula_str: &str) -> Result<Vec<(Element, usize)>, InchiParseError> {
201 let mut elements = Vec::new();
202 let mut chars = formula_str.chars().peekable();
203
204 while let Some(ch) = chars.next() {
205 if !ch.is_uppercase() {
206 return Err(InchiParseError::InvalidFormula);
207 }
208
209 let mut elem_sym = ch.to_string();
210 while let Some(&next_ch) = chars.peek() {
211 if next_ch.is_lowercase() {
212 elem_sym.push(chars.next().unwrap());
213 } else {
214 break;
215 }
216 }
217
218 let element = Element::from_symbol(&elem_sym).ok_or(InchiParseError::InvalidFormula)?;
219
220 let mut count_str = String::new();
222 while let Some(&next_ch) = chars.peek() {
223 if next_ch.is_numeric() {
224 count_str.push(chars.next().unwrap());
225 } else {
226 break;
227 }
228 }
229
230 let count = if count_str.is_empty() {
231 1
232 } else {
233 count_str
234 .parse::<usize>()
235 .map_err(|_| InchiParseError::InvalidFormula)?
236 };
237
238 elements.push((element, count));
239 }
240
241 if elements.is_empty() {
242 return Err(InchiParseError::InvalidFormula);
243 }
244
245 Ok(elements)
246}
247
248fn parse_connectivity(
251 conn_str: &str,
252 atom_idx_map: &HashMap<usize, AtomIdx>,
253 builder: &mut MoleculeBuilder,
254) -> Result<(), InchiParseError> {
255 let mut current_atom: usize = 1;
259 let mut branch_stack: Vec<usize> = Vec::new();
260 let mut chars = conn_str.chars().peekable();
261
262 fn read_num<I: Iterator<Item = char>>(chars: &mut std::iter::Peekable<I>) -> Option<usize> {
265 let mut s = String::new();
266 while chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
267 s.push(chars.next().unwrap());
268 }
269 s.parse().ok()
270 }
271
272 if let Some(n) = read_num(&mut chars) {
274 current_atom = n;
275 }
276
277 while let Some(ch) = chars.next() {
278 match ch {
279 '-' | '=' | '#' => {
280 let order = match ch {
281 '=' => BondOrder::Double,
282 '#' => BondOrder::Triple,
283 _ => BondOrder::Single,
284 };
285 if let Some(next_atom) = read_num(&mut chars) {
286 if let (Some(&a_idx), Some(&b_idx)) = (
287 atom_idx_map.get(¤t_atom),
288 atom_idx_map.get(&next_atom),
289 ) {
290 let _ = builder.add_bond(a_idx, b_idx, order);
291 current_atom = next_atom;
292 } else {
293 return Err(InchiParseError::InvalidConnectivity);
294 }
295 }
296 }
297 ',' | ';' => {
298 if let Some(n) = read_num(&mut chars) {
300 current_atom = n;
301 }
302 }
303 '(' => {
304 branch_stack.push(current_atom);
306 }
307 ')' => {
308 if let Some(saved) = branch_stack.pop() {
310 current_atom = saved;
311 }
312 }
313 c if c.is_ascii_digit() => {
314 let mut s = String::from(c);
318 while chars.peek().map(|ch| ch.is_ascii_digit()).unwrap_or(false) {
319 s.push(chars.next().unwrap());
320 }
321 if let Ok(next_atom) = s.parse::<usize>() {
322 if let (Some(&a_idx), Some(&b_idx)) = (
323 atom_idx_map.get(¤t_atom),
324 atom_idx_map.get(&next_atom),
325 ) {
326 let _ = builder.add_bond(a_idx, b_idx, BondOrder::Single);
327 current_atom = next_atom;
328 } else {
329 return Err(InchiParseError::InvalidConnectivity);
330 }
331 }
332 }
333 _ => {} }
335 }
336
337 Ok(())
338}
339
340fn parse_hydrogen_layer_to_map(h_str: &str) -> Result<HashMap<usize, u8>, InchiParseError> {
345 let mut h_counts: HashMap<usize, u8> = HashMap::new();
346
347 if h_str.is_empty() {
348 return Ok(h_counts);
349 }
350
351 for group in h_str.split(',') {
353 let group = group.trim();
354 if group.is_empty() {
355 continue;
356 }
357
358 let parts: Vec<&str> = group.split('H').collect();
360 if parts.len() != 2 {
361 return Err(InchiParseError::InvalidHydrogen);
362 }
363
364 let atom_spec = parts[0]; let h_count_str = parts[1]; let h_count: u8 = if h_count_str.is_empty() {
367 1 } else {
369 h_count_str
370 .parse::<u8>()
371 .map_err(|_| InchiParseError::InvalidHydrogen)?
372 };
373
374 if let Some(dash_pos) = atom_spec.find('-') {
376 let start_str = &atom_spec[..dash_pos];
378 let end_str = &atom_spec[dash_pos + 1..];
379 let start: usize = start_str
380 .parse::<usize>()
381 .map_err(|_| InchiParseError::InvalidHydrogen)?;
382 let end: usize = end_str
383 .parse::<usize>()
384 .map_err(|_| InchiParseError::InvalidHydrogen)?;
385
386 for atom_num in start..=end {
387 h_counts.insert(atom_num, h_count);
388 }
389 } else {
390 let atom_num: usize = atom_spec
392 .parse::<usize>()
393 .map_err(|_| InchiParseError::InvalidHydrogen)?;
394 h_counts.insert(atom_num, h_count);
395 }
396 }
397
398 Ok(h_counts)
399}
400
401fn apply_hydrogen_counts(
403 mol: Molecule,
404 atom_idx_map: &HashMap<usize, AtomIdx>,
405 h_counts: &HashMap<usize, u8>,
406) -> Molecule {
407 let mut builder = MoleculeBuilder::new();
408
409 for i in 0..mol.atom_count() {
411 let idx = AtomIdx(i as u32);
412 let mut atom = mol.atom(idx).clone();
413
414 for (&atom_num, &atom_idx_in_map) in atom_idx_map {
416 if atom_idx_in_map == idx {
417 if let Some(&h_count) = h_counts.get(&atom_num) {
418 atom.hydrogen_count = Some(h_count);
419 }
420 break;
421 }
422 }
423
424 builder.add_atom(atom);
425 }
426
427 for i in 0..mol.bond_count() {
429 let bond = mol.bond(BondIdx(i as u32));
430 builder.add_bond(bond.atom1, bond.atom2, bond.order).ok();
431 }
432
433 builder.build()
434}
435
436fn parse_charge_layer(q_str: &str) -> Result<HashMap<usize, i8>, InchiParseError> {
439 let mut charges: HashMap<usize, i8> = HashMap::new();
440
441 if q_str.is_empty() {
443 return Ok(charges);
444 }
445
446 for charge_spec in q_str.split(',') {
448 if charge_spec.is_empty() {
449 continue;
450 }
451
452 let (atom_str, charge_val) = if let Some(plus_pos) = charge_spec.find('+') {
454 let atom_part = &charge_spec[..plus_pos];
455 let charge_part = &charge_spec[plus_pos + 1..];
456 let charge: i8 = charge_part
457 .parse::<i8>()
458 .map_err(|_| InchiParseError::Unsupported("invalid charge value".to_string()))?;
459 (atom_part, charge)
460 } else if let Some(minus_pos) = charge_spec.rfind('-') {
461 let atom_part = &charge_spec[..minus_pos];
463 let charge_part = &charge_spec[minus_pos + 1..];
464 let charge: i8 = charge_part
465 .parse::<i8>()
466 .map_err(|_| InchiParseError::Unsupported("invalid charge value".to_string()))?;
467 (atom_part, -charge)
468 } else {
469 continue; };
471
472 if atom_str.contains('-') && atom_str.matches('-').count() == 1 {
474 let parts: Vec<&str> = atom_str.split('-').collect();
476 if parts.len() == 2 {
477 let start: usize = parts[0]
478 .parse::<usize>()
479 .map_err(|_| InchiParseError::Unsupported("invalid atom range".to_string()))?;
480 let end: usize = parts[1]
481 .parse::<usize>()
482 .map_err(|_| InchiParseError::Unsupported("invalid atom range".to_string()))?;
483
484 for atom_num in start..=end {
485 charges.insert(atom_num, charge_val);
486 }
487 }
488 } else {
489 let atom_num: usize = atom_str
491 .parse::<usize>()
492 .map_err(|_| InchiParseError::Unsupported("invalid atom number".to_string()))?;
493 charges.insert(atom_num, charge_val);
494 }
495 }
496
497 Ok(charges)
498}
499
500fn parse_isotope_layer(i_str: &str) -> Result<HashMap<usize, u8>, InchiParseError> {
504 let mut isotopes: HashMap<usize, u8> = HashMap::new();
505
506 if i_str.is_empty() {
508 return Ok(isotopes);
509 }
510
511 for spec in i_str.split(',') {
513 if spec.is_empty() {
514 continue;
515 }
516
517 let parts: Vec<&str> = spec.split('/').collect();
519 if parts.len() >= 2 {
520 let atom_num: usize = parts[0].parse::<usize>().map_err(|_| {
522 InchiParseError::Unsupported("invalid atom number in isotope layer".to_string())
523 })?;
524
525 let isotope_spec = parts[1];
527 let mut mass_str = String::new();
528
529 for ch in isotope_spec.chars() {
530 if ch.is_numeric() {
531 mass_str.push(ch);
532 }
533 }
534
535 if !mass_str.is_empty() {
536 let mass: u8 = mass_str.parse::<u8>().map_err(|_| {
537 InchiParseError::Unsupported("invalid isotope mass".to_string())
538 })?;
539 isotopes.insert(atom_num, mass);
540 }
541 }
542 }
543
544 Ok(isotopes)
545}
546
547fn apply_charges(
549 mol: Molecule,
550 atom_idx_map: &HashMap<usize, AtomIdx>,
551 charges: &HashMap<usize, i8>,
552) -> Molecule {
553 let mut builder = MoleculeBuilder::new();
554
555 for i in 0..mol.atom_count() {
557 let idx = AtomIdx(i as u32);
558 let mut atom = mol.atom(idx).clone();
559
560 for (&atom_num, &atom_idx_in_map) in atom_idx_map {
562 if atom_idx_in_map == idx {
563 if let Some(&charge) = charges.get(&atom_num) {
564 atom.charge = charge;
565 }
566 break;
567 }
568 }
569
570 builder.add_atom(atom);
571 }
572
573 for i in 0..mol.bond_count() {
575 let bond = mol.bond(BondIdx(i as u32));
576 builder.add_bond(bond.atom1, bond.atom2, bond.order).ok();
577 }
578
579 builder.build()
580}
581
582fn apply_isotopes(
584 mol: Molecule,
585 atom_idx_map: &HashMap<usize, AtomIdx>,
586 isotopes: &HashMap<usize, u8>,
587) -> Molecule {
588 let mut builder = MoleculeBuilder::new();
589
590 for i in 0..mol.atom_count() {
592 let idx = AtomIdx(i as u32);
593 let mut atom = mol.atom(idx).clone();
594
595 for (&atom_num, &atom_idx_in_map) in atom_idx_map {
597 if atom_idx_in_map == idx {
598 if let Some(&mass) = isotopes.get(&atom_num) {
599 atom.isotope = Some(mass as u16);
600 }
601 break;
602 }
603 }
604
605 builder.add_atom(atom);
606 }
607
608 for i in 0..mol.bond_count() {
610 let bond = mol.bond(BondIdx(i as u32));
611 builder.add_bond(bond.atom1, bond.atom2, bond.order).ok();
612 }
613
614 builder.build()
615}
616
617fn parse_ez_stereo_layer(b_str: &str) -> Result<HashMap<(usize, usize), char>, InchiParseError> {
621 let mut stereo: HashMap<(usize, usize), char> = HashMap::new();
622
623 if b_str.is_empty() {
624 return Ok(stereo);
625 }
626
627 for spec in b_str.split(',') {
628 if spec.is_empty() {
629 continue;
630 }
631
632 if let Some(pos) = spec.rfind('+') {
634 let nums_part = &spec[..pos];
635 if let Ok((a1, a2)) = parse_bond_spec(nums_part) {
636 stereo.insert(if a1 < a2 { (a1, a2) } else { (a2, a1) }, '+');
637 }
638 } else if let Some(pos) = spec.rfind('-') {
639 let nums_part = &spec[..pos];
640 if let Ok((a1, a2)) = parse_bond_spec(nums_part) {
641 stereo.insert(if a1 < a2 { (a1, a2) } else { (a2, a1) }, '-');
642 }
643 }
644 }
645
646 Ok(stereo)
647}
648
649fn parse_tetrahedral_stereo_layer(t_str: &str) -> Result<HashMap<usize, char>, InchiParseError> {
653 let mut stereo: HashMap<usize, char> = HashMap::new();
654
655 if t_str.is_empty() {
656 return Ok(stereo);
657 }
658
659 for spec in t_str.split(',') {
660 if spec.is_empty() {
661 continue;
662 }
663
664 if let Some(pos) = spec.rfind('+') {
666 let atom_part = &spec[..pos];
667 let atom_num: usize = atom_part.parse::<usize>().map_err(|_| {
668 InchiParseError::Unsupported("invalid atom number in stereo layer".to_string())
669 })?;
670 stereo.insert(atom_num, '+');
671 } else if let Some(pos) = spec.rfind('-') {
672 let atom_part = &spec[..pos];
673 let atom_num: usize = atom_part.parse::<usize>().map_err(|_| {
674 InchiParseError::Unsupported("invalid atom number in stereo layer".to_string())
675 })?;
676 stereo.insert(atom_num, '-');
677 }
678 }
679
680 Ok(stereo)
681}
682
683fn parse_bond_spec(spec: &str) -> Result<(usize, usize), InchiParseError> {
685 let parts: Vec<&str> = spec.split('-').collect();
686 if parts.len() != 2 {
687 return Err(InchiParseError::Unsupported(
688 "invalid bond spec".to_string(),
689 ));
690 }
691
692 let a1: usize = parts[0]
693 .parse::<usize>()
694 .map_err(|_| InchiParseError::Unsupported("invalid atom in bond spec".to_string()))?;
695 let a2: usize = parts[1]
696 .parse::<usize>()
697 .map_err(|_| InchiParseError::Unsupported("invalid atom in bond spec".to_string()))?;
698
699 Ok((a1, a2))
700}
701
702fn apply_ez_stereo(
708 mol: Molecule,
709 atom_idx_map: &HashMap<usize, AtomIdx>,
710 stereo: &HashMap<(usize, usize), char>,
711) -> Molecule {
712 if stereo.is_empty() {
713 return mol;
714 }
715
716 let mut builder = MoleculeBuilder::new();
717 let mut atom_map = HashMap::new();
718
719 for (old_idx, atom) in mol.atoms() {
720 let mut a = atom.clone();
721
722 for (&(n1, _n2), &parity) in stereo.iter() {
725 if let Some(&idx1) = atom_idx_map.get(&n1)
726 && idx1 == old_idx
727 {
728 a.cip_code = Some(match parity {
729 '+' => CipCode::Z,
730 '-' => CipCode::E,
731 _ => continue,
732 });
733 break;
734 }
735 }
736
737 let new_idx = builder.add_atom(a);
738 atom_map.insert(old_idx, new_idx);
739 }
740
741 for (_, bond) in mol.bonds() {
742 let _ = builder.add_bond(atom_map[&bond.atom1], atom_map[&bond.atom2], bond.order);
743 }
744
745 builder.build()
746}
747
748fn apply_tetrahedral_stereo(
752 mol: Molecule,
753 atom_idx_map: &HashMap<usize, AtomIdx>,
754 stereo: &HashMap<usize, char>,
755) -> Molecule {
756 if stereo.is_empty() {
757 return mol;
758 }
759
760 let mut builder = MoleculeBuilder::new();
761 let mut atom_map = HashMap::new();
762
763 for (old_idx, atom) in mol.atoms() {
764 let mut a = atom.clone();
765
766 for (&inchi_num, &parity) in stereo.iter() {
769 if let Some(&idx) = atom_idx_map.get(&inchi_num)
770 && idx == old_idx
771 {
772 a.cip_code = Some(match parity {
773 '+' => CipCode::R,
774 '-' => CipCode::S,
775 _ => continue,
776 });
777 break;
778 }
779 }
780
781 let new_idx = builder.add_atom(a);
782 atom_map.insert(old_idx, new_idx);
783 }
784
785 for (_, bond) in mol.bonds() {
786 let _ = builder.add_bond(atom_map[&bond.atom1], atom_map[&bond.atom2], bond.order);
787 }
788
789 builder.build()
790}
791
792fn parse_relative_stereo_layer(m_str: &str) -> Result<HashMap<usize, String>, InchiParseError> {
796 let mut parity_map = HashMap::new();
797
798 if m_str.is_empty() {
799 return Ok(parity_map);
800 }
801
802 let entries: Vec<&str> = m_str.split(',').collect();
804 for (idx, entry) in entries.iter().enumerate() {
805 if !entry.is_empty() {
806 parity_map.insert(idx + 1, entry.to_string());
807 }
808 }
809
810 Ok(parity_map)
811}
812
813fn parse_stereo_type_layer(s_str: &str) -> Result<String, InchiParseError> {
817 Ok(s_str.to_string())
819}
820
/// Unit tests for the InChI parser and its per-layer helpers.
#[cfg(test)]
mod tests {
    // The glob also brings in the parent's own imports (`HashMap`, `Atom`, `AtomIdx`, ...).
    use super::*;

    // ---- formula layer ----

    #[test]
    fn test_parse_formula_methane() {
        let elements = parse_formula("CH4").expect("CH4 should parse");
        assert_eq!(elements.len(), 2);
    }

    #[test]
    fn test_parse_formula_ethane() {
        let elements = parse_formula("C2H6").expect("C2H6 should parse");
        assert_eq!(
            elements
                .iter()
                .find(|(e, _)| e.atomic_number() == 6)
                .map(|(_, c)| c),
            Some(&2)
        );
    }

    #[test]
    fn test_parse_formula_benzene() {
        let elements = parse_formula("C6H6").expect("C6H6 should parse");
        assert_eq!(elements.len(), 2);
    }

    #[test]
    fn test_parse_formula_invalid() {
        assert!(parse_formula("invalid").is_err());
    }

    // ---- whole-string parsing ----

    #[test]
    fn test_parse_inchi_methane() {
        let mol = parse_inchi("InChI=1S/CH4/h1H4").expect("methane should parse");
        assert_eq!(mol.atom_count(), 1, "methane should have 1 heavy atom (C)");
    }

    #[test]
    fn test_parse_inchi_ethane() {
        let mol = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3").expect("ethane should parse");
        assert_eq!(mol.atom_count(), 2, "ethane should have 2 heavy atoms");
    }

    #[test]
    fn test_parse_inchi_benzene() {
        let mol =
            parse_inchi("InChI=1S/C6H6/c1-2-3-4-5-6-1/h1-6H").expect("benzene should parse");
        assert_eq!(mol.atom_count(), 6, "benzene should have 6 heavy atoms");
    }

    #[test]
    fn test_parse_inchi_invalid_format() {
        assert!(parse_inchi("InvalidInChI").is_err());
    }

    #[test]
    fn test_parse_inchi_with_ez_stereo() {
        let result = parse_inchi("InChI=1S/C4H8/c1-3-4-2/h3-4H,1-2H3/b4-3-");
        assert!(result.is_ok(), "should parse InChI with /b layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    // ---- hydrogen layer ----

    #[test]
    fn test_parse_hydrogen_layer_single_atom() {
        let h_map = parse_hydrogen_layer_to_map("1H4").unwrap();
        assert_eq!(h_map.get(&1), Some(&4), "atom 1 should have 4 H");
    }

    #[test]
    fn test_parse_hydrogen_layer_range() {
        let h_map = parse_hydrogen_layer_to_map("1-6H").unwrap();
        for i in 1..=6 {
            assert_eq!(h_map.get(&i), Some(&1), "atoms 1-6 should each have 1 H");
        }
    }

    #[test]
    fn test_parse_hydrogen_layer_mixed() {
        let h_map = parse_hydrogen_layer_to_map("1H4,2H2,3-6H").unwrap();
        assert_eq!(h_map.get(&1), Some(&4));
        assert_eq!(h_map.get(&2), Some(&2));
        assert_eq!(h_map.get(&3), Some(&1));
        assert_eq!(h_map.get(&6), Some(&1));
    }

    #[test]
    fn test_parse_inchi_ethanol_with_hydrogen_layer() {
        let mol =
            parse_inchi("InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3").expect("ethanol should parse");
        assert_eq!(
            mol.atom_count(),
            3,
            "ethanol should have 3 heavy atoms (C, C, O)"
        );

        let has_h_count = mol.atoms().any(|(_, atom)| atom.hydrogen_count.is_some());
        assert!(
            has_h_count,
            "at least one atom should have explicit hydrogen_count"
        );
    }

    #[test]
    fn test_parse_inchi_methane_roundtrip() {
        let mol = parse_inchi("InChI=1S/CH4/h1H4").expect("methane should parse");
        assert_eq!(mol.atom_count(), 1, "methane should have 1 heavy atom (C)");

        let carbon = mol.atom(AtomIdx(0));
        assert_eq!(carbon.element.atomic_number(), 6, "should be carbon");
        assert_eq!(carbon.hydrogen_count, Some(4), "carbon should have 4 H");
    }

    // ---- charge and isotope layers ----

    #[test]
    fn test_parse_charge_layer_single_positive() {
        let charges = parse_charge_layer("1+1").unwrap();
        assert_eq!(charges.get(&1), Some(&1), "atom 1 should have charge +1");
    }

    #[test]
    fn test_parse_charge_layer_single_negative() {
        let charges = parse_charge_layer("2-1").unwrap();
        assert_eq!(charges.get(&2), Some(&-1), "atom 2 should have charge -1");
    }

    #[test]
    fn test_parse_charge_layer_multiple() {
        let charges = parse_charge_layer("1+1,2-1,3+2").unwrap();
        assert_eq!(charges.get(&1), Some(&1), "atom 1 should have charge +1");
        assert_eq!(charges.get(&2), Some(&-1), "atom 2 should have charge -1");
        assert_eq!(charges.get(&3), Some(&2), "atom 3 should have charge +2");
    }

    #[test]
    fn test_parse_isotope_layer_single() {
        let isotopes = parse_isotope_layer("2/13C").unwrap();
        assert_eq!(isotopes.get(&2), Some(&13), "atom 2 should be C-13");
    }

    #[test]
    fn test_parse_isotope_layer_multiple() {
        let isotopes = parse_isotope_layer("1/2H,2/13C").unwrap();
        assert_eq!(
            isotopes.get(&1),
            Some(&2),
            "atom 1 should be H-2 (deuterium)"
        );
        assert_eq!(isotopes.get(&2), Some(&13), "atom 2 should be C-13");
    }

    #[test]
    fn test_parse_inchi_with_charge_layer() {
        // Only the layer parser is exercised here; no full InChI string is involved.
        let charges = parse_charge_layer("1+1").unwrap();
        assert_eq!(charges.get(&1), Some(&1), "atom 1 should have charge +1");
    }

    #[test]
    fn test_parse_inchi_with_isotope_layer() {
        // The `i` segment is empty and the trailing `2H` segment matches no known layer
        // letter, so the string must parse exactly like plain ethane.
        let result = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3/i/2H");
        assert!(
            result.is_ok(),
            "an empty /i layer and an unknown segment should be tolerated"
        );
        if let Ok(mol) = result {
            assert_eq!(mol.atom_count(), 2, "ethane should have 2 heavy atoms");
        }
    }

    #[test]
    fn test_empty_charge_layer() {
        let charges = parse_charge_layer("").unwrap();
        assert!(
            charges.is_empty(),
            "empty charge layer should yield no charges"
        );
    }

    #[test]
    fn test_empty_isotope_layer() {
        let isotopes = parse_isotope_layer("").unwrap();
        assert!(
            isotopes.is_empty(),
            "empty isotope layer should yield no isotopes"
        );
    }

    // ---- stereo layers ----

    #[test]
    fn test_parse_ez_stereo_layer_single() {
        let stereo = parse_ez_stereo_layer("2-3+").unwrap();
        assert_eq!(stereo.len(), 1);
        assert_eq!(stereo.get(&(2, 3)), Some(&'+'));
    }

    #[test]
    fn test_parse_ez_stereo_layer_multiple() {
        let stereo = parse_ez_stereo_layer("2-3+,5-6-").unwrap();
        assert_eq!(stereo.len(), 2);
        assert_eq!(stereo.get(&(2, 3)), Some(&'+'));
        assert_eq!(stereo.get(&(5, 6)), Some(&'-'));
    }

    #[test]
    fn test_parse_ez_stereo_layer_empty() {
        let stereo = parse_ez_stereo_layer("").unwrap();
        assert!(stereo.is_empty());
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_single() {
        let stereo = parse_tetrahedral_stereo_layer("1-").unwrap();
        assert_eq!(stereo.len(), 1);
        assert_eq!(stereo.get(&1), Some(&'-'));
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_multiple() {
        let stereo = parse_tetrahedral_stereo_layer("1-,2+,3-").unwrap();
        assert_eq!(stereo.len(), 3);
        assert_eq!(stereo.get(&1), Some(&'-'));
        assert_eq!(stereo.get(&2), Some(&'+'));
        assert_eq!(stereo.get(&3), Some(&'-'));
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_empty() {
        let stereo = parse_tetrahedral_stereo_layer("").unwrap();
        assert!(stereo.is_empty());
    }

    #[test]
    fn test_parse_inchi_with_tetrahedral_stereo() {
        let result = parse_inchi("InChI=1S/C2H4O2/c1-2(3)4/h2H,1H3/t2-");
        assert!(result.is_ok(), "should parse InChI with /t layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    #[test]
    fn test_parse_bond_spec() {
        let (a1, a2) = parse_bond_spec("2-3").unwrap();
        assert_eq!(a1, 2);
        assert_eq!(a2, 3);
    }

    #[test]
    fn test_parse_bond_spec_large_numbers() {
        let (a1, a2) = parse_bond_spec("12-15").unwrap();
        assert_eq!(a1, 12);
        assert_eq!(a2, 15);
    }

    #[test]
    fn test_parse_relative_stereo_layer_single() {
        let parity = parse_relative_stereo_layer("1").unwrap();
        assert_eq!(parity.len(), 1);
        assert_eq!(parity.get(&1), Some(&"1".to_string()));
    }

    #[test]
    fn test_parse_relative_stereo_layer_multiple() {
        let parity = parse_relative_stereo_layer("1,2").unwrap();
        assert_eq!(parity.len(), 2);
        assert_eq!(parity.get(&1), Some(&"1".to_string()));
        assert_eq!(parity.get(&2), Some(&"2".to_string()));
    }

    #[test]
    fn test_parse_relative_stereo_layer_empty() {
        let parity = parse_relative_stereo_layer("").unwrap();
        assert!(parity.is_empty());
    }

    #[test]
    fn test_parse_stereo_type_layer_obsolete() {
        let stereo_type = parse_stereo_type_layer("obsolete").unwrap();
        assert_eq!(stereo_type, "obsolete");
    }

    #[test]
    fn test_parse_stereo_type_layer_new() {
        let stereo_type = parse_stereo_type_layer("new").unwrap();
        assert_eq!(stereo_type, "new");
    }

    #[test]
    fn test_parse_inchi_with_relative_stereo() {
        let result = parse_inchi("InChI=1S/C4H10/c1-3-4-2/h3-4H,1-2H3/m0");
        assert!(result.is_ok(), "should parse InChI with /m layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    #[test]
    fn test_parse_inchi_with_stereo_type() {
        let result = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3/s1");
        assert!(result.is_ok(), "should parse InChI with /s layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    // ---- applying stereo to a molecule ----

    #[test]
    fn test_tetrahedral_stereo_roundtrip_simple() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::H));
        let a3 = builder.add_atom(Atom::new(Element::H));
        let a4 = builder.add_atom(Atom::new(Element::H));
        let a5 = builder.add_atom(Atom::new(Element::N));

        let _ = builder.add_bond(a1, a2, BondOrder::Single);
        let _ = builder.add_bond(a1, a3, BondOrder::Single);
        let _ = builder.add_bond(a1, a4, BondOrder::Single);
        let _ = builder.add_bond(a1, a5, BondOrder::Single);

        let mol = builder.build();
        let mut stereo_map = HashMap::new();
        stereo_map.insert(1, '-');
        let mut atom_idx_map = HashMap::new();
        atom_idx_map.insert(1, a1);

        let mol_stereo = apply_tetrahedral_stereo(mol, &atom_idx_map, &stereo_map);
        let found_s = mol_stereo
            .atoms()
            .any(|(_, atom)| atom.cip_code == Some(CipCode::S));
        assert!(found_s, "apply_tetrahedral_stereo should assign S cip_code");
    }

    #[test]
    fn test_ez_stereo_roundtrip_simple() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::C));
        let a3 = builder.add_atom(Atom::new(Element::H));
        let a4 = builder.add_atom(Atom::new(Element::N));

        let _ = builder.add_bond(a1, a2, BondOrder::Double);
        let _ = builder.add_bond(a1, a3, BondOrder::Single);
        let _ = builder.add_bond(a2, a4, BondOrder::Single);

        let mol = builder.build();
        let mut stereo_map = HashMap::new();
        stereo_map.insert((1, 2), '-');
        let mut atom_idx_map = HashMap::new();
        atom_idx_map.insert(1, a1);
        atom_idx_map.insert(2, a2);

        let mol_stereo = apply_ez_stereo(mol, &atom_idx_map, &stereo_map);
        let found_e = mol_stereo
            .atoms()
            .any(|(_, atom)| atom.cip_code == Some(CipCode::E));
        assert!(found_e, "apply_ez_stereo should assign E cip_code");
    }

    // ---- connectivity layer ----

    #[test]
    fn test_parse_connectivity_branch_isobutane() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::C));
        let a3 = builder.add_atom(Atom::new(Element::C));
        let a4 = builder.add_atom(Atom::new(Element::C));
        let mut map: HashMap<usize, AtomIdx> = HashMap::new();
        map.insert(1, a1);
        map.insert(2, a2);
        map.insert(3, a3);
        map.insert(4, a4);

        parse_connectivity("1-4(2)3", &map, &mut builder).expect("isobutane /c parse");
        let mol = builder.build();
        assert_eq!(
            mol.bond_count(),
            3,
            "isobutane /c should yield 3 bonds, got {}",
            mol.bond_count()
        );
    }

    #[test]
    fn test_parse_connectivity_nested_branch() {
        let mut builder = MoleculeBuilder::new();
        let atoms: Vec<AtomIdx> = (0..5)
            .map(|_| builder.add_atom(Atom::new(Element::C)))
            .collect();
        let mut map: HashMap<usize, AtomIdx> = HashMap::new();
        for (i, &a) in atoms.iter().enumerate() {
            map.insert(i + 1, a);
        }

        parse_connectivity("1-5(2)(3)4", &map, &mut builder).expect("neopentane /c parse");
        let mol = builder.build();
        assert_eq!(mol.bond_count(), 4, "neopentane /c should yield 4 bonds");
    }
}