use chematic_core::{
Atom, AtomIdx, BondIdx, BondOrder, CipCode, Element, Molecule, MoleculeBuilder,
};
use std::collections::HashMap;
/// Errors reported while parsing an InChI string.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InchiParseError {
    /// The overall string shape is wrong (for example, it contains no `/`).
    InvalidFormat,
    /// The formula layer could not be parsed.
    InvalidFormula,
    /// The connectivity layer refers to an atom that does not exist.
    InvalidConnectivity,
    /// The hydrogen layer could not be parsed.
    InvalidHydrogen,
    /// A layer contained something this parser cannot handle; the payload
    /// is a short human-readable description.
    Unsupported(String),
}

impl core::fmt::Display for InchiParseError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Only `Unsupported` carries data; every other variant maps to a
        // fixed message that is written verbatim.
        let fixed = match self {
            Self::Unsupported(msg) => return write!(f, "unsupported InChI feature: {msg}"),
            Self::InvalidFormat => "invalid InChI format",
            Self::InvalidFormula => "invalid formula layer",
            Self::InvalidConnectivity => "invalid connectivity layer",
            Self::InvalidHydrogen => "invalid hydrogen layer",
        };
        f.write_str(fixed)
    }
}

impl std::error::Error for InchiParseError {}
pub fn parse_inchi(inchi_str: &str) -> Result<Molecule, InchiParseError> {
let content = if let Some(pos) = inchi_str.find("/") {
&inchi_str[pos + 1..] } else {
return Err(InchiParseError::InvalidFormat);
};
let parts: Vec<&str> = content.split('/').collect();
if parts.is_empty() {
return Err(InchiParseError::InvalidFormat);
}
let element_counts = parse_formula(parts[0])?;
let mut builder = MoleculeBuilder::new();
let mut atom_idx_map: HashMap<usize, AtomIdx> = HashMap::new();
let mut atom_num = 0;
for (element, count) in &element_counts {
if element.atomic_number() == 1 {
continue;
}
for _ in 0..*count {
let atom = Atom::new(*element);
let idx = builder.add_atom(atom);
atom_num += 1;
atom_idx_map.insert(atom_num, idx);
}
}
let mut connectivity_str = "";
for part in parts.iter().skip(1) {
if let Some(layer) = part.strip_prefix('c') {
connectivity_str = layer;
break;
}
}
if !connectivity_str.is_empty() {
parse_connectivity(connectivity_str, &atom_idx_map, &mut builder)?;
}
let mut h_counts: HashMap<usize, u8> = HashMap::new();
for part in parts.iter().skip(1) {
if let Some(hydrogen_str) = part.strip_prefix('h') {
h_counts = parse_hydrogen_layer_to_map(hydrogen_str)?;
break;
}
}
let mut charges: HashMap<usize, i8> = HashMap::new();
for part in parts.iter().skip(1) {
if let Some(charge_str) = part.strip_prefix('q') {
charges = parse_charge_layer(charge_str)?;
break;
}
}
let mut isotopes: HashMap<usize, u8> = HashMap::new();
for part in parts.iter().skip(1) {
if let Some(isotope_str) = part.strip_prefix('i') {
isotopes = parse_isotope_layer(isotope_str)?;
break;
}
}
let mut ez_stereo: HashMap<(usize, usize), char> = HashMap::new();
for part in parts.iter().skip(1) {
if let Some(b_str) = part.strip_prefix('b') {
ez_stereo = parse_ez_stereo_layer(b_str)?;
break;
}
}
let mut tet_stereo: HashMap<usize, char> = HashMap::new();
for part in parts.iter().skip(1) {
if let Some(t_str) = part.strip_prefix('t') {
tet_stereo = parse_tetrahedral_stereo_layer(t_str)?;
break;
}
}
for part in parts.iter().skip(1) {
if let Some(m_str) = part.strip_prefix('m') {
let _ = parse_relative_stereo_layer(m_str)?;
break;
}
}
for part in parts.iter().skip(1) {
if let Some(s_str) = part.strip_prefix('s') {
let _ = parse_stereo_type_layer(s_str)?;
break;
}
}
let mut mol = builder.build();
if !h_counts.is_empty() {
mol = apply_hydrogen_counts(mol, &atom_idx_map, &h_counts);
}
if !charges.is_empty() {
mol = apply_charges(mol, &atom_idx_map, &charges);
}
if !isotopes.is_empty() {
mol = apply_isotopes(mol, &atom_idx_map, &isotopes);
}
if !ez_stereo.is_empty() {
mol = apply_ez_stereo(mol, &atom_idx_map, &ez_stereo);
}
if !tet_stereo.is_empty() {
mol = apply_tetrahedral_stereo(mol, &atom_idx_map, &tet_stereo);
}
Ok(mol)
}
/// Parses a formula layer such as `C2H6O` into `(element, count)` pairs in
/// the order they appear.
///
/// Each entry is an uppercase letter, any following lowercase letters, and an
/// optional run of digits; a missing count means 1.
///
/// # Errors
///
/// Returns [`InchiParseError::InvalidFormula`] when an entry does not start
/// with an uppercase letter, the symbol is not a known element, the count
/// does not parse as `usize`, or the formula is empty.
fn parse_formula(formula_str: &str) -> Result<Vec<(Element, usize)>, InchiParseError> {
    let mut entries = Vec::new();
    let mut cursor = formula_str.chars().peekable();

    while let Some(lead) = cursor.next() {
        if !lead.is_uppercase() {
            return Err(InchiParseError::InvalidFormula);
        }

        let mut symbol = String::from(lead);
        while let Some(lower) = cursor.next_if(|c| c.is_lowercase()) {
            symbol.push(lower);
        }
        let Some(element) = Element::from_symbol(&symbol) else {
            return Err(InchiParseError::InvalidFormula);
        };

        let mut digits = String::new();
        while let Some(digit) = cursor.next_if(|c| c.is_numeric()) {
            digits.push(digit);
        }
        let count = match digits.as_str() {
            "" => 1,
            text => text
                .parse::<usize>()
                .map_err(|_| InchiParseError::InvalidFormula)?,
        };

        entries.push((element, count));
    }

    if entries.is_empty() {
        Err(InchiParseError::InvalidFormula)
    } else {
        Ok(entries)
    }
}
fn parse_connectivity(
conn_str: &str,
atom_idx_map: &HashMap<usize, AtomIdx>,
builder: &mut MoleculeBuilder,
) -> Result<(), InchiParseError> {
let mut current_atom: usize = 1;
let mut branch_stack: Vec<usize> = Vec::new();
let mut chars = conn_str.chars().peekable();
fn read_num<I: Iterator<Item = char>>(chars: &mut std::iter::Peekable<I>) -> Option<usize> {
let mut s = String::new();
while chars.peek().map(|c| c.is_ascii_digit()).unwrap_or(false) {
s.push(chars.next().unwrap());
}
s.parse().ok()
}
if let Some(n) = read_num(&mut chars) {
current_atom = n;
}
while let Some(ch) = chars.next() {
match ch {
'-' | '=' | '#' => {
let order = match ch {
'=' => BondOrder::Double,
'#' => BondOrder::Triple,
_ => BondOrder::Single,
};
if let Some(next_atom) = read_num(&mut chars) {
if let (Some(&a_idx), Some(&b_idx)) = (
atom_idx_map.get(¤t_atom),
atom_idx_map.get(&next_atom),
) {
let _ = builder.add_bond(a_idx, b_idx, order);
current_atom = next_atom;
} else {
return Err(InchiParseError::InvalidConnectivity);
}
}
}
',' | ';' => {
if let Some(n) = read_num(&mut chars) {
current_atom = n;
}
}
'(' => {
branch_stack.push(current_atom);
}
')' => {
if let Some(saved) = branch_stack.pop() {
current_atom = saved;
}
}
c if c.is_ascii_digit() => {
let mut s = String::from(c);
while chars.peek().map(|ch| ch.is_ascii_digit()).unwrap_or(false) {
s.push(chars.next().unwrap());
}
if let Ok(next_atom) = s.parse::<usize>() {
if let (Some(&a_idx), Some(&b_idx)) = (
atom_idx_map.get(¤t_atom),
atom_idx_map.get(&next_atom),
) {
let _ = builder.add_bond(a_idx, b_idx, BondOrder::Single);
current_atom = next_atom;
} else {
return Err(InchiParseError::InvalidConnectivity);
}
}
}
_ => {} }
}
Ok(())
}
/// Parses a hydrogen (`/h`) layer into a map from 1-based atom number to
/// hydrogen count.
///
/// The layer is a comma-separated list. An item ending in `H` or `H<n>`
/// closes a group: the count (`1` when `<n>` is absent) is assigned to the
/// atoms named in that item and in every preceding item that had no `H` of
/// its own. Atoms are written as a single number or an inclusive range
/// `a-b`, so `4-7,9-10H,8H2,1-3H3` gives one hydrogen to atoms 4-7 and 9-10,
/// two to atom 8 and three to atoms 1-3. Empty items are skipped.
///
/// Mobile-hydrogen groups such as `(H,11,12)` are not supported and are
/// reported as an error.
///
/// # Errors
///
/// Returns [`InchiParseError::InvalidHydrogen`] when an atom number or count
/// fails to parse, or when the layer ends with atoms that were never given a
/// count.
fn parse_hydrogen_layer_to_map(h_str: &str) -> Result<HashMap<usize, u8>, InchiParseError> {
    /// Parses a number, mapping any failure to `InvalidHydrogen`.
    fn parse_num<T: std::str::FromStr>(text: &str) -> Result<T, InchiParseError> {
        text.parse().map_err(|_| InchiParseError::InvalidHydrogen)
    }

    /// Parses `a` or `a-b` into an inclusive range of atom numbers.
    fn parse_atoms(spec: &str) -> Result<std::ops::RangeInclusive<usize>, InchiParseError> {
        match spec.split_once('-') {
            Some((first, last)) => {
                let start: usize = parse_num(first)?;
                let end: usize = parse_num(last)?;
                Ok(start..=end)
            }
            None => {
                let atom: usize = parse_num(spec)?;
                Ok(atom..=atom)
            }
        }
    }

    let mut h_counts: HashMap<usize, u8> = HashMap::new();
    // Atom specs seen since the last `H`; they share the next count.
    let mut pending: Vec<std::ops::RangeInclusive<usize>> = Vec::new();

    for group in h_str.split(',') {
        let group = group.trim();
        if group.is_empty() {
            continue;
        }
        let Some((atom_spec, count_str)) = group.split_once('H') else {
            pending.push(parse_atoms(group)?);
            continue;
        };
        let h_count: u8 = if count_str.is_empty() {
            1
        } else {
            parse_num(count_str)?
        };
        pending.push(parse_atoms(atom_spec)?);
        for atoms in pending.drain(..) {
            for atom_num in atoms {
                h_counts.insert(atom_num, h_count);
            }
        }
    }

    if pending.is_empty() {
        Ok(h_counts)
    } else {
        // Trailing atoms with no `H` after them.
        Err(InchiParseError::InvalidHydrogen)
    }
}
/// Rebuilds `mol` with explicit hydrogen counts taken from `h_counts`.
///
/// `h_counts` is keyed by InChI atom number and `atom_idx_map` maps those
/// numbers to atom indices. Atoms are copied in index order; an atom whose
/// number has an entry gets `hydrogen_count` set, the rest are copied as-is.
/// Bonds are re-added with their atoms and order, and any failure from
/// `add_bond` is ignored.
fn apply_hydrogen_counts(
    mol: Molecule,
    atom_idx_map: &HashMap<usize, AtomIdx>,
    h_counts: &HashMap<usize, u8>,
) -> Molecule {
    let mut rebuilt = MoleculeBuilder::new();

    for position in 0..mol.atom_count() {
        let idx = AtomIdx(position as u32);
        let mut atom = mol.atom(idx).clone();
        // Reverse lookup: which InChI number, if any, maps to this index?
        let inchi_num = atom_idx_map
            .iter()
            .find_map(|(num, mapped)| (*mapped == idx).then_some(*num));
        if let Some(&h_count) = inchi_num.and_then(|num| h_counts.get(&num)) {
            atom.hydrogen_count = Some(h_count);
        }
        rebuilt.add_atom(atom);
    }

    for position in 0..mol.bond_count() {
        let bond = mol.bond(BondIdx(position as u32));
        rebuilt.add_bond(bond.atom1, bond.atom2, bond.order).ok();
    }

    rebuilt.build()
}
/// Parses a charge layer into a map from atom number to formal charge.
///
/// The layer is a comma-separated list of `<atoms>+<n>` or `<atoms>-<n>`
/// items, where `<atoms>` is a single number or an inclusive range `a-b`.
/// The first `+` is used as the sign if there is one; otherwise the last `-`
/// is. Empty items and items with no sign at all are skipped.
///
/// # Errors
///
/// Returns [`InchiParseError::Unsupported`] when the charge magnitude, a
/// range bound, or a single atom number fails to parse.
fn parse_charge_layer(q_str: &str) -> Result<HashMap<usize, i8>, InchiParseError> {
    let unsupported = |what: &str| InchiParseError::Unsupported(what.to_string());
    let mut charges: HashMap<usize, i8> = HashMap::new();

    for item in q_str.split(',').filter(|item| !item.is_empty()) {
        let (sign_pos, negative) = match (item.find('+'), item.rfind('-')) {
            (Some(pos), _) => (pos, false),
            (None, Some(pos)) => (pos, true),
            (None, None) => continue,
        };
        let atom_str = &item[..sign_pos];
        let magnitude: i8 = item[sign_pos + 1..]
            .parse()
            .map_err(|_| unsupported("invalid charge value"))?;
        let charge_val = if negative { -magnitude } else { magnitude };

        match atom_str.split_once('-') {
            // Exactly one dash: an inclusive atom range.
            Some((first, last)) if !last.contains('-') => {
                let start: usize = first
                    .parse()
                    .map_err(|_| unsupported("invalid atom range"))?;
                let end: usize = last
                    .parse()
                    .map_err(|_| unsupported("invalid atom range"))?;
                charges.extend((start..=end).map(|atom_num| (atom_num, charge_val)));
            }
            // No dash, or several: treat the whole text as one atom number.
            _ => {
                let atom_num: usize = atom_str
                    .parse()
                    .map_err(|_| unsupported("invalid atom number"))?;
                charges.insert(atom_num, charge_val);
            }
        }
    }

    Ok(charges)
}
/// Parses an isotope layer into a map from atom number to isotope mass.
///
/// The layer is a comma-separated list of `<atom>/<isotope>` items. The mass
/// is formed from every numeric character of the second `/`-separated field
/// (so `13C` yields 13). Items with no `/`, and items whose second field has
/// no numeric characters, are skipped.
///
/// # Errors
///
/// Returns [`InchiParseError::Unsupported`] when the atom number does not
/// parse as `usize` or the collected mass does not parse as `u8`.
fn parse_isotope_layer(i_str: &str) -> Result<HashMap<usize, u8>, InchiParseError> {
    let mut isotopes: HashMap<usize, u8> = HashMap::new();

    for item in i_str.split(',') {
        if item.is_empty() {
            continue;
        }
        let mut fields = item.split('/');
        let (Some(atom_field), Some(isotope_field)) = (fields.next(), fields.next()) else {
            continue;
        };

        let Ok(atom_num) = atom_field.parse::<usize>() else {
            return Err(InchiParseError::Unsupported(
                "invalid atom number in isotope layer".to_string(),
            ));
        };

        let mass_digits: String = isotope_field.chars().filter(|c| c.is_numeric()).collect();
        if mass_digits.is_empty() {
            continue;
        }
        let Ok(mass) = mass_digits.parse::<u8>() else {
            return Err(InchiParseError::Unsupported(
                "invalid isotope mass".to_string(),
            ));
        };
        isotopes.insert(atom_num, mass);
    }

    Ok(isotopes)
}
/// Rebuilds `mol` with formal charges taken from `charges`.
///
/// `charges` is keyed by InChI atom number and `atom_idx_map` maps those
/// numbers to atom indices. Atoms are copied in index order; an atom whose
/// number has an entry gets its `charge` overwritten. Bonds are re-added
/// with their atoms and order, and any failure from `add_bond` is ignored.
fn apply_charges(
    mol: Molecule,
    atom_idx_map: &HashMap<usize, AtomIdx>,
    charges: &HashMap<usize, i8>,
) -> Molecule {
    let mut rebuilt = MoleculeBuilder::new();

    for position in 0..mol.atom_count() {
        let idx = AtomIdx(position as u32);
        let mut atom = mol.atom(idx).clone();
        // Reverse lookup from atom index to InChI number.
        let inchi_num = atom_idx_map
            .iter()
            .find_map(|(num, mapped)| (*mapped == idx).then_some(*num));
        if let Some(&charge) = inchi_num.and_then(|num| charges.get(&num)) {
            atom.charge = charge;
        }
        rebuilt.add_atom(atom);
    }

    for position in 0..mol.bond_count() {
        let bond = mol.bond(BondIdx(position as u32));
        rebuilt.add_bond(bond.atom1, bond.atom2, bond.order).ok();
    }

    rebuilt.build()
}
/// Rebuilds `mol` with isotope masses taken from `isotopes`.
///
/// `isotopes` is keyed by InChI atom number and `atom_idx_map` maps those
/// numbers to atom indices. Atoms are copied in index order; an atom whose
/// number has an entry gets `isotope` set to the mass widened to `u16`.
/// Bonds are re-added with their atoms and order, and any failure from
/// `add_bond` is ignored.
fn apply_isotopes(
    mol: Molecule,
    atom_idx_map: &HashMap<usize, AtomIdx>,
    isotopes: &HashMap<usize, u8>,
) -> Molecule {
    let mut rebuilt = MoleculeBuilder::new();

    for position in 0..mol.atom_count() {
        let idx = AtomIdx(position as u32);
        let mut atom = mol.atom(idx).clone();
        // Reverse lookup from atom index to InChI number.
        let inchi_num = atom_idx_map
            .iter()
            .find_map(|(num, mapped)| (*mapped == idx).then_some(*num));
        if let Some(&mass) = inchi_num.and_then(|num| isotopes.get(&num)) {
            atom.isotope = Some(u16::from(mass));
        }
        rebuilt.add_atom(atom);
    }

    for position in 0..mol.bond_count() {
        let bond = mol.bond(BondIdx(position as u32));
        rebuilt.add_bond(bond.atom1, bond.atom2, bond.order).ok();
    }

    rebuilt.build()
}
/// Parses a double-bond stereo (`/b`) layer into a map from an atom pair to
/// its parity character (`'+'` or `'-'`).
///
/// The layer is a comma-separated list of `<a>-<b><parity>` items. The last
/// `+` marks the parity if there is one; otherwise the last `-` does. The
/// pair is stored with the smaller atom number first. Items with no parity
/// sign, or whose atom pair does not parse, are skipped.
///
/// This function never returns an error; the `Result` matches the other
/// layer parsers.
fn parse_ez_stereo_layer(b_str: &str) -> Result<HashMap<(usize, usize), char>, InchiParseError> {
    let mut stereo: HashMap<(usize, usize), char> = HashMap::new();

    for item in b_str.split(',') {
        let (cut, parity) = match (item.rfind('+'), item.rfind('-')) {
            (Some(pos), _) => (pos, '+'),
            (None, Some(pos)) => (pos, '-'),
            (None, None) => continue,
        };
        // Malformed atom pairs are skipped rather than reported.
        if let Ok((a1, a2)) = parse_bond_spec(&item[..cut]) {
            stereo.insert((a1.min(a2), a1.max(a2)), parity);
        }
    }

    Ok(stereo)
}
/// Parses a tetrahedral stereo (`/t`) layer into a map from atom number to
/// its parity character (`'+'` or `'-'`).
///
/// The layer is a comma-separated list of `<atom><parity>` items. The last
/// `+` marks the parity if there is one; otherwise the last `-` does. Items
/// with neither sign are skipped.
///
/// # Errors
///
/// Returns [`InchiParseError::Unsupported`] when the text before the parity
/// sign does not parse as `usize`.
fn parse_tetrahedral_stereo_layer(t_str: &str) -> Result<HashMap<usize, char>, InchiParseError> {
    let mut stereo: HashMap<usize, char> = HashMap::new();

    for item in t_str.split(',') {
        let (cut, parity) = match (item.rfind('+'), item.rfind('-')) {
            (Some(pos), _) => (pos, '+'),
            (None, Some(pos)) => (pos, '-'),
            (None, None) => continue,
        };
        let Ok(atom_num) = item[..cut].parse::<usize>() else {
            return Err(InchiParseError::Unsupported(
                "invalid atom number in stereo layer".to_string(),
            ));
        };
        stereo.insert(atom_num, parity);
    }

    Ok(stereo)
}
/// Parses an atom pair written as `<a>-<b>` into `(a, b)`, in the order
/// written.
///
/// # Errors
///
/// Returns [`InchiParseError::Unsupported`] when `spec` does not split into
/// exactly two `-`-separated fields, or when either field does not parse as
/// `usize`.
fn parse_bond_spec(spec: &str) -> Result<(usize, usize), InchiParseError> {
    let bad_atom = || InchiParseError::Unsupported("invalid atom in bond spec".to_string());

    let mut fields = spec.split('-');
    let (Some(first), Some(second), None) = (fields.next(), fields.next(), fields.next()) else {
        return Err(InchiParseError::Unsupported(
            "invalid bond spec".to_string(),
        ));
    };

    let a1 = first.parse::<usize>().map_err(|_| bad_atom())?;
    let a2 = second.parse::<usize>().map_err(|_| bad_atom())?;
    Ok((a1, a2))
}
/// Rebuilds `mol` with double-bond stereo descriptors from `stereo`.
///
/// `stereo` is keyed by a pair of InChI atom numbers. For each atom, the
/// entries are scanned for one whose first atom number maps (through
/// `atom_idx_map`) to that atom; the first such entry with parity `'+'` sets
/// `cip_code` to `CipCode::Z`, `'-'` sets `CipCode::E`, and any other parity
/// is passed over. Bonds are re-added between the rebuilt atoms, and any
/// failure from `add_bond` is ignored.
///
/// `mol` is returned untouched when `stereo` is empty.
fn apply_ez_stereo(
    mol: Molecule,
    atom_idx_map: &HashMap<usize, AtomIdx>,
    stereo: &HashMap<(usize, usize), char>,
) -> Molecule {
    if stereo.is_empty() {
        return mol;
    }

    let mut rebuilt = MoleculeBuilder::new();
    let mut old_to_new = HashMap::new();

    for (old_idx, atom) in mol.atoms() {
        let mut copy = atom.clone();
        for (&(first_atom, _), &parity) in stereo {
            if atom_idx_map.get(&first_atom) != Some(&old_idx) {
                continue;
            }
            let code = match parity {
                '+' => CipCode::Z,
                '-' => CipCode::E,
                _ => continue,
            };
            copy.cip_code = Some(code);
            break;
        }
        old_to_new.insert(old_idx, rebuilt.add_atom(copy));
    }

    for (_, bond) in mol.bonds() {
        let _ = rebuilt.add_bond(
            old_to_new[&bond.atom1],
            old_to_new[&bond.atom2],
            bond.order,
        );
    }

    rebuilt.build()
}
/// Rebuilds `mol` with tetrahedral stereo descriptors from `stereo`.
///
/// `stereo` is keyed by InChI atom number. For each atom, the entries are
/// scanned for one whose number maps (through `atom_idx_map`) to that atom;
/// the first such entry with parity `'+'` sets `cip_code` to `CipCode::R`,
/// `'-'` sets `CipCode::S`, and any other parity is passed over. Bonds are
/// re-added between the rebuilt atoms, and any failure from `add_bond` is
/// ignored.
///
/// `mol` is returned untouched when `stereo` is empty.
fn apply_tetrahedral_stereo(
    mol: Molecule,
    atom_idx_map: &HashMap<usize, AtomIdx>,
    stereo: &HashMap<usize, char>,
) -> Molecule {
    if stereo.is_empty() {
        return mol;
    }

    let mut rebuilt = MoleculeBuilder::new();
    let mut old_to_new = HashMap::new();

    for (old_idx, atom) in mol.atoms() {
        let mut copy = atom.clone();
        for (&inchi_num, &parity) in stereo {
            if atom_idx_map.get(&inchi_num) != Some(&old_idx) {
                continue;
            }
            let code = match parity {
                '+' => CipCode::R,
                '-' => CipCode::S,
                _ => continue,
            };
            copy.cip_code = Some(code);
            break;
        }
        old_to_new.insert(old_idx, rebuilt.add_atom(copy));
    }

    for (_, bond) in mol.bonds() {
        let _ = rebuilt.add_bond(
            old_to_new[&bond.atom1],
            old_to_new[&bond.atom2],
            bond.order,
        );
    }

    rebuilt.build()
}
/// Parses a relative-stereo (`/m`) layer into a map from 1-based position to
/// the text found at that position.
///
/// The layer is split on `,`; each non-empty entry is stored under its
/// position in the list, so empty entries leave gaps in the keys.
///
/// This function never returns an error; the `Result` matches the other
/// layer parsers.
fn parse_relative_stereo_layer(m_str: &str) -> Result<HashMap<usize, String>, InchiParseError> {
    let parity_map = m_str
        .split(',')
        .enumerate()
        .filter(|(_, entry)| !entry.is_empty())
        .map(|(position, entry)| (position + 1, entry.to_string()))
        .collect();
    Ok(parity_map)
}
/// Returns the stereo-type (`/s`) layer text as an owned `String`, unchanged.
///
/// This function never returns an error; the `Result` matches the other
/// layer parsers.
fn parse_stereo_type_layer(s_str: &str) -> Result<String, InchiParseError> {
    let stereo_type = String::from(s_str);
    Ok(stereo_type)
}
/// Unit tests for the InChI parser and its per-layer helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- formula layer ----

    #[test]
    fn test_parse_formula_methane() {
        let result = parse_formula("CH4");
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.len(), 2);
    }

    #[test]
    fn test_parse_formula_ethane() {
        let result = parse_formula("C2H6");
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(
            elements
                .iter()
                .find(|(e, _)| e.atomic_number() == 6)
                .map(|(_, c)| c),
            Some(&2)
        );
    }

    #[test]
    fn test_parse_formula_benzene() {
        let result = parse_formula("C6H6");
        assert!(result.is_ok());
        let elements = result.unwrap();
        assert_eq!(elements.len(), 2);
    }

    #[test]
    fn test_parse_formula_invalid() {
        let result = parse_formula("invalid");
        assert!(result.is_err());
    }

    // ---- whole-string parsing ----

    #[test]
    fn test_parse_inchi_methane() {
        let result = parse_inchi("InChI=1S/CH4/h1H4");
        assert!(result.is_ok());
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 1, "methane should have 1 heavy atom (C)");
    }

    #[test]
    fn test_parse_inchi_ethane() {
        let result = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3");
        assert!(result.is_ok());
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 2, "ethane should have 2 heavy atoms");
    }

    #[test]
    fn test_parse_inchi_benzene() {
        let result = parse_inchi("InChI=1S/C6H6/c1-2-3-4-5-6-1/h1-6H");
        assert!(result.is_ok());
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 6, "benzene should have 6 heavy atoms");
    }

    #[test]
    fn test_parse_inchi_invalid_format() {
        let result = parse_inchi("InvalidInChI");
        assert!(result.is_err());
    }

    #[test]
    fn test_parse_inchi_with_ez_stereo() {
        let result = parse_inchi("InChI=1S/C4H8/c1-3-4-2/h3-4H,1-2H3/b4-3-");
        assert!(result.is_ok(), "should parse InChI with /b layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    // ---- hydrogen layer ----

    #[test]
    fn test_parse_hydrogen_layer_single_atom() {
        let h_map = parse_hydrogen_layer_to_map("1H4").unwrap();
        assert_eq!(h_map.get(&1), Some(&4), "atom 1 should have 4 H");
    }

    #[test]
    fn test_parse_hydrogen_layer_range() {
        let h_map = parse_hydrogen_layer_to_map("1-6H").unwrap();
        for i in 1..=6 {
            assert_eq!(h_map.get(&i), Some(&1), "atoms 1-6 should each have 1 H");
        }
    }

    #[test]
    fn test_parse_hydrogen_layer_mixed() {
        let h_map = parse_hydrogen_layer_to_map("1H4,2H2,3-6H").unwrap();
        assert_eq!(h_map.get(&1), Some(&4));
        assert_eq!(h_map.get(&2), Some(&2));
        assert_eq!(h_map.get(&3), Some(&1));
        assert_eq!(h_map.get(&6), Some(&1));
    }

    #[test]
    fn test_parse_inchi_ethanol_with_hydrogen_layer() {
        let result = parse_inchi("InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3");
        assert!(result.is_ok());
        let mol = result.unwrap();
        assert_eq!(
            mol.atom_count(),
            3,
            "ethanol should have 3 heavy atoms (C, C, O)"
        );
        let has_h_count = mol.atoms().any(|(_, atom)| atom.hydrogen_count.is_some());
        assert!(
            has_h_count,
            "at least one atom should have explicit hydrogen_count"
        );
    }

    #[test]
    fn test_parse_inchi_methane_roundtrip() {
        let result = parse_inchi("InChI=1S/CH4/h1H4");
        assert!(result.is_ok());
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 1, "methane should have 1 heavy atom (C)");
        let carbon = mol.atom(AtomIdx(0));
        assert_eq!(carbon.element.atomic_number(), 6, "should be carbon");
        assert_eq!(carbon.hydrogen_count, Some(4), "carbon should have 4 H");
    }

    // ---- charge and isotope layers ----

    #[test]
    fn test_parse_charge_layer_single_positive() {
        let charges = parse_charge_layer("1+1").unwrap();
        assert_eq!(charges.get(&1), Some(&1), "atom 1 should have charge +1");
    }

    #[test]
    fn test_parse_charge_layer_single_negative() {
        let charges = parse_charge_layer("2-1").unwrap();
        assert_eq!(charges.get(&2), Some(&-1), "atom 2 should have charge -1");
    }

    #[test]
    fn test_parse_charge_layer_multiple() {
        let charges = parse_charge_layer("1+1,2-1,3+2").unwrap();
        assert_eq!(charges.get(&1), Some(&1), "atom 1 should have charge +1");
        assert_eq!(charges.get(&2), Some(&-1), "atom 2 should have charge -1");
        assert_eq!(charges.get(&3), Some(&2), "atom 3 should have charge +2");
    }

    #[test]
    fn test_parse_isotope_layer_single() {
        let isotopes = parse_isotope_layer("2/13C").unwrap();
        assert_eq!(isotopes.get(&2), Some(&13), "atom 2 should be C-13");
    }

    #[test]
    fn test_parse_isotope_layer_multiple() {
        let isotopes = parse_isotope_layer("1/2H,2/13C").unwrap();
        assert_eq!(
            isotopes.get(&1),
            Some(&2),
            "atom 1 should be H-2 (deuterium)"
        );
        assert_eq!(isotopes.get(&2), Some(&13), "atom 2 should be C-13");
    }

    #[test]
    fn test_parse_inchi_with_charge_layer() {
        // Goes through `parse_inchi` so the charge layer is applied to the
        // molecule, using this parser's per-atom `<atom>+<n>` form.
        let result = parse_inchi("InChI=1S/CH4N/c1-2/h1H3,2H/q1+1");
        assert!(result.is_ok(), "should parse InChI with /q layer");
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 2);
        assert_eq!(
            mol.atom(AtomIdx(0)).charge,
            1,
            "atom 1 should have charge +1"
        );
    }

    #[test]
    fn test_parse_inchi_with_isotope_layer() {
        // The `/` inside `i/2H` splits it into an empty `i` layer and an
        // unrecognized `2H` layer; parsing must still succeed.
        let result = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3/i/2H");
        assert!(result.is_ok(), "should parse InChI with /i layer");
        let mol = result.unwrap();
        assert_eq!(mol.atom_count(), 2, "ethane should have 2 heavy atoms");
    }

    #[test]
    fn test_empty_charge_layer() {
        let charges = parse_charge_layer("").unwrap();
        assert!(
            charges.is_empty(),
            "empty charge layer should yield no charges"
        );
    }

    #[test]
    fn test_empty_isotope_layer() {
        let isotopes = parse_isotope_layer("").unwrap();
        assert!(
            isotopes.is_empty(),
            "empty isotope layer should yield no isotopes"
        );
    }

    // ---- stereo layers ----

    #[test]
    fn test_parse_ez_stereo_layer_single() {
        let stereo = parse_ez_stereo_layer("2-3+").unwrap();
        assert_eq!(stereo.len(), 1);
        assert_eq!(stereo.get(&(2, 3)), Some(&'+'));
    }

    #[test]
    fn test_parse_ez_stereo_layer_multiple() {
        let stereo = parse_ez_stereo_layer("2-3+,5-6-").unwrap();
        assert_eq!(stereo.len(), 2);
        assert_eq!(stereo.get(&(2, 3)), Some(&'+'));
        assert_eq!(stereo.get(&(5, 6)), Some(&'-'));
    }

    #[test]
    fn test_parse_ez_stereo_layer_empty() {
        let stereo = parse_ez_stereo_layer("").unwrap();
        assert!(stereo.is_empty());
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_single() {
        let stereo = parse_tetrahedral_stereo_layer("1-").unwrap();
        assert_eq!(stereo.len(), 1);
        assert_eq!(stereo.get(&1), Some(&'-'));
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_multiple() {
        let stereo = parse_tetrahedral_stereo_layer("1-,2+,3-").unwrap();
        assert_eq!(stereo.len(), 3);
        assert_eq!(stereo.get(&1), Some(&'-'));
        assert_eq!(stereo.get(&2), Some(&'+'));
        assert_eq!(stereo.get(&3), Some(&'-'));
    }

    #[test]
    fn test_parse_tetrahedral_stereo_layer_empty() {
        let stereo = parse_tetrahedral_stereo_layer("").unwrap();
        assert!(stereo.is_empty());
    }

    #[test]
    fn test_parse_inchi_with_tetrahedral_stereo() {
        let result = parse_inchi("InChI=1S/C2H4O2/c1-2(3)4/h2H,1H3/t2-");
        assert!(result.is_ok(), "should parse InChI with /t layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    #[test]
    fn test_parse_bond_spec() {
        let (a1, a2) = parse_bond_spec("2-3").unwrap();
        assert_eq!(a1, 2);
        assert_eq!(a2, 3);
    }

    #[test]
    fn test_parse_bond_spec_large_numbers() {
        let (a1, a2) = parse_bond_spec("12-15").unwrap();
        assert_eq!(a1, 12);
        assert_eq!(a2, 15);
    }

    #[test]
    fn test_parse_relative_stereo_layer_single() {
        let parity = parse_relative_stereo_layer("1").unwrap();
        assert_eq!(parity.len(), 1);
        assert_eq!(parity.get(&1), Some(&"1".to_string()));
    }

    #[test]
    fn test_parse_relative_stereo_layer_multiple() {
        let parity = parse_relative_stereo_layer("1,2").unwrap();
        assert_eq!(parity.len(), 2);
        assert_eq!(parity.get(&1), Some(&"1".to_string()));
        assert_eq!(parity.get(&2), Some(&"2".to_string()));
    }

    #[test]
    fn test_parse_relative_stereo_layer_empty() {
        let parity = parse_relative_stereo_layer("").unwrap();
        assert!(parity.is_empty());
    }

    #[test]
    fn test_parse_stereo_type_layer_obsolete() {
        let stereo_type = parse_stereo_type_layer("obsolete").unwrap();
        assert_eq!(stereo_type, "obsolete");
    }

    #[test]
    fn test_parse_stereo_type_layer_new() {
        let stereo_type = parse_stereo_type_layer("new").unwrap();
        assert_eq!(stereo_type, "new");
    }

    #[test]
    fn test_parse_inchi_with_relative_stereo() {
        let result = parse_inchi("InChI=1S/C4H10/c1-3-4-2/h3-4H,1-2H3/m0");
        assert!(result.is_ok(), "should parse InChI with /m layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    #[test]
    fn test_parse_inchi_with_stereo_type() {
        let result = parse_inchi("InChI=1S/C2H6/c1-2/h1-2H3/s1");
        assert!(result.is_ok(), "should parse InChI with /s layer");
        if let Ok(mol) = result {
            assert!(mol.atom_count() > 0);
        }
    }

    // ---- applying stereo to a molecule ----

    #[test]
    fn test_tetrahedral_stereo_roundtrip_simple() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::H));
        let a3 = builder.add_atom(Atom::new(Element::H));
        let a4 = builder.add_atom(Atom::new(Element::H));
        let a5 = builder.add_atom(Atom::new(Element::N));
        let _ = builder.add_bond(a1, a2, BondOrder::Single);
        let _ = builder.add_bond(a1, a3, BondOrder::Single);
        let _ = builder.add_bond(a1, a4, BondOrder::Single);
        let _ = builder.add_bond(a1, a5, BondOrder::Single);
        let mol = builder.build();

        let mut stereo_map = HashMap::new();
        stereo_map.insert(1, '-');
        let mut atom_idx_map = HashMap::new();
        atom_idx_map.insert(1, a1);

        let mol_stereo = apply_tetrahedral_stereo(mol, &atom_idx_map, &stereo_map);
        let found_s = mol_stereo
            .atoms()
            .any(|(_, atom)| atom.cip_code == Some(CipCode::S));
        assert!(found_s, "apply_tetrahedral_stereo should assign S cip_code");
    }

    #[test]
    fn test_ez_stereo_roundtrip_simple() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::C));
        let a3 = builder.add_atom(Atom::new(Element::H));
        let a4 = builder.add_atom(Atom::new(Element::N));
        let _ = builder.add_bond(a1, a2, BondOrder::Double);
        let _ = builder.add_bond(a1, a3, BondOrder::Single);
        let _ = builder.add_bond(a2, a4, BondOrder::Single);
        let mol = builder.build();

        let mut stereo_map = HashMap::new();
        stereo_map.insert((1, 2), '-');
        let mut atom_idx_map = HashMap::new();
        atom_idx_map.insert(1, a1);
        atom_idx_map.insert(2, a2);

        let mol_stereo = apply_ez_stereo(mol, &atom_idx_map, &stereo_map);
        let found_e = mol_stereo
            .atoms()
            .any(|(_, atom)| atom.cip_code == Some(CipCode::E));
        assert!(found_e, "apply_ez_stereo should assign E cip_code");
    }

    // ---- connectivity layer ----

    #[test]
    fn test_parse_connectivity_branch_isobutane() {
        let mut builder = MoleculeBuilder::new();
        let a1 = builder.add_atom(Atom::new(Element::C));
        let a2 = builder.add_atom(Atom::new(Element::C));
        let a3 = builder.add_atom(Atom::new(Element::C));
        let a4 = builder.add_atom(Atom::new(Element::C));
        let mut map: HashMap<usize, AtomIdx> = HashMap::new();
        map.insert(1, a1);
        map.insert(2, a2);
        map.insert(3, a3);
        map.insert(4, a4);

        parse_connectivity("1-4(2)3", &map, &mut builder).expect("isobutane /c parse");
        let mol = builder.build();
        assert_eq!(
            mol.bond_count(),
            3,
            "isobutane /c should yield 3 bonds, got {}",
            mol.bond_count()
        );
    }

    #[test]
    fn test_parse_connectivity_nested_branch() {
        let mut builder = MoleculeBuilder::new();
        let atoms: Vec<AtomIdx> = (0..5)
            .map(|_| builder.add_atom(Atom::new(Element::C)))
            .collect();
        let mut map: HashMap<usize, AtomIdx> = HashMap::new();
        for (i, &a) in atoms.iter().enumerate() {
            map.insert(i + 1, a);
        }

        parse_connectivity("1-5(2)(3)4", &map, &mut builder).expect("neopentane /c parse");
        let mol = builder.build();
        assert_eq!(mol.bond_count(), 4, "neopentane /c should yield 4 bonds");
    }
}