use inchi::{
check_inchi, check_inchikey, from_molfile, inchi_to_inchi, inchikey, inchikey_with_hashes,
struct_from_aux_info, struct_from_inchi, struct_from_inchi_ex, Atom, BondOrder,
InchiKeyValidity, InchiValidity, Molecule, Options, PolymerConnection, PolymerUnit,
PolymerUnitKind, Polymers,
};
// Molfile fixtures, embedded into the test binary at compile time from the
// `fixtures/` directory next to this file. Each constant holds the full text
// of one `.mol` file and is fed to `from_molfile` by the tests below.
const METHANE: &str = include_str!("fixtures/methane.mol");
const WATER: &str = include_str!("fixtures/water.mol");
const ETHANOL: &str = include_str!("fixtures/ethanol.mol");
const SODIUM_ACETATE: &str = include_str!("fixtures/sodium_acetate.mol");
const BENZENE: &str = include_str!("fixtures/benzene.mol");
const CAFFEINE: &str = include_str!("fixtures/caffeine.mol");
const ASPIRIN: &str = include_str!("fixtures/aspirin.mol");
const IBUPROFEN: &str = include_str!("fixtures/ibuprofen.mol");
const L_ALANINE: &str = include_str!("fixtures/l_alanine.mol");
const GLUCOSE: &str = include_str!("fixtures/glucose.mol");
const L_TRYPTOPHAN: &str = include_str!("fixtures/l_tryptophan.mol");
/// Reference table driving the data-driven tests in this file.
///
/// Each entry is `(molfile text, expected InChI, expected InChIKey)`. The
/// InChI strings all carry the `InChI=1S/` prefix; the last three entries
/// additionally contain `/t…/m…/s1` layers.
const CASES: &[(&str, &str, &str)] = &[
(METHANE, "InChI=1S/CH4/h1H4", "VNWKTOKETHGBQD-UHFFFAOYSA-N"),
(WATER, "InChI=1S/H2O/h1H2", "XLYOFNOQVPJJNP-UHFFFAOYSA-N"),
(
ETHANOL,
"InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3",
"LFQSCWFLJHTTHZ-UHFFFAOYSA-N",
),
(
SODIUM_ACETATE,
"InChI=1S/C2H4O2.Na/c1-2(3)4;/h1H3,(H,3,4);/q;+1/p-1",
"VMHLLURERBWHNL-UHFFFAOYSA-M",
),
(
BENZENE,
"InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H",
"UHOVQNZJYSORNB-UHFFFAOYSA-N",
),
(
CAFFEINE,
"InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3",
"RYYVLZVUVIJVGH-UHFFFAOYSA-N",
),
(
ASPIRIN,
"InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)",
"BSYNRYMUTXBXSQ-UHFFFAOYSA-N",
),
(
IBUPROFEN,
"InChI=1S/C13H18O2/c1-9(2)8-11-4-6-12(7-5-11)10(3)13(14)15/h4-7,9-10H,8H2,1-3H3,(H,14,15)",
"HEFNNWSXXWATRW-UHFFFAOYSA-N",
),
(
L_ALANINE,
"InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1",
"QNAYBMKLOCPYGJ-REOHCLBHSA-N",
),
(
GLUCOSE,
"InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1",
"WQZGKKKJIJFFOK-DVKNGEFBSA-N",
),
(
L_TRYPTOPHAN,
"InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)/t9-/m0/s1",
"QIVBCDIJIAJPQS-VIFPVBQESA-N",
),
];
/// Generates the InChI string for `molfile` using `Options::new()`.
///
/// # Panics
///
/// Panics with `generation failed: <error>` when `from_molfile` returns an
/// error, so a broken fixture aborts the calling test immediately.
fn inchi_of(molfile: &str) -> String {
    match from_molfile(molfile, Options::new()) {
        Ok(output) => output.into_inchi(),
        Err(e) => panic!("generation failed: {e}"),
    }
}
/// For every entry in `CASES`, generates an InChI from the molfile and checks
/// both the InChI and the InChIKey derived from it against the reference values.
#[test]
fn forward_generation_matches_references() {
    for (molfile, expected_inchi, expected_key) in CASES.iter().copied() {
        let generated = inchi_of(molfile);
        assert_eq!(generated, expected_inchi, "InChI mismatch");

        // The key is computed from the generated string, not the reference one.
        let derived_key = inchikey(&generated).expect("key");
        assert_eq!(
            derived_key, expected_key,
            "InChIKey mismatch for {expected_inchi}"
        );
    }
}
/// Computes the InChIKey of every reference InChI in `CASES`, compares it with
/// the reference key, and checks that `check_inchikey` classifies it as
/// `InchiKeyValidity::Standard`.
#[test]
fn every_reference_key_is_valid() {
    for (_, reference_inchi, expected_key) in CASES.iter().copied() {
        let key = inchikey(reference_inchi).expect("key");
        assert_eq!(key, expected_key);

        let validity = check_inchikey(&key).expect("check");
        assert_eq!(validity, InchiKeyValidity::Standard);
    }
}
/// Builds two molecules atom-by-atom with the `Molecule` builder and checks
/// that the generated InChI and InChIKey match known values.
///
/// No hydrogen atoms are added explicitly in either molecule; the expected
/// InChI strings nevertheless carry hydrogen counts.
#[test]
fn molecule_builder_matches_known_identifiers() {
    // Six-membered carbon ring with alternating double/single bonds.
    let mut benzene = Molecule::new();
    let ring: Vec<usize> = (0..6).map(|_| benzene.add_atom(Atom::new("C"))).collect();
    for (pos, &from) in ring.iter().enumerate() {
        // Wrap around so the last atom bonds back to the first.
        let to = ring[(pos + 1) % ring.len()];
        let order = match pos % 2 {
            0 => BondOrder::Double,
            _ => BondOrder::Single,
        };
        benzene.add_bond(from, to, order).expect("bond");
    }
    let benzene_inchi = benzene.to_inchi(Options::new()).expect("gen").into_inchi();
    assert_eq!(benzene_inchi, "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H");
    assert_eq!(
        inchikey(&benzene_inchi).expect("key"),
        "UHOVQNZJYSORNB-UHFFFAOYSA-N"
    );

    // N-C-C(=O)-O skeleton (the molecule the original names `gly`).
    let mut gly = Molecule::new();
    let nitrogen = gly.add_atom(Atom::new("N"));
    let alpha_carbon = gly.add_atom(Atom::new("C"));
    let carboxyl_carbon = gly.add_atom(Atom::new("C"));
    let carbonyl_oxygen = gly.add_atom(Atom::new("O"));
    let hydroxyl_oxygen = gly.add_atom(Atom::new("O"));
    let skeleton = [
        (nitrogen, alpha_carbon, BondOrder::Single),
        (alpha_carbon, carboxyl_carbon, BondOrder::Single),
        (carboxyl_carbon, carbonyl_oxygen, BondOrder::Double),
        (carboxyl_carbon, hydroxyl_oxygen, BondOrder::Single),
    ];
    for (from, to, order) in skeleton {
        gly.add_bond(from, to, order).expect("bond");
    }
    let gly_inchi = gly.to_inchi(Options::new()).expect("gen").into_inchi();
    assert_eq!(gly_inchi, "InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)");
    assert_eq!(
        inchikey(&gly_inchi).expect("key"),
        "DHMQDGOQFOQNFH-UHFFFAOYSA-N"
    );
}
/// Parses the caffeine InChI back into a structure and checks the atom count,
/// the per-element breakdown, and a lower bound on the number of bonds.
#[test]
fn parse_structure_of_caffeine() {
    let caffeine =
        struct_from_inchi("InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3")
            .expect("parse");
    let atom_total = caffeine.atoms().len();
    assert_eq!(atom_total, 14);

    // Counts the atoms whose element symbol equals `symbol`.
    let count_of = |symbol: &str| {
        caffeine
            .atoms()
            .iter()
            .filter(|atom| atom.element == symbol)
            .count()
    };
    assert_eq!((count_of("C"), count_of("N"), count_of("O")), (8, 4, 2));

    // The test only requires at least as many bonds as atoms.
    assert!(caffeine.bonds().len() >= atom_total);
}
/// Parses three InChI strings that carry `/t` layers and checks how many
/// stereo elements the parsed structure reports for each.
#[test]
fn parse_recovers_stereo() {
    // (InChI, expected number of entries returned by `stereo()`), in the
    // order tryptophan, alanine, glucose.
    let expectations = [
        (
            "InChI=1S/C11H12N2O2/c12-9(11(14)15)5-7-6-13-10-4-2-1-3-8(7)10/h1-4,6,9,13H,5,12H2,(H,14,15)/t9-/m0/s1",
            1,
        ),
        (
            "InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1",
            1,
        ),
        (
            "InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1",
            5,
        ),
    ];
    for (inchi, expected_centres) in expectations {
        let parsed = struct_from_inchi(inchi).expect("parse");
        assert_eq!(parsed.stereo().len(), expected_centres);
    }
}
/// Generates AuxInfo for the aspirin fixture and rebuilds a structure from it
/// with the boolean flag of `struct_from_aux_info` set both ways.
#[test]
fn aux_info_round_trip() {
    let generated = from_molfile(ASPIRIN, Options::new()).expect("gen");
    assert!(generated.aux_info().starts_with("AuxInfo="));

    let rebuilt = struct_from_aux_info(generated.aux_info(), true).expect("parse");
    assert_eq!(rebuilt.atoms().len(), 21);
    // Per-element breakdown of those 21 atoms.
    for (symbol, expected) in [("C", 9), ("O", 4), ("H", 8)] {
        let found = rebuilt
            .atoms()
            .iter()
            .filter(|atom| atom.element == symbol)
            .count();
        assert_eq!(found, expected);
    }

    // NOTE(review): the flag presumably controls hydrogen addition (the
    // original binding was named `no_add`) — confirm against the crate docs.
    let rebuilt_flag_off = struct_from_aux_info(generated.aux_info(), false).expect("parse");
    assert_eq!(rebuilt_flag_off.atoms().len(), 21);
}
/// Checks that AuxInfo is produced with `Options::new()` and is empty when
/// generation runs with `aux_info(false)`.
#[test]
fn aux_info_present_and_suppressible() {
    let default_run = from_molfile(ETHANOL, Options::new()).expect("gen");
    assert!(default_run.aux_info().starts_with("AuxInfo="));

    let suppressed_opts = Options::new().aux_info(false);
    let suppressed_run = from_molfile(ETHANOL, suppressed_opts).expect("gen");
    assert!(suppressed_run.aux_info().is_empty());
}
/// Exercises `inchi_to_inchi` in two ways: with `Options::new()` every
/// reference InChI must come back unchanged, and with `fixed_h(true)` the
/// aspirin InChI must come back with the `InChI=1/` prefix and be classified
/// as non-standard.
#[test]
fn inchi_to_inchi_normalizes_and_converts() {
    for (_, reference, _) in CASES.iter().copied() {
        let converted = inchi_to_inchi(reference, Options::new()).expect("convert");
        assert_eq!(converted.inchi(), reference);
    }

    let aspirin_std = "InChI=1S/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)";
    let aspirin_nonstd = "InChI=1/C9H8O4/c1-6(10)13-8-5-3-2-4-7(8)9(11)12/h2-5H,1H3,(H,11,12)";

    let fixed = inchi_to_inchi(aspirin_std, Options::new().fixed_h(true)).expect("convert");
    assert_eq!(fixed.inchi(), aspirin_nonstd);

    let validity = check_inchi(fixed.inchi(), false).expect("check");
    assert_eq!(validity, InchiValidity::NonStandard);
}
/// Verifies how `check_inchi` classifies standard, non-standard and malformed
/// input, with the second argument both `false` and `true` (the strict mode,
/// per the `"strict check"` expectation messages).
#[test]
fn check_inchi_classifies_validity() {
    // Quick check: every reference InChI is a standard InChI.
    for &(_, inchi, _) in CASES {
        assert_eq!(
            check_inchi(inchi, false).expect("check"),
            InchiValidity::Standard
        );
    }

    // Strict check: the reference strings are canonical, so they must still be
    // reported as `Standard`. Expecting `FailedRoundtrip` here would assert
    // that every valid reference InChI is broken.
    for &(_, inchi, _) in CASES {
        assert_eq!(
            check_inchi(inchi, true).expect("strict check"),
            InchiValidity::Standard,
            "strict check rejected {inchi}"
        );
    }

    // A non-standard (`InChI=1/`) string is classified the same in both modes.
    let nonstd = "InChI=1/CH4/h1H4";
    assert_eq!(
        check_inchi(nonstd, false).expect("check"),
        InchiValidity::NonStandard
    );
    assert_eq!(
        check_inchi(nonstd, true).expect("strict check"),
        InchiValidity::NonStandard
    );

    // Malformed input is reported through the returned validity value rather
    // than through `Err`.
    assert_eq!(
        check_inchi("not an inchi", false).expect("check"),
        InchiValidity::InvalidPrefix
    );
    assert!(!check_inchi("InChI=1S/garbage layout", false)
        .expect("check")
        .is_valid());
}
/// Checks `check_inchikey` on a well-formed key, a key that is too short, and
/// a key whose hyphens were replaced by slashes.
#[test]
fn check_inchikey_classifies_validity() {
    let well_formed = check_inchikey("RYYVLZVUVIJVGH-UHFFFAOYSA-N").expect("check");
    assert_eq!(well_formed, InchiKeyValidity::Standard);

    let too_short = check_inchikey("TOOSHORT").expect("check");
    assert_eq!(too_short, InchiKeyValidity::InvalidLength);

    // Right length, wrong separators: only required to be reported as invalid.
    let wrong_separators = check_inchikey("RYYVLZVUVIJVGH/UHFFFAOYSA/N").expect("check");
    assert!(!wrong_separators.is_valid());
}
/// Checks that `inchikey_with_hashes` always returns the same key for caffeine
/// and only fills `extra1` / `extra2` when the corresponding flag is `true`.
#[test]
fn inchikey_extra_hashes() {
    let caffeine = "InChI=1S/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3";
    let expected_key = "RYYVLZVUVIJVGH-UHFFFAOYSA-N";

    // Shape required of an extra hash: non-empty, at most 64 characters, hex only.
    let looks_like_hash = |text: &str| {
        !text.is_empty() && text.len() <= 64 && text.chars().all(|c| c.is_ascii_hexdigit())
    };

    let plain = inchikey_with_hashes(caffeine, false, false).expect("key");
    assert_eq!(plain.key, expected_key);
    assert!(plain.extra1.is_none());
    assert!(plain.extra2.is_none());

    let full = inchikey_with_hashes(caffeine, true, true).expect("key");
    assert_eq!(full.key, expected_key);
    let first = full.extra1.expect("extra1");
    let second = full.extra2.expect("extra2");
    assert!(looks_like_hash(&first));
    assert!(looks_like_hash(&second));
    assert_ne!(first, second);

    let first_only = inchikey_with_hashes(caffeine, true, false).expect("key");
    assert!(first_only.extra1.is_some());
    assert!(first_only.extra2.is_none());
}
/// Molfile fixture for the polymer tests, embedded at compile time from
/// `fixtures/polymer_sru.mol`.
const POLYMER_SRU: &str = include_str!("fixtures/polymer_sru.mol");
/// InChI the polymer tests expect. Note the `1B` version prefix, the `Zz`
/// atoms in the formula and the trailing `/z` layer.
const POLYETHYLENE_INCHI: &str = "InChI=1B/C2H4Zz2/c3-1-2-4/h1-2H2/z101-1-2(4-2,3-1)";
/// Generates an InChI from the polymer molfile with `Polymers::On` and checks
/// that the same input is rejected under `Options::new()`.
#[test]
fn polymer_generation_from_molfile() {
    let polymer_opts = Options::new().polymers(Polymers::On);
    let generated = from_molfile(POLYMER_SRU, polymer_opts).expect("gen");
    assert_eq!(generated.inchi(), POLYETHYLENE_INCHI);
    assert!(generated.inchi().contains("/z"), "expected a polymer layer");

    let with_defaults = from_molfile(POLYMER_SRU, Options::new());
    assert!(with_defaults.is_err());
}
/// Builds a `Zz-C-C-Zz` chain with the `Molecule` builder, registers one SRU
/// polymer unit over the two carbons, and checks the generated InChI.
#[test]
fn polymer_generation_from_builder() {
    let mut chain = Molecule::new();
    let head_star = chain.add_atom(Atom::new("Zz"));
    let first_carbon = chain.add_atom(Atom::new("C"));
    let second_carbon = chain.add_atom(Atom::new("C"));
    let tail_star = chain.add_atom(Atom::new("Zz"));

    // Linear chain, all single bonds, added head to tail.
    let links = [
        (head_star, first_carbon),
        (first_carbon, second_carbon),
        (second_carbon, tail_star),
    ];
    for (from, to) in links {
        chain.add_bond(from, to, BondOrder::Single).expect("bond");
    }

    // The unit covers the two carbons; its two bonds to the `Zz` atoms are
    // passed as the second argument.
    chain.add_polymer_unit(PolymerUnit::sru(
        [first_carbon, second_carbon],
        [[head_star, first_carbon], [second_carbon, tail_star]],
    ));

    let generated = chain
        .to_inchi(Options::new().polymers(Polymers::On))
        .expect("gen");
    assert_eq!(generated.into_inchi(), POLYETHYLENE_INCHI);
}
/// Parses the polymer InChI with `struct_from_inchi_ex` and checks the
/// recovered polymer unit, then checks that a non-polymer InChI yields no
/// units and the same structure as `struct_from_inchi`.
#[test]
fn polymer_round_trip_parse() {
    let parsed = struct_from_inchi_ex(POLYETHYLENE_INCHI).expect("parse");
    assert_eq!(parsed.structure.atoms().len(), 4);
    assert_eq!(parsed.polymer_units.len(), 1);

    let sru = &parsed.polymer_units[0];
    assert_eq!(sru.kind, PolymerUnitKind::StructureBasedRepeat);
    assert_eq!(sru.connection, PolymerConnection::HeadToTail);
    assert_eq!(sru.atoms.len(), 2);
    assert_eq!(sru.crossing_bonds.len(), 2);

    let ethanol = "InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3";
    let extended = struct_from_inchi_ex(ethanol).expect("parse");
    assert!(extended.polymer_units.is_empty());
    assert_eq!(extended.structure, struct_from_inchi(ethanol).unwrap());
}
/// Checks that malformed input and an empty molecule are reported as `Err`
/// by each entry point exercised here.
#[test]
fn invalid_input_is_error() {
    let garbage = "not an inchi";

    let parse_result = struct_from_inchi(garbage);
    assert!(parse_result.is_err());

    let key_result = inchikey(garbage);
    assert!(key_result.is_err());

    let convert_result = inchi_to_inchi(garbage, Options::new());
    assert!(convert_result.is_err());

    let aux_result = struct_from_aux_info("not aux info", true);
    assert!(aux_result.is_err());

    // A molecule with no atoms cannot be turned into an InChI.
    let empty_result = Molecule::new().to_inchi(Options::new());
    assert!(empty_result.is_err());
}