Skip to main content

chematic_inchi/
lib.rs

1//! Pure Rust InChI and InChIKey generation and parsing for IUPAC standard molecules.
2//!
//! Generates deterministic InChI strings (formula, connectivity, hydrogen, charge, stereo and
//! isotope layers) and parses InChI strings back into a `Molecule` (simple molecules only).
5//! Fully WASM-compatible, FFI-free.
6//!
7//! # Examples
8//!
9//! ```ignore
10//! use chematic_smiles::parse;
11//! use chematic_inchi::{inchi, parse_inchi};
12//!
13//! let mol = parse("c1ccccc1").expect("benzene");
14//! let inchi_str = inchi(&mol);
15//! assert_eq!(inchi_str, "InChI=1S/C6H6/c1-2-3-4-5-6-1/h1-6H");
16//!
17//! // Parse InChI back to Molecule
18//! let mol2 = parse_inchi(&inchi_str).expect("parse");
19//! assert_eq!(mol2.atom_count(), 6);
20//! ```
21
22pub mod layers;
23pub mod key;
24pub mod parser;
25
26use chematic_core::{Molecule, AtomIdx};
27use chematic_smiles::canonical::canonical_atom_order;
28use layers::{formula, connection, hydrogen, charge, isotope, stereo};
29use std::collections::HashMap;
30
31/// Build a mapping from AtomIdx to InChI 1-indexed atom numbers (excluding H).
32pub fn build_inchi_index(mol: &Molecule) -> HashMap<AtomIdx, usize> {
33    let canonical_order = canonical_atom_order(mol);
34    let mut inchi_index: HashMap<AtomIdx, usize> = HashMap::new();
35    let mut inchi_num = 0;
36    for &canon_idx in &canonical_order {
37        let atom_idx = AtomIdx(canon_idx as u32);
38        let atom = mol.atom(atom_idx);
39        if atom.element.atomic_number() != 1 {
40            inchi_num += 1;
41            inchi_index.insert(atom_idx, inchi_num);
42        }
43    }
44    inchi_index
45}
46
47/// Generate InChI string for a molecule.
48///
49/// Layers included: formula, connectivity (/c), hydrogen (/h), double-bond stereo (/b),
50/// tetrahedral stereo (/t), charge (/q if net charge ≠ 0), isotope (/i if present),
51/// relative stereo parity (/m if 2+ stereocenters), stereo type (/s).
52pub fn inchi(mol: &Molecule) -> String {
53    let mut result = String::from("InChI=1S/");
54    let inchi_index = build_inchi_index(mol);
55
56    // Formula layer (prefix)
57    let formula_str = formula::formula_layer(mol);
58    result.push_str(&formula_str);
59
60    // Connectivity layer /c
61    if let Some(c_layer) = connection::connectivity_layer(mol) {
62        result.push_str("/c");
63        result.push_str(&c_layer);
64    }
65
66    // Hydrogen layer /h
67    if let Some(h_layer) = hydrogen::hydrogen_layer(mol) {
68        result.push_str("/h");
69        result.push_str(&h_layer);
70    }
71
72    // Double-bond stereo layer /b (E/Z)
73    if let Some(b_layer) = stereo::ez_stereo_layer(mol, &inchi_index) {
74        result.push_str("/b");
75        result.push_str(&b_layer);
76    }
77
78    // Tetrahedral stereo layer /t (R/S)
79    if let Some(t_layer) = stereo::tetrahedral_stereo_layer(mol, &inchi_index) {
80        result.push_str("/t");
81        result.push_str(&t_layer);
82    }
83
84    // Relative stereo parity layer /m (for 2+ stereocenters)
85    if let Some(m_layer) = stereo::relative_stereo_parity_layer(mol, &inchi_index) {
86        result.push_str("/m");
87        result.push_str(&m_layer);
88    }
89
90    // Stereo type layer /s (absolute=1, relative=2, racemic=3)
91    if let Some(s_layer) = stereo::stereo_type_layer(mol) {
92        result.push_str("/s");
93        result.push_str(&s_layer);
94    }
95
96    // Charge layer /q (conditional)
97    if let Some(q_layer) = charge::charge_layer(mol) {
98        result.push_str("/q");
99        result.push_str(&q_layer);
100    }
101
102    // Isotope layer /i (conditional)
103    if let Some(i_layer) = isotope::isotope_layer(mol) {
104        result.push_str("/i");
105        result.push_str(&i_layer);
106    }
107
108    result
109}
110
111/// Generate InChIKey (27-character alphanumeric identifier) from an InChI string.
112///
113/// Format: `XXXXXXXXXXXXXX-XXXXXXXXXX-N` where N is the version/protonation flag.
114pub fn inchi_key(inchi_str: &str) -> String {
115    key::inchi_key(inchi_str)
116}
117
/// Parse an InChI string back into a `Molecule`.
///
/// Supports organic molecules with stereo layers (`/b`, `/t`, `/m`, `/s`),
/// isotope layers (`/i`), and charge layers (`/q`).
124pub use parser::{parse_inchi, InchiParseError};
125
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parse `smiles` and return the InChI generated for it, panicking if the SMILES is rejected.
    fn inchi_from_smiles(smiles: &str) -> String {
        let mol = parse(smiles).expect("test SMILES should parse");
        inchi(&mol)
    }

    #[test]
    fn test_inchi_methane() {
        let inchi_str = inchi_from_smiles("C");
        assert!(inchi_str.starts_with("InChI=1S/CH4"));
    }

    #[test]
    fn test_inchi_ethane() {
        let inchi_str = inchi_from_smiles("CC");
        assert!(inchi_str.starts_with("InChI=1S/C2H6"));
    }

    #[test]
    fn test_inchi_benzene() {
        let inchi_str = inchi_from_smiles("c1ccccc1");
        eprintln!("Benzene InChI: {}", inchi_str);
        assert!(inchi_str.starts_with("InChI=1S/C6H6"));
        // Benzene should have ring closure: /c1-2-3-4-5-6-1/h1-6H
        assert!(
            inchi_str.contains("/c1-2-3-4-5-6-1"),
            "Benzene should have ring closure in connectivity"
        );
        assert!(inchi_str.contains("/h1-6H"), "Benzene should have hydrogen layer");
    }

    #[test]
    fn test_inchi_ethanol() {
        let inchi_str = inchi_from_smiles("CCO");
        assert!(inchi_str.starts_with("InChI=1S/C2H6O"));
    }

    #[test]
    fn test_inchi_key_format() {
        let inchi_str = inchi_from_smiles("c1ccccc1");
        let key = inchi_key(&inchi_str);
        // Length is checked first so the slices below cannot go out of bounds.
        assert_eq!(key.len(), 27, "InChIKey should be 27 characters");
        assert_eq!(&key[14..15], "-", "First dash at position 14");
        assert_eq!(&key[25..26], "-", "Second dash at position 25");
    }

    #[test]
    fn test_inchi_l_alanine_with_stereo_layers() {
        let inchi_str = inchi_from_smiles("N[C@@H](C)C(=O)O");
        eprintln!("L-alanine InChI: {}", inchi_str);
        assert!(inchi_str.contains("/t"), "L-alanine should have /t layer (R/S)");
        assert!(inchi_str.contains("/s1"), "L-alanine should have /s1 layer (absolute stereo)");
        // Single stereocenter should NOT have /m layer
        assert!(!inchi_str.contains("/m"), "Single stereocenter should not have /m layer");
    }

    #[test]
    fn test_inchi_dihydroxybutanoic_acid_with_relative_parity() {
        // CH3-CH(OH)-CH(OH)-COOH (2,3-dihydroxybutanoic acid): two adjacent carbons carrying
        // chirality markers. This is not tartaric acid, which has a carboxyl group at both ends.
        let inchi_str = inchi_from_smiles("C[C@H](O)[C@@H](O)C(=O)O");
        eprintln!("2,3-Dihydroxybutanoic acid InChI: {}", inchi_str);
        assert!(
            inchi_str.contains("/t"),
            "2,3-Dihydroxybutanoic acid should have /t layer"
        );
        assert!(inchi_str.contains("/m"), "Two stereocenters should have /m layer");
        assert!(inchi_str.contains("/s1"), "With chirality markers, should have /s1");
    }

    #[test]
    fn test_inchi_ethane_no_stereo() {
        let inchi_str = inchi_from_smiles("CC");
        eprintln!("Ethane InChI: {}", inchi_str);
        assert!(!inchi_str.contains("/t"), "Ethane should not have /t layer");
        assert!(!inchi_str.contains("/m"), "Ethane should not have /m layer");
        assert!(inchi_str.contains("/s3"), "Achiral ethane should have /s3 (racemic)");
    }
}