pub mod chapter_map;
pub mod detector;
pub use chapter_map::HeadingHint;
pub use detector::{FunctionalGroup, StructuralFeatures};
use crate::types::OrganicInorganic;
/// Combined result of classifying one SMILES string.
///
/// Built by [`classify_smiles`], which fills every field from the `detector`
/// and `chapter_map` submodules. The type derives `serde::Serialize`, so the
/// declaration order of the fields below is also their serialization order.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SmilesClassification {
/// Organic/inorganic verdict, as returned by `detector::classify_organic`.
pub organic_class: OrganicInorganic,
/// Functional groups reported by `detector::detect_functional_groups`.
pub functional_groups: Vec<FunctionalGroup>,
/// Structural features reported by `detector::detect_structural_features`.
pub structural_features: StructuralFeatures,
/// Hint produced by `chapter_map::map_to_subheading` from the three fields above.
pub heading_hint: HeadingHint,
}
/// Upper bound on the length of a SMILES string accepted by [`classify_smiles`].
///
/// The limit is compared against `str::len` of the trimmed input, so it is
/// measured in bytes rather than characters; longer inputs yield `None`.
pub const MAX_SMILES_LEN: usize = 4096;
/// Classifies a SMILES string and derives a [`HeadingHint`] for it.
///
/// Surrounding whitespace is stripped before anything else happens. The
/// trimmed text is then handed to the three `detector` routines (organic
/// class, functional groups, structural features), and their results are
/// passed to `chapter_map::map_to_subheading` to obtain the heading hint.
///
/// # Returns
///
/// * `None` when the trimmed input is empty or its byte length exceeds
///   [`MAX_SMILES_LEN`].
/// * `Some(SmilesClassification)` carrying all four results otherwise.
pub fn classify_smiles(smiles: &str) -> Option<SmilesClassification> {
    let candidate = smiles.trim();

    // Accept only byte lengths in 1..=MAX_SMILES_LEN: zero means the input was
    // blank, anything above the cap is rejected before the detectors run.
    if !(1..=MAX_SMILES_LEN).contains(&candidate.len()) {
        return None;
    }

    let organic = detector::classify_organic(candidate);
    let groups = detector::detect_functional_groups(candidate);
    let features = detector::detect_structural_features(candidate);

    // The hint is computed from borrowed detector results, which are then
    // moved into the returned value.
    let hint = chapter_map::map_to_subheading(&groups, &organic, &features);

    Some(SmilesClassification {
        organic_class: organic,
        functional_groups: groups,
        structural_features: features,
        heading_hint: hint,
    })
}
/// Tests for [`classify_smiles`]: input guards first, then one representative
/// molecule per expected heading.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- input guards -----------------------------------------------------

    #[test]
    fn empty_smiles_returns_none() {
        assert!(classify_smiles("").is_none());
        assert!(classify_smiles(" ").is_none());
        // Trimming removes all leading/trailing whitespace, not just spaces.
        assert!(classify_smiles("\t\r\n").is_none());
    }

    #[test]
    fn oversized_smiles_returns_none() {
        // One byte over the cap must be rejected before any detection runs.
        let too_long = "C".repeat(MAX_SMILES_LEN + 1);
        assert!(classify_smiles(&too_long).is_none());
    }

    #[test]
    fn length_limit_applies_after_trimming() {
        // The raw input is far longer than the cap, but only padding is in
        // excess; the trimmed molecule must classify exactly like the bare one.
        let padding = " ".repeat(MAX_SMILES_LEN);
        let padded = format!("{}CCO{}", padding, padding);
        let from_padded = classify_smiles(&padded).expect("padding must be trimmed before the length check");
        let from_bare = classify_smiles("CCO").unwrap();
        assert_eq!(from_padded.heading_hint.heading, from_bare.heading_hint.heading);
    }

    // ---- representative molecules -----------------------------------------

    #[test]
    fn acetone_ketone_heading() {
        let r = classify_smiles("CC(C)=O").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2914));
        assert!(r.functional_groups.contains(&FunctionalGroup::Ketone));
        assert!(matches!(r.organic_class, OrganicInorganic::Organic));
    }

    #[test]
    fn acetic_acid_heading() {
        let r = classify_smiles("CC(=O)O").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2915));
        assert!(r.functional_groups.contains(&FunctionalGroup::CarboxylicAcid));
    }

    #[test]
    fn ethyl_acetate_heading() {
        let r = classify_smiles("CCOC(C)=O").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2915));
        assert!(r.functional_groups.contains(&FunctionalGroup::Ester));
    }

    #[test]
    fn benzaldehyde_heading() {
        let r = classify_smiles("O=Cc1ccccc1").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2912));
        assert!(r.functional_groups.contains(&FunctionalGroup::Aldehyde));
    }

    #[test]
    fn ethanol_heading() {
        // Ethanol is the one case here that is expected outside chapter 29.
        let r = classify_smiles("CCO").unwrap();
        assert_eq!(r.heading_hint.chapter, 22);
        assert_eq!(r.heading_hint.heading, Some(2207));
        assert_eq!(r.heading_hint.subheading.as_deref(), Some("220710"));
        assert!(r.functional_groups.contains(&FunctionalGroup::Alcohol));
    }

    #[test]
    fn methylamine_heading() {
        let r = classify_smiles("CN").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2921));
    }

    #[test]
    fn chlorobenzene_heading() {
        let r = classify_smiles("Clc1ccccc1").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2903));
        assert!(r.functional_groups.contains(&FunctionalGroup::Halide));
    }

    #[test]
    fn co2_is_inorganic_ch28() {
        let r = classify_smiles("O=C=O").unwrap();
        assert_eq!(r.heading_hint.chapter, 28);
        assert!(matches!(r.organic_class, OrganicInorganic::Inorganic));
    }

    #[test]
    fn epoxide_heading() {
        let r = classify_smiles("C1CO1").unwrap();
        assert_eq!(r.heading_hint.heading, Some(2910));
    }

    #[test]
    fn phthalic_anhydride_heading() {
        let r = classify_smiles("O=C1OC(=O)c2ccccc21").unwrap();
        assert!(r.functional_groups.contains(&FunctionalGroup::Anhydride));
        // NOTE(review): this pins the mapper's current output. In the HS
        // nomenclature phthalic anhydride appears to belong under 2917
        // (polycarboxylic acids and their anhydrides) rather than 2915 —
        // confirm whether 2915 is the intended coarse hint for anhydrides.
        assert_eq!(r.heading_hint.heading, Some(2915));
    }
}