use chematic_core::Molecule;
use chematic_smarts::{find_matches, parse_smarts};
use crate::bitvec::BitVec2048;
/// SMARTS patterns used by [`maccs`], one per fingerprint bit.
///
/// The position of a pattern in this slice is the bit index that `maccs`
/// sets when the pattern matches, so entries must never be reordered or
/// removed without updating every consumer of the bit positions.
///
/// Index 0 holds an empty string: it is a placeholder that `maccs` skips, which
/// keeps the remaining patterns aligned with their bit indices.
///
/// NOTE(review): the name suggests these are modeled on the MACCS structural
/// keys; the table has not been checked against a canonical key definition
/// here, and a few patterns appear more than once (e.g.
/// `[#7;R]~[#6;R]=[#7;R]`) — confirm that is intentional.
static MACCS_SMARTS: &[&str] = &[
"", "[#103]", "[#102]", "[#101]", "[#100]", "[#99]", "[#98]", "[#97]", "[#96]", "[#95]", "[#94]", "[#93]", "[#92]", "[#90,#91]", "[#89]", "[#88]", "[#87]", "[#85]", "[#84]", "[#83]", "[#82]", "[#81]", "[#80]", "[#79]", "[#77,#78]", "[#76]", "[#75]", "[#74]", "[#73]", "[#72]", "[#71]", "[#70]", "[#69]", "[#68]", "[#67]", "[#66]", "[#65]", "[#64]", "[#63]", "[#62]", "[#61]", "[#60]", "[#59]", "[#58]", "[#57]", "[#55,#56]", "[#52,#53,#54]", "[#51]", "[#50]", "[#49]", "[#47,#48]", "[#46]", "[#45]", "[#44]", "[#43]", "[#42]", "[#41]", "[#40]", "[#39]", "[#37,#38]", "[#36]", "[#35]", "[#34]", "[#33]", "[#32]", "[#31]", "[#30]", "[#29]", "[#28]", "[#27]", "[#26]", "[#25]", "[#24]", "[#23]", "[#22]", "[#21]", "[#16;R]", "[#8;R]", "[#7;R]", "[#16]", "[#15]", "[#14]", "[#6]~[#16]", "[#7]~[#6]~[#7]", "[#7]~[#7]", "[#8]~[#8]", "[#8]~[#15]", "[#16]~[#8]", "[#6]=[#16]", "[#16]=[#7]", "[#6]=[#7]", "[#7]~[#6]=[#8]", "[#8]~[#6]=[#8]", "[#6]=[#6]", "[#6]#[#7]", "[#6]#[#6]", "[#6]~[#15]", "[#6]~[#8]~[#6]", "[#6]~[#7]~[#6]", "[#6]~[#16]~[#6]", "[#8]~[#6]~[#8]", "[#7]~[#6]~[#8]", "[#7]~[#6]~[#16]", "[#6]=[#6]~[#6]", "[#6]=[#6]~[#7]", "[#6]=[#6]~[#8]", "[#6]=[#6]~[#16]", "[#6]=[#6]~[#6]=[#6]", "[#6]=[#7]~[#6]=[#8]", "[#6]=[#7]~[#6]=[#7]", "[#6]=[#8]~[#7]~[#6]=[#8]", "[#6]~[#6]~[#8]~[#6]=[#8]", "[#6]~[#6]~[#7]~[#6]=[#8]", "[#6]~[#8]~[#6]=[#8]", "[#7]~[#6](=[#8])~[#7]", "[#6]=[#8]", "[#6]~[#7](~[#6])~[#6]", "[#8]~[#6]~[#7]", "[!#1;!#6]~[#6]=[#8]", "[#6]=[#8]~[#8]", "[#7]=[#8]", "[#7;R]~[#6;R]=[#7;R]", "[#6]~[#8]~[#8]~[#6]", "[#16]=[#8]", "[!#6;!#1]~[!#6;!#1]", "[!#6;!#1;!#7;!#8;!#16;!#15;!#9;!#17;!#35;!#53]", "[#7]~[#6]~[#7]~[#6]~[#8]", "[#7]~[#6]~[#16]", "[#7]~[#7]~[#6]", "[#7]~[#6]=[#6]~[#7]", "[#6]=[#7]~[#7]=[#6]", "[#8]~[#16](=[#8])=[#8]", "[#16]~[#6]~[#16]", "[!#1;!#6]~[!#1;!#6]~[!#1;!#6]", "[#6]~[#16]~[#8]~[#6]", "[#6]~[#7]~[#8]", "[#7]~[#7]~[#7]", "[#6]~[#7]~[#7]~[#7]", "[#8;!R]~[#6;R]", "[#7;R]~[#6;!R]=[#8]", "[#6]~[#8]~[#6]~[#8]", "[#7]~[#6](~[#8])~[#7]", 
"[!#1;!#6]~[!#1;!#6]~[!#1;!#6]~[!#1;!#6]", "[#6]~[#7;R]~[#6]~[#7;R]", "[#6]~[#6]~[#8]~[#6]~[#6]", "[#7]~[#7]~[#6]=[#8]", "[#6]~[#6]~[#7]~[#7]", "[#7;R]~[#6;R]~[#7;R]~[#6;R]", "[#6]~[#8]~[#6]~[#6]", "[#16;R]~[#6;R]~[#7;R]", "[#16;R]~[#6;R]~[#8;R]", "[#16;R]~[#6;R]=[#7;R]", "[#7;R]~[#6;R]=[#7;R]", "[#7;R]~[#6;R]=[#8;R]", "[#8;R]~[#6;R]=[#8;R]", "[#8;R]~[#6;R]~[#7;R]", "[#8;R]~[#6;R]~[#8;R]", "[#8;R]~[#6;R]~[#6;R]", "[#7;R]~[#6;R]~[#6;R]", "[#6;R]~[#6;R]~[#6;R]~[#6;R]~[#6;R]~[#6;R]", "[a]~[a]~[a]~[a]~[a]~[a]", "[a]", "[!#6;a]", "[!#6;!#1]", "[#6;R]", "[R]", ];
/// Computes a substructure-key fingerprint for `mol`.
///
/// Every non-empty entry of `MACCS_SMARTS` is parsed and matched against the
/// molecule; when the pattern at table index `i` has at least one match, bit
/// `i` of the returned [`BitVec2048`] is set.
///
/// Patterns that fail to parse are skipped without reporting an error, so
/// this function never sets their bits. The patterns are re-parsed on every
/// call.
pub fn maccs(mol: &Molecule) -> BitVec2048 {
    let mut bits = BitVec2048::new();

    // Lazy pipeline: each pattern is parsed and matched in table order, one
    // at a time, exactly as a plain loop would do.
    let matching_keys = MACCS_SMARTS
        .iter()
        .enumerate()
        // The empty placeholder entries carry no pattern.
        .filter(|(_, smarts)| !smarts.is_empty())
        // Best-effort: drop patterns the SMARTS parser rejects.
        .filter_map(|(bit, &smarts)| parse_smarts(smarts).ok().map(|query| (bit, query)))
        .filter(|(_, query)| !find_matches(query, mol).is_empty());

    for (bit, _) in matching_keys {
        bits.set(bit);
    }

    bits
}
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parses `smiles` (panicking if it is invalid) and fingerprints the result.
    #[track_caller]
    fn fingerprint(smiles: &str) -> BitVec2048 {
        let mol = parse(smiles).unwrap();
        maccs(&mol)
    }

    // --- Smoke tests: common molecules must light up at least one bit. ---

    #[test]
    fn maccs_benzene_nonzero() {
        let bits = fingerprint("c1ccccc1");
        assert!(bits.popcount() > 0, "benzene maccs should be nonzero");
    }

    #[test]
    fn maccs_ethanol_nonzero() {
        let bits = fingerprint("CCO");
        assert!(bits.popcount() > 0, "ethanol maccs should be nonzero");
    }

    // --- Specific bits: the index is the pattern's position in the table. ---

    #[test]
    fn maccs_benzene_has_aromatic_bit() {
        assert!(
            fingerprint("c1ccccc1").get(161),
            "benzene should have aromatic bit (key 162, index 161) set"
        );
    }

    #[test]
    fn maccs_deterministic() {
        // Fingerprint the very same parsed molecule twice.
        let benzene = parse("c1ccccc1").unwrap();
        let first = maccs(&benzene);
        let second = maccs(&benzene);
        assert_eq!(first, second, "maccs must be deterministic");
    }

    #[test]
    fn maccs_aspirin_has_carbonyl_bit() {
        assert!(
            fingerprint("CC(=O)Oc1ccccc1C(=O)O").get(115),
            "aspirin should have C=O bit (key 116, index 115) set"
        );
    }

    #[test]
    fn maccs_acetonitrile_has_triple_bond_bit() {
        assert!(
            fingerprint("CC#N").get(94),
            "acetonitrile should have C#N bit (key 95, index 94) set"
        );
    }

    #[test]
    fn maccs_bromobenzene_has_bromine_bit() {
        assert!(
            fingerprint("c1ccccc1Br").get(61),
            "bromobenzene should have Br bit (key 62, index 61) set"
        );
    }
}