use chematic_core::{Molecule, AtomIdx};
use chematic_smiles::canonical::canonical_atom_order;
use std::collections::{HashMap, HashSet};
/// Builds a connectivity-layer string for the heavy-atom skeleton of `mol`.
///
/// Heavy atoms (every atom whose atomic number is not 1) are numbered
/// `1..=n` in the order produced by [`canonical_atom_order`]. Each connected
/// fragment of the heavy-atom graph is then written out by a depth-first walk
/// (see `dfs_connection`), followed by one `-<lo>` suffix per bond that the
/// walk did not use as a tree edge (a ring closure), where `<lo>` is the
/// smaller of the bond's two atom numbers. Closures are emitted in ascending
/// `(lo, hi)` order.
///
/// Fragments are joined with `;`. Previously only the fragment containing the
/// first heavy atom was traversed; atoms of every other fragment were dropped
/// and their bonds were misreported as ring closures.
///
/// NOTE(review): atom numbers stay global across fragments here; standard
/// InChI appears to restart numbering per component — confirm before relying
/// on multi-fragment output for interchange.
///
/// # Returns
///
/// * `None` when the molecule has no heavy atoms.
/// * `Some("1")` when it has exactly one heavy atom.
/// * `Some(layer)` otherwise.
pub fn connectivity_layer(mol: &Molecule) -> Option<String> {
    let canonical_order = canonical_atom_order(mol);

    // Heavy atoms in canonical order; an atom's layer number is its 1-based
    // position in this list.
    let mut inchi_index: HashMap<AtomIdx, usize> = HashMap::new();
    let mut heavy_atoms: Vec<AtomIdx> = Vec::new();
    for &canon_idx in &canonical_order {
        let atom_idx = AtomIdx(canon_idx as u32);
        if mol.atom(atom_idx).element.atomic_number() != 1 {
            heavy_atoms.push(atom_idx);
            inchi_index.insert(atom_idx, heavy_atoms.len());
        }
    }

    match heavy_atoms.len() {
        0 => return None,
        1 => return Some("1".to_string()),
        _ => {}
    }

    // Walk every fragment, seeding each walk from the lowest-numbered atom
    // that no earlier walk has reached.
    let mut tree_edges: HashSet<(AtomIdx, AtomIdx)> = HashSet::new();
    let mut component_of: HashMap<AtomIdx, usize> = HashMap::new();
    let mut components: Vec<String> = Vec::new();
    for &start in &heavy_atoms {
        if component_of.contains_key(&start) {
            continue;
        }
        let mut reached: HashSet<AtomIdx> = HashSet::new();
        let mut text = String::new();
        dfs_connection(
            &start,
            None,
            mol,
            &inchi_index,
            &mut reached,
            &mut text,
            &mut tree_edges,
        );
        let component_id = components.len();
        for &atom in &reached {
            component_of.insert(atom, component_id);
        }
        components.push(text);
    }

    // Any heavy-atom bond the walks did not use as a tree edge closes a ring;
    // file it under the fragment that owns its endpoints.
    let mut ring_closures: Vec<Vec<(usize, usize)>> = vec![Vec::new(); components.len()];
    for (_bond_idx, bond) in mol.bonds() {
        let atom1 = bond.atom1;
        let atom2 = bond.atom2;
        let (i1, i2) = match (inchi_index.get(&atom1), inchi_index.get(&atom2)) {
            (Some(&i1), Some(&i2)) => (i1, i2),
            // Bonds to hydrogen are not part of the skeleton.
            _ => continue,
        };
        let normalized_edge = if atom1 < atom2 { (atom1, atom2) } else { (atom2, atom1) };
        if tree_edges.contains(&normalized_edge) {
            continue;
        }
        if let Some(&component_id) = component_of.get(&atom1) {
            ring_closures[component_id].push((i1.min(i2), i1.max(i2)));
        }
    }

    let mut result = String::new();
    for (component_id, (text, mut closures)) in
        components.into_iter().zip(ring_closures).enumerate()
    {
        if component_id > 0 {
            result.push(';');
        }
        result.push_str(&text);
        closures.sort_unstable();
        for (lo, _hi) in closures {
            result.push('-');
            result.push_str(&lo.to_string());
        }
    }
    Some(result)
}
/// Depth-first walk over the heavy-atom graph that appends the traversal text
/// for `atom` and everything newly reachable from it to `result`.
///
/// * `atom` – atom to visit; the call is a no-op if it is already in `visited`.
/// * `parent` – atom the walk arrived from, `None` for the starting atom.
/// * `inchi_index` – layer number of every heavy atom; indexing panics if
///   `atom` has no entry.
/// * `visited` – atoms already written; updated in place.
/// * `result` – output buffer. The atom's number is written bare when the
///   buffer is empty, or prefixed with `-` when the buffer is non-empty and
///   `parent` is set.
/// * `tree_edges` – receives the `(smaller, larger)` atom pair of every bond
///   the walk descends through.
///
/// Neighbours are visited in ascending layer-number order. The first
/// unvisited neighbour continues in `result` directly; each later one is
/// rendered into its own buffer and appended as `(<branch>),`. A single
/// trailing comma, if present, is removed before returning.
fn dfs_connection(
    atom: &AtomIdx,
    parent: Option<AtomIdx>,
    mol: &Molecule,
    inchi_index: &HashMap<AtomIdx, usize>,
    visited: &mut HashSet<AtomIdx>,
    result: &mut String,
    tree_edges: &mut HashSet<(AtomIdx, AtomIdx)>,
) {
    let current = *atom;
    // `insert` reports `false` when the atom was already recorded.
    if !visited.insert(current) {
        return;
    }

    let label = inchi_index[&current];
    match (result.is_empty(), parent) {
        (true, _) => result.push_str(&label.to_string()),
        (false, Some(_)) => {
            result.push('-');
            result.push_str(&label.to_string());
        }
        (false, None) => {}
    }

    // Numbered, non-hydrogen neighbours, lowest layer number first.
    let mut heavy_neighbors: Vec<AtomIdx> = mol
        .neighbors(current)
        .map(|(idx, _bond)| idx)
        .filter(|idx| {
            mol.atom(*idx).element.atomic_number() != 1 && inchi_index.contains_key(idx)
        })
        .collect();
    heavy_neighbors.sort_by_key(|idx| inchi_index[idx]);

    let mut continues_chain = true;
    for next in heavy_neighbors {
        // Re-checked on every iteration: an earlier descent may have reached
        // `next` already through a ring.
        if visited.contains(&next) || parent == Some(next) {
            continue;
        }

        let edge = if current < next { (current, next) } else { (next, current) };
        tree_edges.insert(edge);

        if continues_chain {
            continues_chain = false;
            dfs_connection(&next, Some(current), mol, inchi_index, visited, result, tree_edges);
            continue;
        }

        let mut branch = String::new();
        dfs_connection(&next, Some(current), mol, inchi_index, visited, &mut branch, tree_edges);
        if !branch.is_empty() {
            result.push('(');
            result.push_str(&branch);
            result.push_str("),");
        }
    }

    if result.ends_with(',') {
        result.pop();
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parses `smiles` (panicking with `label` on failure) and returns the
    /// raw result of `connectivity_layer`.
    fn layer_of(smiles: &str, label: &str) -> Option<String> {
        let mol = parse(smiles).expect(label);
        connectivity_layer(&mol)
    }

    /// Like `layer_of`, but asserts that a layer was produced and unwraps it.
    fn require_layer(smiles: &str, label: &str) -> String {
        let c_layer = layer_of(smiles, label);
        assert!(c_layer.is_some());
        c_layer.unwrap()
    }

    // --- exact-output checks -------------------------------------------------

    #[test]
    fn test_connectivity_methane() {
        assert_eq!(layer_of("C", "methane"), Some("1".to_string()));
    }

    #[test]
    fn test_connectivity_ethane() {
        assert_eq!(layer_of("CC", "ethane"), Some("1-2".to_string()));
    }

    #[test]
    fn test_connectivity_benzene() {
        let c_str = require_layer("c1ccccc1", "benzene");
        assert_eq!(c_str, "1-2-3-4-5-6-1", "Benzene should have ring closure bond");
    }

    // --- linear chains: no stray commas --------------------------------------

    // Despite the name, the input is linear propane.
    #[test]
    fn test_connectivity_branched_propane() {
        let c_str = require_layer("CCC", "propane");
        assert!(!c_str.ends_with(','), "Linear propane should not end with comma");
    }

    #[test]
    fn test_connectivity_no_false_commas() {
        let c_str = require_layer("CCCC", "butane");
        assert!(!c_str.ends_with(','), "Linear molecules should not end with trailing comma");
    }

    // --- branched chains: parentheses and comma placement --------------------

    #[test]
    fn test_connectivity_branched_isobutane() {
        let c_str = require_layer("CC(C)C", "isobutane");
        assert!(c_str.contains('('), "Should have parentheses for branches");
        assert!(c_str.contains(','), "Should have cursor resets (commas) between branches");
    }

    #[test]
    fn test_connectivity_neopentane() {
        let c_str = require_layer("CC(C)(C)C", "neopentane");
        assert!(c_str.contains(','), "Multiple branches should have cursor resets");
    }

    #[test]
    fn test_connectivity_cursor_reset_position() {
        let c_str = require_layer("CC(C)C", "isobutane");
        assert!(!c_str.contains(",,"), "Should not have double commas");
        assert!(!c_str.ends_with(','), "Should not end with trailing comma");
    }

    #[test]
    fn test_connectivity_multi_level_branches() {
        let c_str = require_layer("CC(CC)C", "branched");
        assert!(c_str.contains('('), "Should have parentheses for branches");
    }

    #[test]
    fn test_connectivity_cursor_reset_inchi_standard() {
        let c_str = require_layer("CC(C)C", "isobutane");
        assert_eq!(
            c_str.matches('(').count(),
            c_str.matches(')').count(),
            "Parentheses should be balanced"
        );
    }

    #[test]
    fn test_connectivity_pentane_isomers() {
        let lin_c = layer_of("CCCCC", "n-pentane").unwrap();
        assert!(!lin_c.contains(','), "Linear pentane should have no commas");

        let iso_c = layer_of("CC(C)CC", "isopentane").unwrap();
        assert!(iso_c.contains('('), "Branched pentane should have parentheses");
    }

    #[test]
    fn test_connectivity_single_branch_no_trailing_comma() {
        let c_str = require_layer("CC(C)C", "isobutane");
        assert!(!c_str.ends_with(','), "Should not end with comma after single branch");
        assert!(!c_str.ends_with("),"), "Should not have comma after last closing paren");
    }

    // --- substituted rings: only checks that a layer is produced -------------

    #[test]
    fn test_connectivity_toluene() {
        let c_layer = layer_of("Cc1ccccc1", "toluene");
        assert!(c_layer.is_some());
    }

    #[test]
    fn test_connectivity_dimethylbenzene() {
        let c_layer = layer_of("Cc1ccccc1C", "xylene");
        assert!(c_layer.is_some());
    }
}