use crate::bond::{BondOrder, BondStereo};
use crate::descriptors::aromaticity::{all_aromatic_atoms, all_aromatic_bonds};
use crate::descriptors::chirality::all_chiralities;
use crate::descriptors::conjugation::all_conjugated_bonds;
use crate::descriptors::elements::get_element;
use crate::descriptors::hybridization::all_hybridizations;
use crate::descriptors::topological::ring_atoms;
use crate::descriptors::valence;
use crate::graph::AdjacencyList;
use crate::molecule::Molecule;
/// Per-atom integer feature table for one molecule.
///
/// `ogb_atom_features` fills each row with nine entries, in this order:
/// atomic number, chirality index, degree, formal charge (offset so that a
/// neutral atom is stored as 5), implicit hydrogen count, radical count,
/// hybridization index, aromaticity flag, ring-membership flag.
///
/// The type compares by value and has an empty default so feature tables can
/// be checked directly with `assert_eq!`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OgbAtomFeatures {
    /// Number of atoms the table was built for.
    pub num_atoms: usize,
    /// One feature row per atom, in atom order.
    pub features: Vec<Vec<i32>>,
}
/// Integer feature table with one row per bond (or per directed edge).
///
/// `ogb_bond_features` produces one three-entry row per bond: bond-type code,
/// stereo code, conjugation flag. `ogb_graph_features` reuses this type for
/// its directed edge list, where every bond contributes two identical rows.
///
/// The type compares by value and has an empty default so feature tables can
/// be checked directly with `assert_eq!`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OgbBondFeatures {
    /// Number of rows in `features`.
    pub num_bonds: usize,
    /// Feature rows, in bond (or directed-edge) order.
    pub features: Vec<Vec<i32>>,
}
/// Complete graph encoding of a molecule: atom features plus a directed edge
/// list with per-edge bond features.
///
/// As built by `ogb_graph_features`, every bond appears twice (once in each
/// direction), so `edge_src`, `edge_dst` and `bond_features.features` all have
/// two entries per bond and share the same ordering.
#[derive(Debug, Clone)]
pub struct OgbGraphFeatures {
/// Per-atom feature rows, as returned by `ogb_atom_features`.
pub atom_features: OgbAtomFeatures,
/// Per-directed-edge feature rows; `num_bonds` here counts directed edges.
pub bond_features: OgbBondFeatures,
/// Source atom index of each directed edge.
pub edge_src: Vec<usize>,
/// Destination atom index of each directed edge.
pub edge_dst: Vec<usize>,
}
/// Builds the nine-column integer feature row for every atom of `mol`.
///
/// Columns, in order:
/// 0. atomic number looked up via `get_element`, or 0 when the lookup fails
/// 1. `to_ogb_index()` of the atom's entry in `all_chiralities`
/// 2. degree from the adjacency list (0..=10, otherwise 11)
/// 3. formal charge shifted by +5 (-5..=5 maps to 0..=10, otherwise 11)
/// 4. implicit hydrogen count (0..=8, otherwise 9)
/// 5. the atom's `radical` value, 0 when unset (0..=4, otherwise 5)
/// 6. `to_ogb_index()` of the atom's entry in `all_hybridizations`
/// 7. 1 if the atom is flagged aromatic, else 0
/// 8. 1 if the atom is flagged as a ring atom, else 0
///
/// Values outside the listed ranges are folded onto the slot right after the
/// last regular index, so the output never contains negative or unbounded
/// categorical indices. In-range values are encoded exactly as before.
///
/// NOTE(review): column 0 stores the raw atomic number (oxygen is 8), which the
/// unit tests in this file rely on. The OGB reference featurizer is believed to
/// use a zero-based index instead; confirm which convention consumers expect.
///
/// # Panics
///
/// Panics if any descriptor table is shorter than `mol.atom_count()`.
pub fn ogb_atom_features(mol: &Molecule) -> OgbAtomFeatures {
    /// Position of `value` in the inclusive vocabulary `lowest..=highest`, or
    /// the slot right after the last entry when `value` falls outside it.
    fn vocab_index(value: i32, lowest: i32, highest: i32) -> i32 {
        if (lowest..=highest).contains(&value) {
            value - lowest
        } else {
            highest - lowest + 1
        }
    }

    let n = mol.atom_count();
    let adj = AdjacencyList::from_molecule(mol);
    let hybridizations = all_hybridizations(mol);
    let aromatic_atoms = all_aromatic_atoms(mol);
    let in_ring = ring_atoms(mol);
    let chiralities = all_chiralities(mol);

    let features: Vec<Vec<i32>> = (0..n)
        .map(|i| {
            let atom = &mol.atoms[i];
            // Unknown element symbols fall back to 0.
            let atomic_number = get_element(&atom.element)
                .map(|e| e.atomic_number as i32)
                .unwrap_or(0);

            vec![
                atomic_number,
                chiralities[i].to_ogb_index() as i32,
                vocab_index(adj.degree(i) as i32, 0, 10),
                vocab_index(atom.formal_charge as i32, -5, 5),
                vocab_index(valence::implicit_hydrogen_count(mol, i) as i32, 0, 8),
                vocab_index(atom.radical.unwrap_or(0) as i32, 0, 4),
                hybridizations[i].to_ogb_index() as i32,
                i32::from(aromatic_atoms[i]),
                i32::from(in_ring[i]),
            ]
        })
        .collect();

    OgbAtomFeatures {
        num_atoms: n,
        features,
    }
}
/// Builds the three-column integer feature row for every bond of `mol`.
///
/// Columns, in order:
/// 0. bond-type code: 0 single, 1 double, 2 triple, 3 aromatic. A bond flagged
///    by `all_aromatic_bonds` is always 3 regardless of its stored order.
///    Query, coordination and hydrogen bond orders collapse onto 0, except
///    `DoubleOrAromatic`, which is coded as 1.
/// 1. stereo code, only for bonds whose stored order is `Double`:
///    0 none, 1 up, 2 either, 3 down. All other bonds get 0.
/// 2. 1 if the bond is flagged conjugated, else 0
///
/// # Panics
///
/// Panics if the aromaticity or conjugation table is shorter than the bond list.
pub fn ogb_bond_features(mol: &Molecule) -> OgbBondFeatures {
    let bond_total = mol.bond_count();
    let is_conjugated = all_conjugated_bonds(mol);
    let is_aromatic = all_aromatic_bonds(mol);

    let features: Vec<Vec<i32>> = mol
        .bonds
        .iter()
        .enumerate()
        .map(|(idx, bond)| {
            // Perceived aromaticity takes precedence over the stored bond order.
            let type_code = if is_aromatic[idx] {
                3
            } else {
                match bond.order {
                    BondOrder::Single
                    | BondOrder::SingleOrDouble
                    | BondOrder::SingleOrAromatic
                    | BondOrder::Any
                    | BondOrder::Coordination
                    | BondOrder::Hydrogen => 0,
                    BondOrder::Double | BondOrder::DoubleOrAromatic => 1,
                    BondOrder::Triple => 2,
                    BondOrder::Aromatic => 3,
                }
            };

            // Stereo is only reported for bonds stored as plain double bonds.
            let stereo_code = if bond.order == BondOrder::Double {
                match bond.stereo {
                    BondStereo::None => 0,
                    BondStereo::Up => 1,
                    BondStereo::Either => 2,
                    BondStereo::Down => 3,
                }
            } else {
                0
            };

            let conjugation_flag = i32::from(is_conjugated[idx]);

            vec![type_code, stereo_code, conjugation_flag]
        })
        .collect();

    OgbBondFeatures {
        num_bonds: bond_total,
        features,
    }
}
/// Assembles the full graph encoding of `mol`: atom features plus a directed
/// edge list with matching per-edge bond features.
///
/// Every bond `(atom1, atom2)` is emitted twice, first as `atom1 -> atom2` and
/// then as `atom2 -> atom1`, and both directions carry the same feature row.
/// The returned `bond_features.num_bonds` is therefore the number of directed
/// edges (twice the molecule's bond count).
pub fn ogb_graph_features(mol: &Molecule) -> OgbGraphFeatures {
    let atom_features = ogb_atom_features(mol);
    let bond_feats = ogb_bond_features(mol);

    let directed_edge_count = mol.bond_count() * 2;
    let mut edge_src = Vec::with_capacity(directed_edge_count);
    let mut edge_dst = Vec::with_capacity(directed_edge_count);
    let mut directed_bond_features = Vec::with_capacity(directed_edge_count);

    // `ogb_bond_features` yields exactly one row per entry of `mol.bonds`, in
    // the same order, so the two sequences can be walked in lockstep.
    for (bond, feat) in mol.bonds.iter().zip(bond_feats.features) {
        // Forward direction gets a copy of the row...
        edge_src.push(bond.atom1);
        edge_dst.push(bond.atom2);
        directed_bond_features.push(feat.clone());

        // ...and the reverse direction takes ownership of the original, which
        // avoids a second allocation per bond.
        edge_src.push(bond.atom2);
        edge_dst.push(bond.atom1);
        directed_bond_features.push(feat);
    }

    OgbGraphFeatures {
        atom_features,
        bond_features: OgbBondFeatures {
            num_bonds: directed_bond_features.len(),
            features: directed_bond_features,
        },
        edge_src,
        edge_dst,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::atom::Atom;
    use crate::bond::{Bond, BondOrder};

    /// Water with explicit hydrogens: atom 0 is O, bonded to H atoms 1 and 2.
    fn make_water() -> Molecule {
        let mut water = Molecule::new("water");
        water.atoms.push(Atom::new(0, "O", 0.0, 0.0, 0.0));
        water.atoms.push(Atom::new(1, "H", 0.96, 0.0, 0.0));
        water.atoms.push(Atom::new(2, "H", -0.24, 0.93, 0.0));
        water.bonds.push(Bond::new(0, 1, BondOrder::Single));
        water.bonds.push(Bond::new(0, 2, BondOrder::Single));
        water
    }

    /// Six carbons joined in a ring by aromatic bonds (no hydrogens).
    fn make_benzene() -> Molecule {
        let mut ring = Molecule::new("benzene");
        ring.atoms
            .extend((0..6).map(|i| Atom::new(i, "C", 0.0, 0.0, 0.0)));
        for start in 0..6 {
            let end = (start + 1) % 6;
            ring.bonds.push(Bond::new(start, end, BondOrder::Aromatic));
        }
        ring
    }

    #[test]
    fn test_atom_features_water() {
        let table = ogb_atom_features(&make_water());
        assert_eq!(table.num_atoms, 3);
        assert_eq!(table.features.len(), 3);

        // Oxygen: atomic number 8, two neighbours.
        let oxygen = &table.features[0];
        assert_eq!(oxygen[0], 8);
        assert_eq!(oxygen[2], 2);

        // Hydrogen: atomic number 1, one neighbour.
        let hydrogen = &table.features[1];
        assert_eq!(hydrogen[0], 1);
        assert_eq!(hydrogen[2], 1);
    }

    #[test]
    fn test_atom_features_9_elements() {
        let table = ogb_atom_features(&make_water());
        for row in table.features.iter() {
            assert_eq!(row.len(), 9);
        }
    }

    #[test]
    fn test_bond_features_water() {
        let table = ogb_bond_features(&make_water());
        assert_eq!(table.num_bonds, 2);
        assert_eq!(table.features.len(), 2);
        for row in table.features.iter() {
            // Single bonds carry bond-type code 0.
            assert_eq!(row[0], 0);
            assert_eq!(row.len(), 3);
        }
    }

    #[test]
    fn test_bond_features_benzene() {
        let table = ogb_bond_features(&make_benzene());
        for row in table.features.iter() {
            // Aromatic bond-type code, and flagged as conjugated.
            assert_eq!(row[0], 3);
            assert_eq!(row[2], 1);
        }
    }

    #[test]
    fn test_atom_features_aromatic() {
        let table = ogb_atom_features(&make_benzene());
        for row in table.features.iter() {
            // Aromatic flag and ring-membership flag are both set.
            assert_eq!(row[7], 1);
            assert_eq!(row[8], 1);
        }
    }

    #[test]
    fn test_graph_features() {
        let graph = ogb_graph_features(&make_water());
        assert_eq!(graph.atom_features.num_atoms, 3);
        // Two bonds, each emitted in both directions.
        assert_eq!(graph.edge_src.len(), 4);
        assert_eq!(graph.edge_dst.len(), 4);
        assert_eq!(graph.bond_features.num_bonds, 4);
    }

    #[test]
    fn test_formal_charge_encoding() {
        let mut cation = Atom::new(0, "N", 0.0, 0.0, 0.0);
        cation.formal_charge = 1;

        let mut mol = Molecule::new("test");
        mol.atoms.push(cation);

        let table = ogb_atom_features(&mol);
        // Charge +1 is stored shifted by +5.
        assert_eq!(table.features[0][3], 6);
    }

    #[test]
    fn test_empty_molecule() {
        let empty = Molecule::new("empty");

        let atom_table = ogb_atom_features(&empty);
        assert_eq!(atom_table.num_atoms, 0);
        assert!(atom_table.features.is_empty());

        let bond_table = ogb_bond_features(&empty);
        assert_eq!(bond_table.num_bonds, 0);
        assert!(bond_table.features.is_empty());
    }

    #[test]
    fn test_hybridization_in_features() {
        let mut alkyne = Molecule::new("test");
        alkyne.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
        alkyne.atoms.push(Atom::new(1, "C", 1.2, 0.0, 0.0));
        alkyne.bonds.push(Bond::new(0, 1, BondOrder::Triple));

        let table = ogb_atom_features(&alkyne);
        // The triple-bonded carbon is expected to report hybridization index 1.
        assert_eq!(table.features[0][6], 1);
    }
}