#![warn(missing_docs)]
#![doc = include_str!("../README.md")]
use std::{
collections::{HashMap, HashSet},
fmt,
};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Error {
AtomIdx {
provided: u16,
bond_idx: usize,
atom: u8,
max: usize,
},
BondMult {
provided: u16,
bond_idx: usize,
},
AtomicNum {
provided: u8,
atom_idx: usize,
},
ParallelBonds {
bond1: usize,
bond2: usize,
},
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::AtomIdx {
provided,
bond_idx,
atom,
max,
} => {
write!(f, "bond {} contained atomic index {} for coincident atom {}, but only {} atoms exist", bond_idx, provided, atom, max)
}
Self::BondMult { provided, bond_idx } => {
write!(
f,
"bond {} contained multiplicity {}, but it must be 1, 2, or 3",
bond_idx, provided
)
}
Self::AtomicNum { provided, atom_idx } => {
write!(
f,
"atom {} had an atomic number {}, no such atom currently exists",
atom_idx, provided
)
}
Self::ParallelBonds { bond1, bond2 } => {
write!(
f,
"bond {} and bond {} connect the same two atoms",
bond1, bond2
)
}
}
}
}
impl std::error::Error for Error {}
extern {
fn get_coordinates(
n_atoms: usize,
atoms: *const u8,
n_bonds: usize,
bonds: *const u16,
coords: *mut f32,
);
}
pub unsafe fn gen_coords_unchecked(atoms: &[u8], bonds: &[[u16; 3]]) -> Vec<(f32, f32)> {
let n_atoms = atoms.len();
let n_bonds = bonds.len();
debug_assert!(bonds
.iter()
.all(|[atom1, atom2, _]| (*atom1 as usize) < n_atoms && (*atom2 as usize) < n_atoms));
debug_assert_eq!(
bonds
.iter()
.map(|[atom1, atom2, _]| {
let mut atoms = [*atom1, *atom2];
atoms.sort_unstable();
atoms
})
.collect::<HashSet<[u16; 2]>>()
.len(),
bonds.len()
);
debug_assert!(bonds.iter().all(|[_, _, mult]| (1..=3).contains(mult)));
debug_assert!(atoms
.iter()
.all(|atomic_num| (1..=118).contains(atomic_num)));
let atoms = atoms.as_ptr();
let bonds = bonds.iter().copied().flatten().collect::<Vec<u16>>();
let bonds = bonds.as_ptr();
let mut raw_coords = Vec::<f32>::with_capacity(2 * n_atoms);
get_coordinates(n_atoms, atoms, n_bonds, bonds, raw_coords.as_mut_ptr());
raw_coords.set_len(2 * n_atoms);
let mut coords = Vec::with_capacity(n_atoms);
for idx in 0..n_atoms {
let coord = (raw_coords[2 * idx], raw_coords[2 * idx + 1]);
coords.push(coord);
}
coords
}
pub fn gen_coords(atoms: &[u8], bonds: &[[u16; 3]]) -> Result<Vec<(f32, f32)>, Error> {
let mut seen_bonds = HashMap::<[u16; 2], usize>::new();
for (bond_idx, [atom1, atom2, mult]) in bonds.iter().enumerate() {
let mut bond = [*atom1, *atom2];
bond.sort_unstable();
if let Some(bond1) = seen_bonds.insert(bond, bond_idx) {
return Err(Error::ParallelBonds {
bond1,
bond2: bond_idx,
});
}
let max = atoms.len();
let mut invalid_atom = false;
let mut provided = *atom1;
let mut atom = 0;
if *atom1 as usize >= max {
invalid_atom = true;
}
else if *atom2 as usize >= max {
invalid_atom = true;
provided = *atom2;
atom = 1;
}
else if !(1..=3).contains(mult) {
return Err(Error::BondMult {
provided: *mult,
bond_idx,
});
}
if invalid_atom {
return Err(Error::AtomIdx {
provided,
bond_idx,
atom,
max,
});
}
}
for (atom_idx, &provided) in atoms.iter().enumerate() {
if !(1..=118).contains(&provided) {
return Err(Error::AtomicNum { provided, atom_idx });
}
}
Ok(unsafe { gen_coords_unchecked(atoms, bonds) })
}
#[cfg(test)]
mod tests {
use std::time::Duration;
use super::{gen_coords_unchecked, get_coordinates};
use serde::Deserialize;
#[derive(Deserialize)]
struct PubchemAtoms {
element: Vec<u8>,
}
#[derive(Deserialize)]
struct PubchemBonds {
aid1: Vec<u16>,
aid2: Vec<u16>,
order: Vec<u16>,
}
#[derive(Deserialize)]
struct PubchemRecord {
atoms: PubchemAtoms,
bonds: PubchemBonds,
}
#[allow(non_snake_case)]
#[derive(Deserialize)]
struct PubchemResponse {
PC_Compounds: Vec<PubchemRecord>,
}
#[test]
fn raw_ffi() {
let atoms = vec![7u8, 6];
let atoms: *const u8 = atoms.as_ptr();
let bonds = vec![0u16, 1, 1];
let bonds: *const u16 = bonds.as_ptr();
let mut coords = Vec::with_capacity(4);
let coords_ptr = coords.as_mut_ptr();
unsafe {
get_coordinates(2usize, atoms, 1usize, bonds, coords_ptr);
coords.set_len(4);
};
assert_eq!(coords, vec![-50.0, 0.0, 0.0, 0.0]);
}
fn pubchem_consistency(cid: usize) {
let compound = reqwest::blocking::get(format!(
"http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/JSON",
cid
))
.unwrap()
.json::<PubchemResponse>()
.unwrap();
let atoms = &compound.PC_Compounds[0].atoms.element;
let bonds = compound.PC_Compounds[0]
.bonds
.aid1
.iter()
.zip(compound.PC_Compounds[0].bonds.aid2.iter())
.zip(compound.PC_Compounds[0].bonds.order.iter())
.map(|((atom1, atom2), order)| [*atom1 - 1, *atom2 - 1, *order])
.collect::<Vec<[u16; 3]>>();
let mut c_coords: Vec<f32> = Vec::with_capacity(2 * atoms.len());
let flat_bonds = bonds.clone().into_iter().flatten().collect::<Vec<u16>>();
let rust_coords = unsafe { gen_coords_unchecked(&atoms, &bonds) };
unsafe {
get_coordinates(
atoms.len(),
atoms.as_ptr(),
bonds.len(),
flat_bonds.as_ptr(),
c_coords.as_mut_ptr(),
);
c_coords.set_len(2 * atoms.len());
}
for idx in 0..atoms.len() {
assert_eq!(c_coords[2 * idx], rust_coords[idx].0);
assert_eq!(c_coords[2 * idx + 1], rust_coords[idx].1);
}
std::thread::sleep(Duration::from_secs(1));
}
#[test]
fn consistency() {
pubchem_consistency(2244);
pubchem_consistency(234);
pubchem_consistency(1000);
pubchem_consistency(234578);
pubchem_consistency(992342);
}
}