use crate::{
AdjacencyList, Atom, AtomId, Bond, BondOrder, BondStereo, ChiralTag, Hybridization, Molecule,
RingInfo, ValenceAssignment, ValenceModel, assign_valence,
};
/// Per-molecule data gathered once by [`build_query_match_context`] and then
/// shared by every predicate evaluation against that molecule.
#[derive(Clone)]
pub struct QueryMatchContext {
/// Owned copy of the molecule's adjacency list.
adj: AdjacencyList,
/// Ring information, or `None` when none was cached and ring perception failed.
/// Ring-based predicates treat `None` as "no ring data available".
ring_info: Option<RingInfo>,
/// Valence assignment, or `None` when none was cached and assignment failed.
/// Hydrogen-count predicates cannot match without it.
valence: Option<ValenceAssignment>,
}
/// Boolean expression tree whose leaves are predicates of type `T`.
///
/// The evaluators in this file treat an empty [`QueryNode::And`] as a match and
/// an empty [`QueryNode::Or`] as a non-match (they use `all` / `any`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryNode<T> {
    /// Leaf node holding a single predicate.
    Predicate(T),
    /// Matches when every child matches.
    And(Vec<QueryNode<T>>),
    /// Matches when at least one child matches.
    Or(Vec<QueryNode<T>>),
    /// Matches when the child does not match.
    Not(Box<QueryNode<T>>),
}

impl<T> QueryNode<T> {
    /// Builds a leaf node around `predicate`.
    #[must_use]
    pub fn predicate(predicate: T) -> Self {
        QueryNode::Predicate(predicate)
    }

    /// Builds a conjunction over `children`.
    #[must_use]
    pub fn and(children: Vec<Self>) -> Self {
        QueryNode::And(children)
    }

    /// Builds a disjunction over `children`.
    #[must_use]
    pub fn or(children: Vec<Self>) -> Self {
        QueryNode::Or(children)
    }

    /// Builds the negation of `child`, boxing it.
    #[must_use]
    pub fn not(child: Self) -> Self {
        let boxed = Box::new(child);
        QueryNode::Not(boxed)
    }
}
/// Leaf predicates that can be tested against a single atom.
///
/// The per-variant notes describe how `atom_predicate_matches_with_context`
/// evaluates each predicate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AtomQueryPredicate {
/// Matches every atom.
Any,
/// Atomic number equals the given value.
AtomicNumber(u8),
/// Atomic number and aromatic flag both equal the given values.
AtomType {
atomic_number: u8,
aromatic: bool,
},
/// Atomic number is one of the listed values.
AtomicNumberIn(Vec<u8>),
/// Atomic number is none of the listed values.
AtomicNumberNotIn(Vec<u8>),
/// Formal charge equals the given value.
FormalCharge(i8),
/// The atom has an explicit isotope equal to the given value.
Isotope(u16),
/// Explicit plus implicit hydrogens on the atom, plus neighbours with atomic
/// number 1, equals the given value. Never matches without a valence assignment.
HydrogenCount(u8),
/// The atom has at least one implicit hydrogen.
HasImplicitHydrogen,
/// NOTE(review): evaluated against explicit + implicit hydrogens on the atom
/// (neighbouring hydrogen atoms excluded), not the implicit count alone.
ImplicitHydrogenCount(u8),
/// "Less than or equal" form of [`AtomQueryPredicate::ImplicitHydrogenCount`].
ImplicitHydrogenCountLessEqual(u8),
/// Number of adjacency-list neighbours equals the given value.
ExplicitDegree(u8),
/// Number of adjacency-list neighbours is at most the given value.
ExplicitDegreeLessEqual(u8),
/// Number of neighbours that are not plain hydrogen equals the given value.
NonHydrogenDegree(u32),
/// Number of incident bonds that belong to at least one ring equals the given
/// value. Never matches when ring information is unavailable.
RingBondCount(u8),
/// Number of incident ring bonds is at most the given value.
RingBondCountLessEqual(u8),
/// Evaluated as "the atom has at least one incident ring bond".
RingBondCountNeedsScan,
/// Aromatic flag equals the given value.
IsAromatic(bool),
/// Heuristic: the neighbour count is below a limit derived from the atom's
/// hybridization.
IsUnsaturated,
/// Recursive SMARTS expression; the evaluator in this file never matches it.
RecursiveSmarts(String),
/// R-group label; the evaluator in this file never matches it.
RGroupLabel(u32),
/// Mol-file alias; the evaluator in this file never matches it.
MolFileAlias(String),
/// Hybridization equals the given value.
HybridizationMatch(Hybridization),
/// Neighbour count plus hydrogens on the atom equals the given value.
TotalDegree(u8),
/// "Less than or equal" form of [`AtomQueryPredicate::TotalDegree`].
TotalDegreeLessEqual(u8),
/// "Greater than or equal" form of [`AtomQueryPredicate::TotalDegree`].
TotalDegreeGreaterEqual(u8),
/// Evaluated identically to [`AtomQueryPredicate::TotalDegree`].
Connectivity(u8),
/// Evaluated identically to [`AtomQueryPredicate::TotalDegreeLessEqual`].
ConnectivityLessEqual(u8),
/// Evaluated identically to [`AtomQueryPredicate::TotalDegreeGreaterEqual`].
ConnectivityGreaterEqual(u8),
/// The atom is a member of at least one ring.
InRing,
/// The atom is a member of a ring of the given size.
InRingOfSize(u8),
/// The atom's smallest ring size equals the given value.
SmallestRingSize(u8),
/// The atom's smallest ring size is at most the given value.
SmallestRingSizeLessEqual(u8),
/// The atom's smallest ring size is at least the given value.
SmallestRingSizeGreaterEqual(u8),
/// Compares against the explicit isotope when set, otherwise against a small
/// built-in table of nominal masses (0 for elements not in the table).
Mass(u16),
/// Chiral tag equals the given value.
ChiralTagMatch(ChiralTag),
/// NOTE(review): the evaluator currently matches every atom for this
/// predicate, regardless of the map number.
AtomMapNumber(u32),
/// Number of non-hydrogen neighbours equals the given value.
SubstitutionCount(u8),
/// Number of non-hydrogen neighbours is at most the given value.
SubstitutionCountLessEqual(u8),
/// Number of non-hydrogen neighbours is at least the given value.
SubstitutionCountGreaterEqual(u8),
/// Number of adjacency-list neighbours equals the given value.
Degree(u8),
/// Number of adjacency-list neighbours is at most the given value.
DegreeLessEqual(u8),
/// Number of adjacency-list neighbours is at least the given value.
DegreeGreaterEqual(u8),
/// Evaluated identically to [`AtomQueryPredicate::RingBondCount`].
NumRingBonds(u8),
/// Number of incident ring bonds is at least the given value.
NumRingBondsGreaterEqual(u8),
/// Evaluated identically to [`AtomQueryPredicate::RingBondCountLessEqual`].
NumRingBondsLessEqual(u8),
/// Placeholder for an unsupported query feature; never matches. The string
/// describes the feature.
UnsupportedFeature(&'static str),
}
/// Leaf predicates that can be tested against a single bond.
///
/// The per-variant notes describe how `bond_predicate_matches_with_context`
/// evaluates each predicate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BondQueryPredicate {
/// Matches every bond.
Any,
/// Bond order equals the given value.
Order(BondOrder),
/// Bond order is one of the listed values.
OrderIn(Vec<BondOrder>),
/// Aromatic flag equals the given value.
IsAromatic(bool),
/// Ring membership equals the given value. When ring information is
/// unavailable the bond is treated as not being in a ring.
IsInRing(bool),
/// Bond direction equals the given value.
Direction(crate::BondDirection),
/// Bond stereo descriptor equals the given value.
Stereo(BondStereo),
/// The bond is flagged as conjugated.
IsConjugated,
/// Number of rings containing the bond equals the given value. Never matches
/// when ring information is unavailable.
NumRingBonds(u8),
/// Number of rings containing the bond is at least the given value.
NumRingBondsGreaterEqual(u8),
/// Number of rings containing the bond is at most the given value.
NumRingBondsLessEqual(u8),
/// Mol-file query code; the evaluator in this file never matches it.
MolFileQueryCode(u32),
/// Placeholder for an unsupported query feature; never matches. The string
/// describes the feature.
UnsupportedFeature(&'static str),
}
/// Errors reported while tokenizing or parsing a SMARTS string.
///
/// Positions produced by the tokenizer in this file are indices into the
/// sequence of `char`s of the input, not byte offsets.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SmartsParseError {
/// A `[` was never closed; carries the position of the `[`.
#[error("unclosed bracket at position {0}")]
UnclosedBracket(usize),
/// A character (or token) was found where it is not allowed.
#[error("unexpected character '{character}' at position {position}: {context}")]
UnexpectedCharacter {
position: usize,
character: char,
context: String,
},
/// The input ended where more was required; the string says what was expected.
#[error("unexpected end of SMARTS: {0}")]
UnexpectedEnd(String),
/// An atom primitive could not be interpreted.
#[error("invalid atom primitive at position {position}: {detail}")]
InvalidAtomPrimitive { position: usize, detail: String },
/// A parenthesis was never closed; carries a position.
#[error("unclosed parenthesis at position {0}")]
UnclosedParenthesis(usize),
/// A ring-closure number was left unbalanced; carries the number.
#[error("unbalanced ring closure number: {0}")]
UnbalancedRingClosure(u8),
}
/// Returns an owned copy of the adjacency list stored in the molecule's
/// topology block.
fn ensure_adjacency(mol: &Molecule) -> AdjacencyList {
    let topology = mol.topology_block();
    topology.adjacency.clone()
}
/// Obtains ring information for `mol`.
///
/// Lookup order: the cached `rings` entry, then the cached `ring_families`
/// entry, and finally a fresh `find_sssr` run. Returns `None` only when
/// nothing is cached and ring perception fails.
fn ensure_ring_info(mol: &Molecule) -> Option<RingInfo> {
    // Each cache read is its own statement so that no cache borrow is held
    // while ring perception runs.
    let primary = mol.derived_cache().rings.clone();
    let cached = primary.or_else(|| mol.derived_cache().ring_families.clone());
    cached.or_else(|| crate::rings::find_sssr(mol).ok())
}
/// Obtains a valence assignment for `mol`.
///
/// Uses the cached assignment when present; otherwise computes one with
/// `ValenceModel::RdkitLike`. Returns `None` when that computation fails.
fn ensure_valence_assignment(mol: &Molecule) -> Option<ValenceAssignment> {
    // Read the cache in its own statement so no cache borrow is held while the
    // assignment is computed.
    let cached = mol.derived_cache().valence.clone();
    cached.or_else(|| assign_valence(mol, ValenceModel::RdkitLike).ok())
}
/// Collects the adjacency list, ring information and valence assignment of
/// `mol` into a [`QueryMatchContext`].
///
/// Build this once per molecule and pass it to the `*_with_context` functions
/// when evaluating many predicates.
#[must_use]
pub fn build_query_match_context(mol: &Molecule) -> QueryMatchContext {
    let adj = ensure_adjacency(mol);
    let ring_info = ensure_ring_info(mol);
    let valence = ensure_valence_assignment(mol);
    QueryMatchContext {
        adj,
        ring_info,
        valence,
    }
}
/// Looks up the implicit hydrogen count recorded for `atom`.
///
/// Returns `None` when there is no valence assignment or it has no entry at
/// the atom's index. Negative stored values are clamped to zero.
fn implicit_hydrogen_count(valence: Option<&ValenceAssignment>, atom: &Atom) -> Option<u8> {
    let assignment = valence?;
    let stored = *assignment.implicit_hydrogens.get(atom.id().index())?;
    Some(stored.max(0) as u8)
}
/// Explicit plus implicit hydrogens carried by `atom` itself, saturating at
/// `u8::MAX`.
///
/// Returns `None` when the implicit count is unavailable.
fn total_hydrogen_count(valence: Option<&ValenceAssignment>, atom: &Atom) -> Option<u8> {
    let implicit = implicit_hydrogen_count(valence, atom)?;
    Some(atom.explicit_hydrogens().saturating_add(implicit))
}
/// Hydrogens carried by `atom` itself (explicit + implicit), ignoring any
/// neighbouring hydrogen atoms.
///
/// This is a plain alias for `total_hydrogen_count`. Returns `None` when the
/// implicit count is unavailable.
fn total_hydrogen_count_excluding_neighbors(
valence: Option<&ValenceAssignment>,
atom: &Atom,
) -> Option<u8> {
total_hydrogen_count(valence, atom)
}
/// Hydrogens carried by `atom` itself plus every neighbouring atom whose atomic
/// number is 1, saturating at `u8::MAX`.
///
/// Returns `None` (without inspecting neighbours) when the atom's own hydrogen
/// count is unavailable.
fn total_hydrogen_count_including_neighbors(
    adj: &AdjacencyList,
    valence: Option<&ValenceAssignment>,
    atom: &Atom,
    mol: &Molecule,
) -> Option<u8> {
    let mut hydrogens = total_hydrogen_count(valence, atom)?;
    for neighbor in adj.neighbors_of(atom.id().index()) {
        let is_hydrogen = mol.atoms()[neighbor.atom_index].atomic_number() == 1;
        if is_hydrogen {
            hydrogens = hydrogens.saturating_add(1);
        }
    }
    Some(hydrogens)
}
/// Neighbour count of `atom` plus the hydrogens it carries, saturating at
/// `u8::MAX`.
///
/// Returns `None` when the neighbour count does not fit in `u8` or when the
/// hydrogen count is unavailable.
fn total_degree_with_hydrogens(
    adj: &AdjacencyList,
    valence: Option<&ValenceAssignment>,
    atom: &Atom,
) -> Option<u8> {
    let neighbor_count = adj.neighbors_of(atom.id().index()).len();
    let degree = u8::try_from(neighbor_count).ok()?;
    let hydrogens = total_hydrogen_count(valence, atom)?;
    Some(degree.saturating_add(hydrogens))
}
/// Tests a single atom predicate against `atom`.
///
/// Convenience wrapper that builds a fresh [`QueryMatchContext`] for `mol` on
/// every call; use [`atom_predicate_matches_with_context`] to reuse one.
pub fn atom_predicate_matches(atom: &Atom, pred: &AtomQueryPredicate, mol: &Molecule) -> bool {
    atom_predicate_matches_with_context(atom, pred, mol, &build_query_match_context(mol))
}
pub fn atom_predicate_matches_with_context(
atom: &Atom,
pred: &AtomQueryPredicate,
mol: &Molecule,
ctx: &QueryMatchContext,
) -> bool {
let aidx = atom.id().index();
let adj = &ctx.adj;
let ring_info = &ctx.ring_info;
let valence = &ctx.valence;
match pred {
AtomQueryPredicate::Any => true,
AtomQueryPredicate::AtomicNumber(n) => atom.atomic_number() == *n,
AtomQueryPredicate::AtomType {
atomic_number,
aromatic,
} => atom.atomic_number() == *atomic_number && atom.is_aromatic() == *aromatic,
AtomQueryPredicate::AtomicNumberIn(vals) => vals.contains(&atom.atomic_number()),
AtomQueryPredicate::AtomicNumberNotIn(vals) => !vals.contains(&atom.atomic_number()),
AtomQueryPredicate::FormalCharge(c) => atom.formal_charge() == *c,
AtomQueryPredicate::Isotope(i) => atom.isotope() == Some(*i),
AtomQueryPredicate::HydrogenCount(n) => {
total_hydrogen_count_including_neighbors(adj, valence.as_ref(), atom, mol) == Some(*n)
}
AtomQueryPredicate::HasImplicitHydrogen => {
implicit_hydrogen_count(valence.as_ref(), atom).is_some_and(|count| count > 0)
}
AtomQueryPredicate::ImplicitHydrogenCount(n) => {
total_hydrogen_count_excluding_neighbors(valence.as_ref(), atom) == Some(*n)
}
AtomQueryPredicate::ImplicitHydrogenCountLessEqual(n) => {
total_hydrogen_count_excluding_neighbors(valence.as_ref(), atom)
.is_some_and(|count| count <= *n)
}
AtomQueryPredicate::ExplicitDegree(n) => {
let neighbors = adj.neighbors_of(aidx);
neighbors.len() as u8 == *n
}
AtomQueryPredicate::ExplicitDegreeLessEqual(n) => {
let neighbors = adj.neighbors_of(aidx);
neighbors.len() as u8 <= *n
}
AtomQueryPredicate::NonHydrogenDegree(n) => {
u32::from(count_non_hydrogen_neighbors(&adj, mol, aidx)) == *n
}
AtomQueryPredicate::RingBondCount(n) => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) == *n
} else {
false
}
}
AtomQueryPredicate::RingBondCountLessEqual(n) => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) <= *n
} else {
false
}
}
AtomQueryPredicate::RingBondCountNeedsScan => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) > 0
} else {
false
}
}
AtomQueryPredicate::IsAromatic(desired) => atom.is_aromatic() == *desired,
AtomQueryPredicate::IsUnsaturated => {
let total_degree = adj.neighbors_of(aidx).len();
let total_valence = match atom.hybridization() {
Hybridization::S => 1,
Hybridization::Sp => 2,
Hybridization::Sp2 => 3,
Hybridization::Sp3 => 4,
Hybridization::Sp2d => 5,
Hybridization::Sp3d => 5,
Hybridization::Sp3d2 => 6,
Hybridization::Other => 4,
Hybridization::Unspecified => 4,
};
(total_degree as u8) < total_valence
}
AtomQueryPredicate::HybridizationMatch(h) => atom.hybridization() == *h,
AtomQueryPredicate::TotalDegree(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom) == Some(*n)
}
AtomQueryPredicate::TotalDegreeLessEqual(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom)
.is_some_and(|total| total <= *n)
}
AtomQueryPredicate::TotalDegreeGreaterEqual(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom)
.is_some_and(|total| total >= *n)
}
AtomQueryPredicate::Connectivity(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom) == Some(*n)
}
AtomQueryPredicate::ConnectivityLessEqual(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom)
.is_some_and(|total| total <= *n)
}
AtomQueryPredicate::ConnectivityGreaterEqual(n) => {
total_degree_with_hydrogens(adj, valence.as_ref(), atom)
.is_some_and(|total| total >= *n)
}
AtomQueryPredicate::InRing => {
if let Some(ri) = &ring_info {
ri.num_atom_rings(atom.id()) > 0
} else {
false
}
}
AtomQueryPredicate::InRingOfSize(n) => {
if let Some(ri) = &ring_info {
ri.is_atom_in_ring_of_size(atom.id(), *n as usize)
} else {
false
}
}
AtomQueryPredicate::SmallestRingSize(n) => {
if let Some(ri) = &ring_info {
ri.min_atom_ring_size(atom.id()) as u8 == *n
} else {
false
}
}
AtomQueryPredicate::SmallestRingSizeLessEqual(n) => {
if let Some(ri) = &ring_info {
ri.min_atom_ring_size(atom.id()) as u8 <= *n
} else {
false
}
}
AtomQueryPredicate::SmallestRingSizeGreaterEqual(n) => {
if let Some(ri) = &ring_info {
ri.min_atom_ring_size(atom.id()) as u8 >= *n
} else {
false
}
}
AtomQueryPredicate::Mass(m) => {
let mass =
atom.isotope()
.map(u16::into)
.unwrap_or_else(|| match atom.atomic_number() {
1 => 1,
6 => 12,
7 => 14,
8 => 16,
9 => 19,
15 => 31,
16 => 32,
17 => 35,
35 => 80,
53 => 127,
_ => 0,
});
mass == *m
}
AtomQueryPredicate::ChiralTagMatch(tag) => atom.chiral_tag() == *tag,
AtomQueryPredicate::AtomMapNumber(_n) => true,
AtomQueryPredicate::SubstitutionCount(n) => {
count_non_hydrogen_neighbors(adj, mol, aidx) == *n
}
AtomQueryPredicate::SubstitutionCountLessEqual(n) => {
count_non_hydrogen_neighbors(adj, mol, aidx) <= *n
}
AtomQueryPredicate::SubstitutionCountGreaterEqual(n) => {
count_non_hydrogen_neighbors(adj, mol, aidx) >= *n
}
AtomQueryPredicate::Degree(n) => {
let neighbors = adj.neighbors_of(aidx);
neighbors.len() as u8 == *n
}
AtomQueryPredicate::DegreeLessEqual(n) => {
let neighbors = adj.neighbors_of(aidx);
neighbors.len() as u8 <= *n
}
AtomQueryPredicate::DegreeGreaterEqual(n) => {
let neighbors = adj.neighbors_of(aidx);
neighbors.len() as u8 >= *n
}
AtomQueryPredicate::NumRingBonds(n) => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) == *n
} else {
false
}
}
AtomQueryPredicate::NumRingBondsGreaterEqual(n) => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) >= *n
} else {
false
}
}
AtomQueryPredicate::NumRingBondsLessEqual(n) => {
if let Some(ri) = &ring_info {
count_ring_bonds(adj, mol, ri, aidx) <= *n
} else {
false
}
}
AtomQueryPredicate::RecursiveSmarts(_smarts) => {
false
}
AtomQueryPredicate::RGroupLabel(_label) => {
false
}
AtomQueryPredicate::MolFileAlias(_alias) => {
false
}
AtomQueryPredicate::UnsupportedFeature(_desc) => {
false
}
}
}
/// Tests a single bond predicate against `bond`.
///
/// Convenience wrapper that builds a fresh [`QueryMatchContext`] for `mol` on
/// every call; use [`bond_predicate_matches_with_context`] to reuse one.
pub fn bond_predicate_matches(bond: &Bond, pred: &BondQueryPredicate, mol: &Molecule) -> bool {
    bond_predicate_matches_with_context(bond, pred, mol, &build_query_match_context(mol))
}
/// Tests a single bond predicate against `bond` using precomputed molecule data.
///
/// * `bond` - the bond under test.
/// * `pred` - the predicate to evaluate.
/// * `_mol` - currently unused.
/// * `ctx` - context built by [`build_query_match_context`].
///
/// Without ring information a bond is treated as not being in any ring for
/// `IsInRing`, and the ring-count predicates never match.
pub fn bond_predicate_matches_with_context(
    bond: &Bond,
    pred: &BondQueryPredicate,
    _mol: &Molecule,
    ctx: &QueryMatchContext,
) -> bool {
    let ring_info = ctx.ring_info.as_ref();
    // Number of rings containing the bond; `None` without ring information.
    let ring_membership = || ring_info.map(|ri| ri.num_bond_rings(bond.id()) as u8);

    match pred {
        BondQueryPredicate::Any => true,
        BondQueryPredicate::Order(order) => bond.order() == *order,
        BondQueryPredicate::OrderIn(orders) => orders.contains(&bond.order()),
        BondQueryPredicate::IsAromatic(desired) => bond.is_aromatic() == *desired,
        BondQueryPredicate::IsInRing(desired) => match ring_info {
            Some(ri) => (ri.num_bond_rings(bond.id()) > 0) == *desired,
            None => !*desired,
        },
        BondQueryPredicate::Direction(dir) => bond.direction() == *dir,
        BondQueryPredicate::Stereo(stereo) => bond.stereo() == *stereo,
        BondQueryPredicate::IsConjugated => bond.is_conjugated(),
        BondQueryPredicate::NumRingBonds(n) => ring_membership() == Some(*n),
        BondQueryPredicate::NumRingBondsGreaterEqual(n) => {
            ring_membership().is_some_and(|count| count >= *n)
        }
        BondQueryPredicate::NumRingBondsLessEqual(n) => {
            ring_membership().is_some_and(|count| count <= *n)
        }
        // Features the evaluator does not support never match.
        BondQueryPredicate::MolFileQueryCode(_) | BondQueryPredicate::UnsupportedFeature(_) => {
            false
        }
    }
}
/// Evaluates an atom query tree against `atom`.
///
/// Convenience wrapper that builds a fresh [`QueryMatchContext`] for `mol` on
/// every call; use [`atom_matches_query_with_context`] to reuse one.
pub fn atom_matches_query(
    atom: &Atom,
    query: &QueryNode<AtomQueryPredicate>,
    mol: &Molecule,
) -> bool {
    atom_matches_query_with_context(atom, query, mol, &build_query_match_context(mol))
}
/// Recursively evaluates an atom query tree against `atom`.
///
/// `And` short-circuits on the first non-matching child and `Or` on the first
/// matching child. An empty `And` matches; an empty `Or` does not.
pub fn atom_matches_query_with_context(
    atom: &Atom,
    query: &QueryNode<AtomQueryPredicate>,
    mol: &Molecule,
    ctx: &QueryMatchContext,
) -> bool {
    let eval = |child: &QueryNode<AtomQueryPredicate>| {
        atom_matches_query_with_context(atom, child, mol, ctx)
    };
    match query {
        QueryNode::Predicate(leaf) => atom_predicate_matches_with_context(atom, leaf, mol, ctx),
        QueryNode::And(operands) => operands.iter().all(eval),
        QueryNode::Or(operands) => operands.iter().any(eval),
        QueryNode::Not(operand) => !eval(&**operand),
    }
}
/// Evaluates a bond query tree against `bond`.
///
/// Convenience wrapper that builds a fresh [`QueryMatchContext`] for `mol` on
/// every call; use [`bond_matches_query_with_context`] to reuse one.
pub fn bond_matches_query(
    bond: &Bond,
    query: &QueryNode<BondQueryPredicate>,
    mol: &Molecule,
) -> bool {
    bond_matches_query_with_context(bond, query, mol, &build_query_match_context(mol))
}
/// Recursively evaluates a bond query tree against `bond`.
///
/// `And` short-circuits on the first non-matching child and `Or` on the first
/// matching child. An empty `And` matches; an empty `Or` does not.
pub fn bond_matches_query_with_context(
    bond: &Bond,
    query: &QueryNode<BondQueryPredicate>,
    mol: &Molecule,
    ctx: &QueryMatchContext,
) -> bool {
    let eval = |child: &QueryNode<BondQueryPredicate>| {
        bond_matches_query_with_context(bond, child, mol, ctx)
    };
    match query {
        QueryNode::Predicate(leaf) => bond_predicate_matches_with_context(bond, leaf, mol, ctx),
        QueryNode::And(operands) => operands.iter().all(eval),
        QueryNode::Or(operands) => operands.iter().any(eval),
        QueryNode::Not(operand) => !eval(&**operand),
    }
}
pub fn parse_smarts(
smarts: &str,
) -> Result<
(
Vec<QueryNode<AtomQueryPredicate>>,
Vec<QueryNode<BondQueryPredicate>>,
),
SmartsParseError,
> {
let tokens = tokenize(smarts)?;
let mut parser = SmartsParser::new(&tokens);
parser.parse()
}
/// Lexical tokens produced by `tokenize`; each is paired with the position of
/// its first character.
#[derive(Debug, Clone, PartialEq)]
enum SmartsToken {
/// Atom symbol that starts with an upper-case letter, or `*`, found outside
/// brackets.
OrganicElement(String),
/// One of the lower-case symbols `c`, `n`, `o`, `s`, `p`, `a` found outside
/// brackets.
AromaticElement(String),
/// A bracket atom `[...]`; the payload is the position of the `[`. The
/// tokenizer skips the bracket contents.
Bracket(usize),
/// One of the bond characters `-`, `=`, `#`, `:`, `~`, `@`, `/`, `\`.
BondSpec(char),
/// `(`
OpenParen,
/// `)`
CloseParen,
/// A single-digit ring-closure number.
RingClosureDigit(u8),
/// A two-digit ring-closure number written as `%nn`.
RingClosurePercent(u8),
/// `&` or `;` (both are mapped to the same token).
And,
/// `,`
Or,
/// `!`
Not,
/// Sentinel appended after the last real token.
EndOfStream,
}
/// Splits a SMARTS string into tokens, each paired with the position of its
/// first character.
///
/// Positions are indices into the input's sequence of `char`s. Whitespace is
/// skipped and bracket atoms are emitted as one [`SmartsToken::Bracket`] token
/// without inspecting their contents. The result always ends with
/// [`SmartsToken::EndOfStream`].
///
/// Outside brackets only `Cl` and `Br` are read as two-letter symbols. Any
/// other lowercase letter after an upper-case symbol starts a new token, so
/// `Cc` is an aliphatic carbon followed by an aromatic carbon.
///
/// # Errors
///
/// * [`SmartsParseError::UnclosedBracket`] when a `[` has no matching `]`.
/// * [`SmartsParseError::UnexpectedCharacter`] for `%` not followed by two
///   digits and for any character that starts no token.
fn tokenize(smarts: &str) -> Result<Vec<(SmartsToken, usize)>, SmartsParseError> {
    let chars: Vec<char> = smarts.chars().collect();
    let len = chars.len();
    let mut tokens = Vec::new();
    let mut i = 0;
    while i < len {
        let ch = chars[i];
        match ch {
            ' ' | '\t' | '\n' | '\r' => i += 1,
            '[' => {
                let start = i;
                i += 1;
                while i < len && chars[i] != ']' {
                    i += 1;
                }
                if i >= len {
                    return Err(SmartsParseError::UnclosedBracket(start));
                }
                // Step past the closing `]`.
                i += 1;
                tokens.push((SmartsToken::Bracket(start), start));
            }
            '-' | '=' | '#' | ':' | '~' | '@' | '/' | '\\' => {
                tokens.push((SmartsToken::BondSpec(ch), i));
                i += 1;
            }
            '(' => {
                tokens.push((SmartsToken::OpenParen, i));
                i += 1;
            }
            ')' => {
                tokens.push((SmartsToken::CloseParen, i));
                i += 1;
            }
            // Both conjunction operators map to the same token.
            '&' | ';' => {
                tokens.push((SmartsToken::And, i));
                i += 1;
            }
            ',' => {
                tokens.push((SmartsToken::Or, i));
                i += 1;
            }
            '!' => {
                tokens.push((SmartsToken::Not, i));
                i += 1;
            }
            '%' => {
                let tens = chars.get(i + 1).and_then(|c| c.to_digit(10));
                let ones = chars.get(i + 2).and_then(|c| c.to_digit(10));
                match (tens, ones) {
                    (Some(tens), Some(ones)) => {
                        // Two decimal digits: at most 99, always fits in `u8`.
                        let num = (tens * 10 + ones) as u8;
                        tokens.push((SmartsToken::RingClosurePercent(num), i));
                        i += 3;
                    }
                    _ => {
                        return Err(SmartsParseError::UnexpectedCharacter {
                            position: i,
                            character: ch,
                            context: "expected two digits after %".to_string(),
                        });
                    }
                }
            }
            '0'..='9' => {
                let num = ch as u8 - b'0';
                tokens.push((SmartsToken::RingClosureDigit(num), i));
                i += 1;
            }
            'c' | 'n' | 'o' | 's' | 'p' | 'a' => {
                tokens.push((SmartsToken::AromaticElement(ch.to_string()), i));
                i += 1;
            }
            'B' | 'C' | 'N' | 'O' | 'S' | 'P' | 'F' | 'I' | '*' | 'X' | 'M' | 'Q' | 'R' | 'T'
            | 'D' | 'H' | 'V' | 'Z' | 'K' | 'W' | 'U' | 'Y' | 'G' | 'L' | 'J' | 'E' | 'A' => {
                let start = i;
                i += 1;
                // Only `Cl` and `Br` are two-letter symbols outside brackets.
                // Swallowing any lowercase letter would merge `Cc`, `Cn`, ...
                // into one unknown symbol.
                let next = chars.get(i).copied();
                if matches!((ch, next), ('C', Some('l')) | ('B', Some('r'))) {
                    i += 1;
                }
                let name: String = chars[start..i].iter().collect();
                tokens.push((SmartsToken::OrganicElement(name), start));
            }
            _ => {
                return Err(SmartsParseError::UnexpectedCharacter {
                    position: i,
                    character: ch,
                    context: "unexpected character in SMARTS string".to_string(),
                });
            }
        }
    }
    tokens.push((SmartsToken::EndOfStream, len));
    Ok(tokens)
}
/// Cursor over a token slice produced by `tokenize`.
///
/// The slice is expected to end with `SmartsToken::EndOfStream`; the parser
/// never advances past that sentinel.
struct SmartsParser<'a> {
    /// Tokens paired with their source positions.
    tokens: &'a [(SmartsToken, usize)],
    /// Index of the next token to be consumed.
    pos: usize,
}

impl<'a> SmartsParser<'a> {
    /// Creates a parser positioned at the first token.
    fn new(tokens: &'a [(SmartsToken, usize)]) -> Self {
        Self { tokens, pos: 0 }
    }

    /// Returns the current token without consuming it.
    fn peek(&self) -> &(SmartsToken, usize) {
        &self.tokens[self.pos]
    }

    /// Consumes the current token.
    fn advance(&mut self) {
        self.pos += 1;
    }

    /// Reports whether the current token can start an atom expression.
    fn at_atom_start(&self) -> bool {
        matches!(
            self.peek(),
            (
                SmartsToken::OrganicElement(_)
                    | SmartsToken::AromaticElement(_)
                    | SmartsToken::Bracket(_),
                _
            )
        )
    }

    /// Parses a chain of atoms and bonds until the end of the stream or a `)`.
    ///
    /// The terminating token is left unconsumed so that the caller can tell the
    /// two cases apart. Returns the atom queries and bond queries in the order
    /// they were encountered.
    ///
    /// NOTE(review): ring closures and branches still only contribute a
    /// placeholder `Any` bond; branch atoms are parsed and then discarded.
    fn parse(
        &mut self,
    ) -> Result<
        (
            Vec<QueryNode<AtomQueryPredicate>>,
            Vec<QueryNode<BondQueryPredicate>>,
        ),
        SmartsParseError,
    > {
        let mut atom_queries = vec![self.parse_atom()?];
        let mut bond_queries = Vec::new();
        loop {
            match self.peek() {
                (SmartsToken::EndOfStream | SmartsToken::CloseParen, _) => break,
                (SmartsToken::BondSpec(_) | SmartsToken::Not | SmartsToken::And, _) => {
                    let bond = self.parse_bond_or_ring_closure(&mut atom_queries)?;
                    bond_queries.push(bond);
                    // The explicit expression *is* the bond to the atom that
                    // follows, so consume that atom here; otherwise the
                    // implicit-bond arm would add a second, spurious `Any`.
                    if self.at_atom_start() {
                        atom_queries.push(self.parse_atom()?);
                    }
                }
                (
                    SmartsToken::RingClosureDigit(_)
                    | SmartsToken::RingClosurePercent(_)
                    | SmartsToken::OpenParen,
                    _,
                ) => {
                    let bond = self.parse_bond_or_ring_closure(&mut atom_queries)?;
                    bond_queries.push(bond);
                }
                _ => {
                    // Two adjacent atoms are joined by an implicit bond.
                    bond_queries.push(QueryNode::Predicate(BondQueryPredicate::Any));
                    atom_queries.push(self.parse_atom()?);
                }
            }
        }
        Ok((atom_queries, bond_queries))
    }

    /// Parses one atom expression at the current position.
    ///
    /// # Errors
    ///
    /// `UnexpectedEnd` at the end of the stream, `UnexpectedCharacter` for any
    /// token that cannot start an atom.
    fn parse_atom(&mut self) -> Result<QueryNode<AtomQueryPredicate>, SmartsParseError> {
        let (token, position) = self.peek().clone();
        let query = match token {
            SmartsToken::OrganicElement(name) => organic_element_to_query(&name),
            SmartsToken::AromaticElement(name) => aromatic_element_to_query(&name),
            SmartsToken::Bracket(start) => {
                self.advance();
                return self.parse_bracket_atom(start);
            }
            SmartsToken::EndOfStream => {
                return Err(SmartsParseError::UnexpectedEnd(
                    "expected atom but reached end".to_string(),
                ));
            }
            SmartsToken::BondSpec(_)
            | SmartsToken::OpenParen
            | SmartsToken::CloseParen
            | SmartsToken::RingClosureDigit(_)
            | SmartsToken::RingClosurePercent(_)
            | SmartsToken::And
            | SmartsToken::Or
            | SmartsToken::Not => {
                return Err(SmartsParseError::UnexpectedCharacter {
                    position,
                    character: '?',
                    context: "expected atom expression".to_string(),
                });
            }
        };
        self.advance();
        Ok(query)
    }

    /// Produces the query for a bracket atom whose token was already consumed.
    ///
    /// Bracket contents are not interpreted yet: every bracket atom becomes the
    /// wildcard predicate.
    fn parse_bracket_atom(
        &mut self,
        _bracket_start: usize,
    ) -> Result<QueryNode<AtomQueryPredicate>, SmartsParseError> {
        Ok(QueryNode::Predicate(AtomQueryPredicate::Any))
    }

    /// Parses a run of bond symbols, `!` and `&`/`;` into one bond query.
    ///
    /// `!` negates the next bond symbol; `~` contributes nothing. Zero
    /// remaining predicates yield `Any`, one is returned as is, several are
    /// combined with `And`. A trailing `!` with no bond symbol after it is
    /// ignored.
    fn parse_bond_expression(&mut self) -> QueryNode<BondQueryPredicate> {
        let mut negate_next = false;
        let mut predicates = Vec::new();
        loop {
            match self.peek() {
                (SmartsToken::Not, _) => negate_next = !negate_next,
                (SmartsToken::And, _) => {}
                (SmartsToken::BondSpec(ch), _) => {
                    let query = bond_spec_to_query(*ch);
                    predicates.push(if std::mem::take(&mut negate_next) {
                        QueryNode::not(query)
                    } else {
                        query
                    });
                }
                _ => break,
            }
            self.advance();
        }
        predicates.retain(|query| *query != QueryNode::Predicate(BondQueryPredicate::Any));
        if predicates.len() > 1 {
            QueryNode::And(predicates)
        } else {
            predicates
                .pop()
                .unwrap_or(QueryNode::Predicate(BondQueryPredicate::Any))
        }
    }

    /// Parses whatever sits between two atoms: a bond expression, a ring
    /// closure, or a parenthesised branch.
    ///
    /// Ring closures and branches currently yield the wildcard bond; the
    /// ring-closure number and the branch contents are discarded.
    ///
    /// # Errors
    ///
    /// `UnclosedParenthesis` (carrying the position of the `(`) when a branch
    /// is not terminated by `)`, plus any error raised while parsing the branch.
    fn parse_bond_or_ring_closure(
        &mut self,
        _atom_queries: &mut Vec<QueryNode<AtomQueryPredicate>>,
    ) -> Result<QueryNode<BondQueryPredicate>, SmartsParseError> {
        match self.peek() {
            (SmartsToken::BondSpec(_) | SmartsToken::Not | SmartsToken::And, _) => {
                Ok(self.parse_bond_expression())
            }
            (SmartsToken::RingClosureDigit(_) | SmartsToken::RingClosurePercent(_), _) => {
                self.advance();
                Ok(QueryNode::Predicate(BondQueryPredicate::Any))
            }
            (SmartsToken::OpenParen, open_position) => {
                let open_position = *open_position;
                self.advance();
                let _branch = self.parse()?;
                // `parse` stops only at `)` or at the end of the stream, so
                // anything other than `)` means the branch was never closed.
                match self.peek() {
                    (SmartsToken::CloseParen, _) => {
                        self.advance();
                        Ok(QueryNode::Predicate(BondQueryPredicate::Any))
                    }
                    _ => Err(SmartsParseError::UnclosedParenthesis(open_position)),
                }
            }
            _ => Ok(QueryNode::Predicate(BondQueryPredicate::Any)),
        }
    }
}
/// Maps an unbracketed atom symbol to its atom query.
///
/// `B`, `C`, `N`, `O`, `S`, `P`, `F`, `Cl`, `Br` and `I` become a non-aromatic
/// `AtomType` query for the element; `A` / `a` become aromaticity tests; `*`
/// and every unrecognised symbol become the wildcard.
fn organic_element_to_query(name: &str) -> QueryNode<AtomQueryPredicate> {
    let atomic_number = match name {
        "A" => return QueryNode::Predicate(AtomQueryPredicate::IsAromatic(false)),
        "a" => return QueryNode::Predicate(AtomQueryPredicate::IsAromatic(true)),
        "B" => 5,
        "C" => 6,
        "N" => 7,
        "O" => 8,
        "F" => 9,
        "P" => 15,
        "S" => 16,
        "Cl" => 17,
        "Br" => 35,
        "I" => 53,
        // `*` and anything unknown match every atom.
        _ => return QueryNode::Predicate(AtomQueryPredicate::Any),
    };
    QueryNode::Predicate(AtomQueryPredicate::AtomType {
        atomic_number,
        aromatic: false,
    })
}
/// Maps a lower-case (aromatic) atom symbol to its atom query.
///
/// `c`, `n`, `o`, `s` and `p` become "this element AND aromatic"; `a` becomes
/// "any aromatic atom"; every other symbol becomes the wildcard.
fn aromatic_element_to_query(name: &str) -> QueryNode<AtomQueryPredicate> {
    let atomic_number = match name {
        "c" => 6,
        "n" => 7,
        "o" => 8,
        "p" => 15,
        "s" => 16,
        // The tokenizer routes `a` here, so it must not fall through to the
        // wildcard below.
        "a" => return QueryNode::Predicate(AtomQueryPredicate::IsAromatic(true)),
        _ => return QueryNode::Predicate(AtomQueryPredicate::Any),
    };
    QueryNode::And(vec![
        QueryNode::Predicate(AtomQueryPredicate::AtomicNumber(atomic_number)),
        QueryNode::Predicate(AtomQueryPredicate::IsAromatic(true)),
    ])
}
/// Maps a single bond character to its bond query.
///
/// `-`, `=`, `#` select a bond order, `:` an aromatic bond, `@` a ring bond,
/// `/` and `\` a bond direction. `~` and every other character yield the
/// wildcard.
fn bond_spec_to_query(ch: char) -> QueryNode<BondQueryPredicate> {
    let predicate = match ch {
        '-' => BondQueryPredicate::Order(BondOrder::Single),
        '=' => BondQueryPredicate::Order(BondOrder::Double),
        '#' => BondQueryPredicate::Order(BondOrder::Triple),
        ':' => BondQueryPredicate::IsAromatic(true),
        '@' => BondQueryPredicate::IsInRing(true),
        '/' => BondQueryPredicate::Direction(crate::BondDirection::EndUpRight),
        '\\' => BondQueryPredicate::Direction(crate::BondDirection::EndDownRight),
        // `~` and anything unrecognised match every bond.
        _ => BondQueryPredicate::Any,
    };
    QueryNode::Predicate(predicate)
}
/// Counts the neighbours of atom `aidx` that are not plain hydrogen.
///
/// A neighbour counts when its atomic number is not 1, or when it is a
/// hydrogen with an explicit isotope greater than 1. Neighbours that cannot be
/// resolved to an atom are skipped. The count saturates at `u8::MAX` instead of
/// overflowing.
fn count_non_hydrogen_neighbors(adj: &AdjacencyList, mol: &Molecule, aidx: usize) -> u8 {
    let mut count = 0u8;
    for nbr in adj.neighbors_of(aidx) {
        let Some(nbr_atom) = mol.atom(AtomId::new(nbr.atom_index)) else {
            continue;
        };
        let counts =
            nbr_atom.atomic_number() != 1 || nbr_atom.isotope().is_some_and(|i| i > 1);
        if counts {
            count = count.saturating_add(1);
        }
    }
    count
}
/// Counts the bonds incident to atom `aidx` that belong to at least one ring.
///
/// The count saturates at `u8::MAX` instead of overflowing.
fn count_ring_bonds(adj: &AdjacencyList, mol: &Molecule, ri: &RingInfo, aidx: usize) -> u8 {
    // `mol` is not consulted; the parameter is kept so call sites stay unchanged.
    let _ = mol;
    let mut count = 0u8;
    for nbr in adj.neighbors_of(aidx) {
        if ri.num_bond_rings(nbr.bond) > 0 {
            count = count.saturating_add(1);
        }
    }
    count
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected query for an unbracketed, non-aromatic element symbol.
    fn aliphatic(atomic_number: u8) -> QueryNode<AtomQueryPredicate> {
        QueryNode::Predicate(AtomQueryPredicate::AtomType {
            atomic_number,
            aromatic: false,
        })
    }

    #[test]
    fn test_organic_element_predicates() {
        assert_eq!(
            organic_element_to_query("*"),
            QueryNode::Predicate(AtomQueryPredicate::Any)
        );
        let expected = [("C", 6), ("N", 7), ("O", 8), ("Cl", 17), ("Br", 35), ("F", 9)];
        for (symbol, atomic_number) in expected {
            assert_eq!(
                organic_element_to_query(symbol),
                aliphatic(atomic_number),
                "symbol {symbol}"
            );
        }
    }

    #[test]
    fn test_aromatic_element_predicates() {
        // Pin the exact children, not just how many there are.
        for (symbol, atomic_number) in [("c", 6), ("n", 7)] {
            assert_eq!(
                aromatic_element_to_query(symbol),
                QueryNode::And(vec![
                    QueryNode::Predicate(AtomQueryPredicate::AtomicNumber(atomic_number)),
                    QueryNode::Predicate(AtomQueryPredicate::IsAromatic(true)),
                ]),
                "symbol {symbol}"
            );
        }
    }

    #[test]
    fn test_bond_spec_to_query() {
        assert_eq!(
            bond_spec_to_query('-'),
            QueryNode::Predicate(BondQueryPredicate::Order(BondOrder::Single))
        );
        assert_eq!(
            bond_spec_to_query('='),
            QueryNode::Predicate(BondQueryPredicate::Order(BondOrder::Double))
        );
        assert_eq!(
            bond_spec_to_query('#'),
            QueryNode::Predicate(BondQueryPredicate::Order(BondOrder::Triple))
        );
        assert_eq!(
            bond_spec_to_query(':'),
            QueryNode::Predicate(BondQueryPredicate::IsAromatic(true))
        );
        assert_eq!(
            bond_spec_to_query('~'),
            QueryNode::Predicate(BondQueryPredicate::Any)
        );
    }

    #[test]
    fn test_query_node_logic() {
        let or_node = QueryNode::or(vec![
            QueryNode::predicate(AtomQueryPredicate::AtomicNumber(6)),
            QueryNode::predicate(AtomQueryPredicate::AtomicNumber(7)),
        ]);
        assert_eq!(
            or_node,
            QueryNode::Or(vec![
                QueryNode::Predicate(AtomQueryPredicate::AtomicNumber(6)),
                QueryNode::Predicate(AtomQueryPredicate::AtomicNumber(7)),
            ])
        );

        let and_node = QueryNode::and(vec![
            QueryNode::predicate(AtomQueryPredicate::IsAromatic(true)),
            QueryNode::predicate(AtomQueryPredicate::AtomicNumber(6)),
        ]);
        assert_eq!(
            and_node,
            QueryNode::And(vec![
                QueryNode::Predicate(AtomQueryPredicate::IsAromatic(true)),
                QueryNode::Predicate(AtomQueryPredicate::AtomicNumber(6)),
            ])
        );

        let not_node = QueryNode::not(QueryNode::predicate(AtomQueryPredicate::Any));
        assert_eq!(
            not_node,
            QueryNode::Not(Box::new(QueryNode::Predicate(AtomQueryPredicate::Any)))
        );
    }

    #[test]
    fn test_atom_predicate_atomic_number() {
        // TODO(review): placeholder. This only checks that an empty molecule
        // and a builder can be constructed; it does not evaluate any atom
        // predicate yet.
        let _mol = Molecule::new();
        let _builder = Molecule::builder();
    }
}