#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/mol.h>
#include <openbabel/atom.h>
#include <openbabel/elements.h>
#include <openbabel/bond.h>
#include <openbabel/obiter.h>
#include <openbabel/typer.h>
#include <openbabel/stereo/tetrahedral.h>
#include <openbabel/stereo/cistrans.h>
#include "smiley.h"
using namespace std;
namespace OpenBabel
{
template<typename T>
void print_vector(const std::string &label, const std::vector<T> &v)
{
std::cout << label << ": ";
for (std::size_t i = 0; i < v.size(); ++i)
std::cout << v[i] << " ";
std::cout << std::endl;
}
struct OpenBabelCallback : public Smiley::CallbackBase
{
enum UpDown {
IsNotUpDown,
IsUp,
IsDown
};
OpenBabelCallback(OBMol *pmol) : mol(pmol)
{
}
void clear()
{
}
void addAtom(int element, bool aromatic, int isotope, int hCount, int charge, int atomClass)
{
OBAtom *atom = mol->NewAtom();
atom->SetAtomicNum(element);
indices.push_back(mol->NumAtoms());
if (aromatic)
atom->SetAromatic();
if (hCount > -1) {
if (hCount == 0)
atom->SetSpinMultiplicity(2);
for (int i = 0; i < hCount; ++i) {
OBAtom *hydrogen = mol->NewAtom();
hydrogen->SetAtomicNum(1);
mol->AddBond(atom->GetIdx(), hydrogen->GetIdx(), 1);
upDown.push_back(IsNotUpDown);
}
}
if (isotope > 0)
atom->SetIsotope(isotope);
atom->SetFormalCharge(charge);
}
void addBond(int source, int target, int order, bool isUp, bool isDown)
{
if (isDown)
upDown.push_back(IsDown);
else if (isUp)
upDown.push_back(IsUp);
else
upDown.push_back(IsNotUpDown);
mol->AddBond(indices[source], indices[target], order);
if (order == 5)
mol->GetBond(mol->NumBonds() - 1)->SetAromatic();
}
void setChiral(int index, Smiley::Chirality chirality, const std::vector<int> &chiralNbrs)
{
unsigned long center = indices[index] - 1;
unsigned long from = indices[chiralNbrs[0]] - 1;
std::vector<unsigned long> refs(chiralNbrs.size() - 1);
for (std::size_t i = 0; i < refs.size(); ++i)
if (chiralNbrs[i + 1] == Smiley::implicitHydrogen())
refs[i] = OBStereo::ImplicitRef;
else
refs[i] = indices[chiralNbrs[i + 1]] - 1;
switch (chirality) {
case Smiley::Clockwise:
switch (chiralNbrs.size()) {
case 4:
OBTetrahedralStereo *stereo = new OBTetrahedralStereo(mol);
stereo->SetConfig(OBTetrahedralStereo::Config(center, from, refs));
mol->SetData(stereo);
break;
}
break;
case Smiley::AntiClockwise:
switch (chiralNbrs.size()) {
case 4:
OBTetrahedralStereo *stereo = new OBTetrahedralStereo(mol);
stereo->SetConfig(OBTetrahedralStereo::Config(center, from, refs, OBStereo::AntiClockwise));
mol->SetData(stereo);
break;
}
break;
}
}
OBMol *mol;
std::vector<UpDown> upDown;
std::vector<int> indices;
};
class SmileyFormat : public OBMoleculeFormat
{
public:
SmileyFormat()
{
OBConversion::RegisterFormat("smy", this);
}
virtual const char* Description() {
return "SMILES format using Smiley parser\n\n"
"The Smiley parser presents an alternative to the standard SMILES parser\n"
"(:ref:`SMILES_format`). It was written to be strictly compatible with the\n"
"OpenSMILES standard (http://opensmiles.org). In comparison, the standard\n"
"parser is more forgiving to erroneous input, and also supports some extensions\n"
"such as for radicals.\n\n"
"In addition, the Smiley parser returns detailed error messages when problems\n"
"arise parsing or validating the SMILES, whereas the standard parser seldom\n"
"describes the specific problem. For a detailed description of the OpenSMILES\n"
"semantics, the specification should be consulted. In addition to syntactical\n"
"and grammatical correctness, the Smiley parser also verifies some basic\n"
"semantics.\n"
"\n"
"Here are some examples of the errors reported::\n"
"\n"
" SyntaxError: Bracket atom expression contains invalid trailing characters.\n"
" F.FB(F)F.[NH2+251][C@@H](CP(c1ccccc1)c1ccccc1)C(C)(C)C 31586112\n"
" ^^\n"
" SyntaxError: Unmatched branch opening.\n"
" CC(CC\n"
" ^^^\n"
" SyntaxError: Unmatched branch closing.\n"
" CC)CC\n"
" ^^^\n"
" SemanticsError: Unmatched ring bond.\n"
" C1CCC\n"
" ^\n"
" SemanticsError: Conflicing ring bonds.\n"
" C-1CCCCC=1\n"
"\n"
"Hydrogen with Hydrogen Count\n"
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
"Hydrogen atoms can not have a hydrogen count. Hydrogen bound to a hydrogen\n"
"atom should be specified by two bracket atom expressions.\n"
"\n"
"Examples::\n"
"\n"
" [HH] invalid\n"
" [HH1] invalid (same as [HH]\n"
" [HH3] invalid\n"
" [HH0] valid (same as [H])\n"
" [H][H] valid\n"
"\n"
"Unmatched Ring Bond\n"
"~~~~~~~~~~~~~~~~~~~\n"
"Report unmatched ring bonds.\n"
"\n"
"Example::\n"
"\n"
" C1CCC\n"
"\n"
"Conflicting Ring Bonds\n"
"~~~~~~~~~~~~~~~~~~~~~~\n"
"When the bond type for ring bonds are explicitly specified at both ends,\n"
"these should be the same.\n"
"\n"
"Example::\n"
"\n"
" C-1CCCCCC=1\n"
"\n"
"Invalid Ring Bonds\n"
"~~~~~~~~~~~~~~~~~~\n"
"There are two types of invalid ring bonds. The first is when two atoms both\n"
"have the same two ring bonds. This would mean adding a parallel edge in the\n"
"graph which is not allowed. The second type is similar but results in a\n"
"self-loop by having a ring bond number twice.\n"
"\n"
"Examples::\n"
"\n"
" C12CCCC12 parallel bond\n"
" C11 self-loop bond\n"
"\n"
"Invalid Chiral Valence\n"
"~~~~~~~~~~~~~~~~~~~~~~\n"
"When an atom is specified as being chiral, it should have the correct\n"
"number of neighboring atoms (possibly including an implicit H inside the\n"
"bracket.\n"
"\n"
"The valid valences are::\n"
"\n"
" Tetrahedral (TH) : 4\n"
" Allene (AL) : 4 (*)\n"
" Square Planar (SP) : 4\n"
" Trigonal Bypiramidal (TB) : 5\n"
" Octahedral(OH) : 6\n"
"\n"
" (*) The chiral atom has only 2 bonds but the neighbor's neighbors are\n"
" counted: NC(Br)=[C@AL1]=C(F)I\n"
"\n"
"Invalid Chiral Hydrogen Count\n"
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"
"Chiral atoms can only have one hydrogen in their bracket since multiple\n"
"hydrogens would make them not chiral.\n"
"\n"
"Example::\n"
"\n"
" C[C@H2]F\n\n";
}
virtual const char* SpecificationURL()
{
return "http://opensmiles.org";
}
virtual const char* GetMIMEType()
{
return "chemical/x-daylight-smiles";
}
virtual unsigned int Flags()
{
return NOTWRITABLE;
}
virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
private:
void CreateCisTrans(OBMol *mol, const std::vector<OpenBabelCallback::UpDown> &upDown);
bool AssignNbrAtoms(const std::vector<OpenBabelCallback::UpDown> &upDown, OBAtom *atom,
unsigned long &above, unsigned long &below);
};
SmileyFormat theSmileyFormat;
bool SmileyFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
{
OBMol* pmol = pOb->CastAndClear<OBMol>();
if (!pmol)
return false;
istream& ifs = *pConv->GetInStream();
std::string smiles;
std::getline(ifs, smiles);
std::size_t space_pos = smiles.find(" ");
std::size_t tab_pos = smiles.find("\t");
if (space_pos != std::string::npos && tab_pos != std::string::npos)
space_pos = std::min(space_pos, tab_pos);
else if (tab_pos != std::string::npos)
space_pos = tab_pos;
if (space_pos != std::string::npos) {
while (space_pos < smiles.size() && (smiles[space_pos] == ' ' || smiles[space_pos] == '\t'))
++space_pos;
pmol->SetTitle(smiles.substr(space_pos).c_str());
}
pmol->BeginModify();
pmol->SetDimension(0);
OpenBabelCallback callback(pmol);
Smiley::Parser<OpenBabelCallback> parser(callback);
try {
parser.parse(smiles);
} catch (Smiley::Exception &e) {
if (e.type() == Smiley::Exception::SyntaxError)
std::cerr << "Syntax";
else
std::cerr << "Semantics";
std::cerr << "Error: " << e.what() << "." << std::endl;
std::cerr << smiles << std::endl;
for (std::size_t i = 0; i < e.pos(); ++i)
std::cerr << " ";
for (std::size_t i = 0; i < e.length(); ++i)
std::cerr << "^";
std::cerr << std::endl;
}
pmol->EndModify();
pmol->SetAromaticPerceived();
CreateCisTrans(pmol, callback.upDown);
StereoFrom0D(pmol);
return true;
}
bool SmileyFormat::AssignNbrAtoms(const std::vector<OpenBabelCallback::UpDown> &upDown,
OBAtom *atom, unsigned long &aboveId, unsigned long &belowId)
{
OBAtom *above = nullptr;
OBAtom *below = nullptr;
OBAtom *unspecified = nullptr;
FOR_BONDS_OF_ATOM (bond, atom) {
if (!bond->IsAromatic() && bond->GetBondOrder() == 2)
continue;
OBAtom *nbr = bond->GetNbrAtom(atom);
switch (upDown[bond->GetIdx()]) {
case OpenBabelCallback::IsUp:
if (nbr->GetIndex() < atom->GetIndex() && bond->GetBeginAtomIdx() < bond->GetEndAtomIdx()) {
if (below)
return false;
below = nbr;
} else {
if (above)
return false;
above = nbr;
}
break;
case OpenBabelCallback::IsDown:
if (nbr->GetIndex() < atom->GetIndex() && bond->GetBeginAtomIdx() < bond->GetEndAtomIdx()) {
if (above)
return false;
above = nbr;
} else {
if (below)
return false;
below = nbr;
}
break;
case OpenBabelCallback::IsNotUpDown:
unspecified = nbr;
break;
}
}
if (!above && !below)
return true;
if (above && unspecified)
below = unspecified;
else if (below && unspecified)
above = unspecified;
aboveId = above ? above->GetId() : OBStereo::ImplicitRef;
belowId = below ? below->GetId() : OBStereo::ImplicitRef;
return true;
}
void SmileyFormat::CreateCisTrans(OBMol *mol, const std::vector<OpenBabelCallback::UpDown> &upDown)
{
FOR_BONDS_OF_MOL (doubleBond, mol) {
if (doubleBond->GetBondOrder() != 2 || doubleBond->IsAromatic())
continue;
OBAtom *source = doubleBond->GetBeginAtom();
OBAtom *target = doubleBond->GetEndAtom();
int v1 = source->GetExplicitDegree();
int v2 = target->GetExplicitDegree();
if (v1 < 2 || v1 > 3 || v2 < 2 || v2 > 3)
continue;
unsigned long aboveSource = OBStereo::ImplicitRef;
unsigned long belowSource = OBStereo::ImplicitRef;
if (!AssignNbrAtoms(upDown, source, aboveSource, belowSource)) {
std::cerr << "Invalid cis/trans specification" << std::endl;
continue;
}
if (aboveSource == OBStereo::ImplicitRef && belowSource == OBStereo::ImplicitRef)
continue;
unsigned long aboveTarget = OBStereo::ImplicitRef;
unsigned long belowTarget = OBStereo::ImplicitRef;
if (!AssignNbrAtoms(upDown, target, aboveTarget, belowTarget)) {
std::cerr << "Invalid cis/trans specification" << std::endl;
continue;
}
if (aboveTarget == OBStereo::ImplicitRef && belowTarget == OBStereo::ImplicitRef)
continue;
OBCisTransStereo *stereo = new OBCisTransStereo(mol);
stereo->SetConfig(OBCisTransStereo::Config(source->GetId(), target->GetId(),
OBStereo::MakeRefs(aboveSource, belowSource, belowTarget, aboveTarget),
OBStereo::ShapeU));
mol->SetData(stereo);
}
}
}