#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/mol.h>
#include <openbabel/atom.h>
#include <openbabel/elements.h>
#include <openbabel/bond.h>
#include <openbabel/obiter.h>
#include <openbabel/generic.h>
#include <cstdlib>
#include <openbabel/math/spacegroup.h>
#include <sstream>
#include <vector>
#include <list>
#include <map>
#include <set>
#define NOCHARGE FLT_MAX
#ifdef _MSC_VER
#pragma warning( disable : 4503 )
#endif
using namespace std;
namespace OpenBabel
{
class CIFFormat : public OBMoleculeFormat
{
public:
CIFFormat()
{
RegisterFormat("cif", "chemical/x-cif");
}
virtual const char* Description() {
return
"Crystallographic Information File\n"
"The CIF file format is the standard interchange format for small-molecule crystal structures\n\n"
"Fractional coordinates are converted to cartesian ones using the following convention:\n\n"
"- The x axis is parallel to a\n"
"- The y axis is in the (a,b) plane\n"
"- The z axis is along c*\n\n"
"Ref: Int. Tables for Crystallography (2006), vol. B, sec 3.3.1.1.1\n"
" (the matrix used is the 2nd form listed)\n\n"
"Read Options e.g. -ab:\n"
" s Output single bonds only\n"
" b Disable bonding entirely\n"
" B Use bonds listed in CIF file from _geom_bond_etc records (overrides option b)\n\n"
"Write Options e.g. -xg:\n"
" g Write bonds using _geom_bond_etc fields \n\n";
};
virtual const char* SpecificationURL()
{return "http://www.iucr.org/iucr-top/cif/spec/";};
virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
};
struct ci_char_traits : public std::char_traits<char>
{
static bool eq( char c1, char c2 );
static bool ne( char c1, char c2 );
static bool lt( char c1, char c2 );
static int compare(const char* s1,const char* s2,size_t n );
static const char* find( const char* s, int n, char a );
};
typedef std::basic_string<char, ci_char_traits> ci_string;
int strnicmp(const char *s1, const char *s2, int len)
{
unsigned char c1, c2;
while (len)
{
c1 = *s1; c2 = *s2;
s1++; s2++;
if (!c1) return c2 ? -1 : 0;
if (!c2) return 1;
if (c1 != c2)
{
c1 = tolower(c1);
c2 = tolower(c2);
if (c1 != c2) return c1 < c2 ? -1 : 1;
}
len--;
}
return 0;
}
bool ci_char_traits::eq( char c1, char c2 )
{return tolower(c1) == tolower(c2);}
bool ci_char_traits::ne( char c1, char c2 )
{return tolower(c1) != tolower(c2);}
bool ci_char_traits::lt( char c1, char c2 )
{return tolower(c1) < tolower(c2);}
int ci_char_traits::compare(const char* s1,const char* s2,size_t n )
{return strnicmp( s1, s2, n );}
const char* ci_char_traits::find( const char* s, int n, char a )
{
while( n-- > 0 && tolower(*s) != tolower(a) ) ++s;
return s;
}
class CIFData
{
public:
CIFData();
void ExtractAll();
void ExtractName();
void ExtractUnitCell();
void ExtractSpacegroup();
void ExtractAtomicPositions();
void ExtractBonds();
void ExtractCharges();
void Cartesian2FractionalCoord();
void Fractional2CartesianCoord();
void f2c(float &x,float &y, float &z);
void c2f(float &x,float &y, float &z);
void CalcMatrices();
std::list<std::string> mvComment;
std::map<ci_string,std::string> mvItem;
std::map<std::set<ci_string>,std::map<ci_string,std::vector<std::string> > > mvLoop;
std::vector<float> mvLatticePar;
unsigned int mSpacegroupNumberIT;
std::string mSpacegroupSymbolHall;
std::string mSpacegroupHermannMauguin;
std::string mName;
std::string mFormula;
struct CIFAtom
{
CIFAtom();
std::string mLabel;
std::string mSymbol;
std::vector<float> mCoordFrac;
std::vector<float> mCoordCart;
float mOccupancy;
float mCharge;
};
std::vector<CIFAtom> mvAtom;
struct CIFBond
{
std::string mLabel1,mLabel2;
float mDistance;
};
std::vector<CIFBond> mvBond;
float mOrthMatrix[3][3];
float mOrthMatrixInvert[3][3];
const SpaceGroup *mSpaceGroup;
std::string mDataBlockName;
};
class CIF
{
public:
CIF(std::istream &in, const bool interpret=true);
void Parse(std::istream &in);
std::map<std::string,CIFData> mvData;
std::list<std::string> mvComment;
};
float CIFNumeric2Float(const std::string &s);
int CIFNumeric2Int(const std::string &s);
template <typename T> string to_string(T pNumber)
{
ostringstream oOStrStream;
oOStrStream << pNumber;
return oOStrStream.str();
}
bool is_double(const std::string& s, double& r_double);
CIFData::CIFAtom::CIFAtom():
mLabel(""),mSymbol(""),mOccupancy(1.0f)
{}
CIFData::CIFData()
{}
void CIFData::ExtractAll()
{
{
stringstream ss;
ss<<"CIF: interpreting data block: "<<mDataBlockName;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obInfo);
}
if(mDataBlockName=="data_global")
{ bool empty_iucrjournal_block=true;
if(mvItem.find("_cell_length_a" )!=mvItem.end()) empty_iucrjournal_block=false;
if(mvItem.find("_cell_length_b" )!=mvItem.end()) empty_iucrjournal_block=false;
if(mvItem.find("_cell_length_c" )!=mvItem.end()) empty_iucrjournal_block=false;
for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
loop!=mvLoop.end();++loop)
{
if(loop->second.find("_atom_site_fract_x")!=loop->second.end()) empty_iucrjournal_block=false;
if(loop->second.find("_atom_site_fract_y")!=loop->second.end()) empty_iucrjournal_block=false;
if(loop->second.find("_atom_site_fract_z")!=loop->second.end()) empty_iucrjournal_block=false;
if(loop->second.find("_atom_site_Cartn_x")!=loop->second.end()) empty_iucrjournal_block=false;
if(loop->second.find("_atom_site_Cartn_y")!=loop->second.end()) empty_iucrjournal_block=false;
if(loop->second.find("_atom_site_Cartn_z")!=loop->second.end()) empty_iucrjournal_block=false;
}
if(empty_iucrjournal_block)
{
stringstream ss;
ss << "CIF WARNING: found en empty 'data_global' block - SKIPPING\n"
<< " (you can safely ignore this if reading a CIF file from an IUCr journal)";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
return;
}
}
this->ExtractName();
this->ExtractSpacegroup();
this->ExtractUnitCell();
this->ExtractAtomicPositions();
if(mvAtom.size()==0)
{
stringstream ss;
ss << "CIF Error: no atom found ! (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
}
this->ExtractBonds();
this->ExtractCharges();
}
void CIFData::ExtractUnitCell()
{
const int spgid= mSpaceGroup->GetId();
if( (mvItem.find("_cell_length_a")!=mvItem.end())
||(mvItem.find("_cell_length_b")!=mvItem.end())
||(mvItem.find("_cell_length_c")!=mvItem.end()) )
{
mvLatticePar.resize(6);
for(unsigned int i=0;i<6;i++) mvLatticePar[i]=float(0);
map<ci_string,string>::const_iterator positem;
positem=mvItem.find("_cell_length_a");
if(positem!=mvItem.end())
mvLatticePar[0]=CIFNumeric2Float(positem->second);
positem=mvItem.find("_cell_length_b");
if(positem!=mvItem.end())
mvLatticePar[1]=CIFNumeric2Float(positem->second);
positem=mvItem.find("_cell_length_c");
if(positem!=mvItem.end())
mvLatticePar[2]=CIFNumeric2Float(positem->second);
positem=mvItem.find("_cell_angle_alpha");
if(positem!=mvItem.end())
mvLatticePar[3]=CIFNumeric2Float(positem->second);
positem=mvItem.find("_cell_angle_beta");
if(positem!=mvItem.end())
mvLatticePar[4]=CIFNumeric2Float(positem->second);
positem=mvItem.find("_cell_angle_gamma");
if(positem!=mvItem.end())
mvLatticePar[5]=CIFNumeric2Float(positem->second);
stringstream ss;
ss << "Found Lattice parameters:" << mvLatticePar[0] << " , " << mvLatticePar[1] << " , " << mvLatticePar[2]
<< " , " << mvLatticePar[3] << " , " << mvLatticePar[4] << " , " << mvLatticePar[5];
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
mvLatticePar[3] = static_cast<float> (mvLatticePar[3] * DEG_TO_RAD); mvLatticePar[4] = static_cast<float> (mvLatticePar[4] * DEG_TO_RAD);
mvLatticePar[5] = static_cast<float> (mvLatticePar[5] * DEG_TO_RAD);
if((spgid>2)&&(spgid<=15))
{ }
if((spgid>15)&&(spgid<=142))
{ if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2;
}
if((spgid>74)&&(spgid<=142))
{ if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0];
if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1];
}
if((spgid>142)&&(spgid<=194))
{ const string::size_type pos = mSpaceGroup->GetHallName().find('R');
if(pos==std::string::npos)
{ float a=0;
if(mvLatticePar[0]>a) a=mvLatticePar[0];
if(mvLatticePar[1]>a) a=mvLatticePar[1];
if(mvLatticePar[2]>a) a=mvLatticePar[2];
if(mvLatticePar[0]==0) mvLatticePar[0]=a;
if(mvLatticePar[1]==0) mvLatticePar[1]=a;
if(mvLatticePar[2]==0) mvLatticePar[2]=a;
float alpha=0;
if(mvLatticePar[3]>alpha) alpha=mvLatticePar[3];
if(mvLatticePar[4]>alpha) alpha=mvLatticePar[4];
if(mvLatticePar[5]>alpha) alpha=mvLatticePar[5];
if(mvLatticePar[3]==0) mvLatticePar[3]=alpha;
if(mvLatticePar[4]==0) mvLatticePar[4]=alpha;
if(mvLatticePar[5]==0) mvLatticePar[5]=alpha;
}
else
{ if(mvLatticePar[1]==0) mvLatticePar[1]=mvLatticePar[0];
if(mvLatticePar[0]==0) mvLatticePar[0]=mvLatticePar[1];
if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
if(mvLatticePar[5]==0) mvLatticePar[5]=2*M_PI/3;
}
}
if(spgid>194)
{
if(mvLatticePar[3]==0) mvLatticePar[3]=M_PI/2;
if(mvLatticePar[4]==0) mvLatticePar[4]=M_PI/2;
if(mvLatticePar[5]==0) mvLatticePar[5]=M_PI/2;
float a=0;
if(mvLatticePar[0]>a) a=mvLatticePar[0];
if(mvLatticePar[1]>a) a=mvLatticePar[1];
if(mvLatticePar[2]>a) a=mvLatticePar[2];
if(mvLatticePar[0]==0) mvLatticePar[0]=a;
if(mvLatticePar[1]==0) mvLatticePar[1]=a;
if(mvLatticePar[2]==0) mvLatticePar[2]=a;
}
if(mvLatticePar[3]<1e-6)
{
stringstream ss;
ss << "CIF WARNING: missing alpha value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
mvLatticePar[3]=90*DEG_TO_RAD;
}
if(mvLatticePar[4]<1e-6)
{
stringstream ss;
ss << "CIF WARNING: missing beta value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
mvLatticePar[4]=90*DEG_TO_RAD;
}
if(mvLatticePar[5]<1e-6)
{
stringstream ss;
ss << "CIF WARNING: missing gamma value, defaulting to 90 degrees (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
mvLatticePar[5]=90*DEG_TO_RAD;
}
if(mvLatticePar[1]<1e-6)
{
stringstream ss;
ss << "CIF Error: missing b lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
}
if(mvLatticePar[2]<1e-6)
{
stringstream ss;
ss << "CIF Error: missing c lattice parameter - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
}
this->CalcMatrices();
}
else
{
stringstream ss;
ss << "CIF Error: missing a,b and c value - cannot interpret structure ! (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obError);
}
}
void CIFData::ExtractSpacegroup()
{
map<ci_string,string>::const_iterator positem;
bool found = false;
positem=mvItem.find("_space_group_IT_number");
if(positem!=mvItem.end())
{
mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
found = true;
stringstream ss;
ss << "Found spacegroup IT number:" << mSpacegroupNumberIT;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
else
{
positem=mvItem.find("_symmetry_Int_Tables_number");
if(positem!=mvItem.end())
{
mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
found = true;
stringstream ss;
ss << "Found spacegroup IT number (with OBSOLETE CIF #1.0 TAG):" << mSpacegroupNumberIT;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
else {
positem=mvItem.find("_symmetry_group_IT_number");
if(positem!=mvItem.end())
{
mSpacegroupNumberIT=CIFNumeric2Int(positem->second);
found = true;
stringstream ss;
ss << "Found spacegroup IT number (with NON-STANDARD CIF TAG):" << mSpacegroupNumberIT;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
else
mSpacegroupNumberIT=0;
}
}
positem=mvItem.find("_space_group_name_Hall");
if(positem!=mvItem.end())
{
mSpacegroupSymbolHall=positem->second;
found = true;
obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol:"+mSpacegroupSymbolHall, obDebug);
}
else
{
positem=mvItem.find("_symmetry_space_group_name_Hall");
if(positem!=mvItem.end())
{
mSpacegroupSymbolHall=positem->second;
found = true;
obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hall symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupSymbolHall, obDebug);
}
}
positem=mvItem.find("_space_group_name_H-M_alt");
if(positem!=mvItem.end())
{
mSpacegroupHermannMauguin=positem->second;
found = true;
obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol:"+mSpacegroupHermannMauguin, obDebug);
}
else
{
positem=mvItem.find("_symmetry_space_group_name_H-M");
if(positem!=mvItem.end())
{
mSpacegroupHermannMauguin=positem->second;
found = true;
obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup Hermann-Mauguin symbol (with OBSOLETE CIF #1.0 TAG):"+mSpacegroupHermannMauguin, obDebug);
}
}
positem=mvItem.find("_space_group_IT_coordinate_system_code");
if(positem!=mvItem.end())
{
obErrorLog.ThrowError(__FUNCTION__, "Found spacegroup IT_coordinate_system_code:"+positem->second, obDebug);
if((mSpacegroupHermannMauguin.length()>0) && (positem->second=="1" || positem->second=="2"))
{
mSpacegroupHermannMauguin=mSpacegroupHermannMauguin+string(":")+positem->second;
}
else
{
stringstream ss;
ss << "CIF Error: found DDL2 tag _space_group.IT_coordinate_system_code ("<<positem->second<<")"<<endl
<<" but could not interpret it ! Origin choice or axis may be incorrect.";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
}
}
mSpaceGroup = nullptr;
if (mSpacegroupSymbolHall.length() > 0) {
for(std::string::iterator pos=mSpacegroupSymbolHall.begin();pos!=mSpacegroupSymbolHall.end();)
{
if((char)(*pos)==' ') pos=mSpacegroupSymbolHall.erase(pos);
else ++pos;
}
mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupSymbolHall);
}
if (mSpaceGroup == nullptr && mSpacegroupHermannMauguin.length() > 0) {
mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupHermannMauguin);
}
if (mSpaceGroup == nullptr && mSpacegroupNumberIT != 0) {
mSpaceGroup = SpaceGroup::GetSpaceGroup(mSpacegroupNumberIT);
}
if (mSpaceGroup == nullptr) {
SpaceGroup *sg = new SpaceGroup();
positem=mvItem.find("_space_group_symop_operation_xyz");
if(positem==mvItem.end())
positem=mvItem.find("_symmetry_equiv_pos_as_xyz");
if(positem!=mvItem.end())
{
sg->AddTransform (positem->second);
found = true;
}
else {
for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
loop!=mvLoop.end();++loop)
{
map<ci_string,vector<string> >::const_iterator pos;
unsigned i, nb;
pos=loop->second.find("_space_group_symop_operation_xyz");
if (pos==loop->second.end())
pos=loop->second.find("_symmetry_equiv_pos_as_xyz");
if (pos!=loop->second.end())
{
nb=pos->second.size();
found = true;
for (i = 0; i < nb; i++)
sg->AddTransform(pos->second[i]);
break; }
}
if (found)
mSpaceGroup = SpaceGroup::Find(sg);
if (mSpaceGroup == nullptr && sg->IsValid())
mSpaceGroup = sg;
else
delete sg;
}
}
if (mSpaceGroup == nullptr)
{
stringstream ss;
ss << "CIF Error: missing spacegroup description: defaulting to P1... (in data block:"<<mDataBlockName<<")";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
mSpaceGroup = SpaceGroup::GetSpaceGroup(1);
}
mSpacegroupSymbolHall = mSpaceGroup->GetHallName();
}
void CIFData::ExtractName()
{
map<ci_string,string>::const_iterator positem;
positem=mvItem.find("_chemical_name_systematic");
if(positem!=mvItem.end())
{
mName=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
}
else
{
positem=mvItem.find("_chemical_name_mineral");
if(positem!=mvItem.end())
{
mName=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
}
else
{
positem=mvItem.find("_chemical_name_structure_type");
if(positem!=mvItem.end())
{
mName=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
}
else
{
positem=mvItem.find("_chemical_name_common");
if(positem!=mvItem.end())
{
mName=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical name:"+mName, obDebug);
}
}
}
}
positem=mvItem.find("_chemical_formula_analytical");
if(positem!=mvItem.end())
{
mFormula=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
}
else
{
positem=mvItem.find("_chemical_formula_structural");
if(positem!=mvItem.end())
{
mFormula=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
}
else
{
positem=mvItem.find("_chemical_formula_iupac");
if(positem!=mvItem.end())
{
mFormula=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
}
else
{
positem=mvItem.find("_chemical_formula_moiety");
if(positem!=mvItem.end())
{
mFormula=positem->second;
obErrorLog.ThrowError(__FUNCTION__, "Found chemical formula:"+mFormula, obDebug);
}
}
}
}
}
void CIFData::ExtractAtomicPositions()
{
map<ci_string,string>::const_iterator positem;
for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin();
loop!=mvLoop.end();++loop)
{
if(mvAtom.size()>0) break; map<ci_string,vector<string> >::const_iterator posx,posy,posz,poslabel,possymbol,posoccup;
posx=loop->second.find("_atom_site_fract_x");
posy=loop->second.find("_atom_site_fract_y");
posz=loop->second.find("_atom_site_fract_z");
unsigned int nb = 0;
if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end()))
{
nb=posx->second.size();
mvAtom.resize(nb);
for(unsigned int i=0;i<nb;++i)
{
mvAtom[i].mCoordFrac.resize(3);
mvAtom[i].mCoordFrac[0]=CIFNumeric2Float(posx->second[i]);
mvAtom[i].mCoordFrac[1]=CIFNumeric2Float(posy->second[i]);
mvAtom[i].mCoordFrac[2]=CIFNumeric2Float(posz->second[i]);
}
this->Fractional2CartesianCoord();
}
else
{
posx=loop->second.find("_atom_site_Cartn_x");
posy=loop->second.find("_atom_site_Cartn_y");
posz=loop->second.find("_atom_site_Cartn_z");
if( (posx!=loop->second.end()) && (posy!=loop->second.end()) && (posz!=loop->second.end()))
{
nb=posx->second.size();
mvAtom.resize(nb);
for(unsigned int i=0;i<nb;++i)
{
mvAtom[i].mCoordCart.resize(3);
mvAtom[i].mCoordCart[0]=CIFNumeric2Float(posx->second[i]);
mvAtom[i].mCoordCart[1]=CIFNumeric2Float(posy->second[i]);
mvAtom[i].mCoordCart[2]=CIFNumeric2Float(posz->second[i]);
}
this->Cartesian2FractionalCoord();
}
}
if(mvAtom.size()>0)
{ possymbol=loop->second.find("_atom_site_type_symbol");
if(possymbol!=loop->second.end())
for(unsigned int i=0;i<nb;++i)
mvAtom[i].mSymbol=possymbol->second[i];
poslabel=loop->second.find("_atom_site_label");
if(poslabel!=loop->second.end())
for(unsigned int i=0;i<nb;++i)
{
mvAtom[i].mLabel=poslabel->second[i];
if(possymbol==loop->second.end())
{ int nbc=0;
if(mvAtom[i].mLabel.size()==1)
if(isalpha(mvAtom[i].mLabel[0])) nbc=1;
if(mvAtom[i].mLabel.size()>=2)
{
if(isalpha(mvAtom[i].mLabel[0]) && isalpha(mvAtom[i].mLabel[1])) nbc=2;
else if(isalpha(mvAtom[i].mLabel[0])) nbc=1;
}
if(nbc>0) mvAtom[i].mSymbol=mvAtom[i].mLabel.substr(0,nbc);
else mvAtom[i].mSymbol="H"; }
}
posoccup=loop->second.find("_atom_site_occupancy");
if(posoccup!=loop->second.end())
for(unsigned int i=0;i<nb;++i)
{
mvAtom[i].mOccupancy=CIFNumeric2Float(posoccup->second[i]);
if( (mvAtom[i].mOccupancy <= 0.0) || (mvAtom[i].mOccupancy > 1.0) )
mvAtom[i].mOccupancy = 1.0;
}
stringstream ss;
ss << "Found "<<nb<<" atoms."<<endl;
for(unsigned int i=0;i<nb;++i)
{
ss<<mvAtom[i].mLabel<<" "<<mvAtom[i].mSymbol;
if(mvAtom[i].mCoordFrac.size()>0)
{
ss<<" , Fractional: ";
for(unsigned int j=0;j<mvAtom[i].mCoordFrac.size();++j)
ss<<mvAtom[i].mCoordFrac[j]<<" ";
}
if(mvAtom[i].mCoordCart.size()>0)
{
ss<<" , Cartesian: ";
for(unsigned int j=0;j<mvAtom[i].mCoordCart.size();++j)
ss<<mvAtom[i].mCoordCart[j]<<" ";
}
ss<<" , Occupancy= "<<mvAtom[i].mOccupancy<<endl;
}
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
}
}
void CIFData::ExtractBonds()
{
map<ci_string,string>::const_iterator positem;
for(map<set<ci_string>,map<ci_string,vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end();++loop)
{
map<ci_string,vector<string> >::const_iterator poslabel1,poslabel2,posdist;
poslabel1=loop->second.find("_geom_bond_atom_site_label_1");
poslabel2=loop->second.find("_geom_bond_atom_site_label_2");
posdist=loop->second.find("_geom_bond_distance");
if( (poslabel1!=loop->second.end()) && (poslabel2!=loop->second.end()) && (posdist!=loop->second.end()))
{
obErrorLog.ThrowError(__FUNCTION__, "Found _geom_bond* record...", obDebug);
const unsigned long nb=poslabel1->second.size();
mvBond.resize(nb);
for(unsigned int i=0;i<nb;++i)
{
mvBond[i].mLabel1=poslabel1->second[i];
mvBond[i].mLabel2=poslabel2->second[i];
mvBond[i].mDistance=CIFNumeric2Float(posdist->second[i]);
stringstream ss;
ss << " d(" << mvBond[i].mLabel1 << "-" << mvBond[i].mLabel2 << ")=" << mvBond[i].mDistance;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
}
}
}
void CIFData::ExtractCharges()
{
map<ci_string,string>::const_iterator positem;
map<std::string, double> lbl2ox;
for(map<set<ci_string>, map<ci_string, vector<string> > >::const_iterator loop=mvLoop.begin(); loop!=mvLoop.end(); ++loop)
{
map<ci_string,vector<string> >::const_iterator pos_symbol, pos_ox_number, posdist;
pos_symbol =loop->second.find("_atom_type_symbol");
pos_ox_number =loop->second.find("_atom_type_oxidation_number");
if( (pos_symbol != loop->second.end()) && (pos_ox_number != loop->second.end()) )
{
obErrorLog.ThrowError(__FUNCTION__, " Found _atom_type* record with oxydation number...", obDebug);
const unsigned long nl = pos_symbol->second.size();
for(unsigned int i = 0; i < nl; i++)
{
lbl2ox[pos_symbol->second[i]] = CIFNumeric2Float(pos_ox_number->second[i]);
obErrorLog.ThrowError(__FUNCTION__, " has oxydation "+pos_ox_number->second[i], obDebug);
}
}
}
for (std::vector<CIFAtom>::iterator it = mvAtom.begin() ; it != mvAtom.end(); ++it)
{
string label = (*it).mLabel;
if( lbl2ox.count(label) > 0 )
(*it).mCharge = lbl2ox[label];
else
{
(*it).mCharge = NOCHARGE;
obErrorLog.ThrowError(__FUNCTION__, "Charge for label: "+label+" cannot be found.", obDebug);
}
}
}
void CIFData::CalcMatrices()
{
if(mvLatticePar.size()==0) return; float a,b,c,alpha,beta,gamma; float aa,bb,cc,alphaa,betaa,gammaa; float v; a=mvLatticePar[0];
b=mvLatticePar[1];
c=mvLatticePar[2];
alpha=mvLatticePar[3];
beta=mvLatticePar[4];
gamma=mvLatticePar[5];
v=sqrt(1-cos(alpha)*cos(alpha)-cos(beta)*cos(beta)-cos(gamma)*cos(gamma)
+2*cos(alpha)*cos(beta)*cos(gamma));
aa=sin(alpha)/a/v;
bb=sin(beta )/b/v;
cc=sin(gamma)/c/v;
alphaa=acos( (cos(beta )*cos(gamma)-cos(alpha))/sin(beta )/sin(gamma) );
betaa =acos( (cos(alpha)*cos(gamma)-cos(beta ))/sin(alpha)/sin(gamma) );
gammaa=acos( (cos(alpha)*cos(beta )-cos(gamma))/sin(alpha)/sin(beta ) );
mOrthMatrix[0][0]=a;
mOrthMatrix[0][1]=b*cos(gamma);
mOrthMatrix[0][2]=c*cos(beta);
mOrthMatrix[1][0]=0;
mOrthMatrix[1][1]=b*sin(gamma);
mOrthMatrix[1][2]=-c*sin(beta)*cos(alphaa);
mOrthMatrix[2][0]=0;
mOrthMatrix[2][1]=0;
mOrthMatrix[2][2]=1/cc;
float cm[3][3];
cm[0][0]=mOrthMatrix[0][0];
cm[0][1]=mOrthMatrix[0][1];
cm[0][2]=mOrthMatrix[0][2];
cm[1][0]=mOrthMatrix[1][0];
cm[1][1]=mOrthMatrix[1][1];
cm[1][2]=mOrthMatrix[1][2];
cm[2][0]=mOrthMatrix[2][0];
cm[2][1]=mOrthMatrix[2][1];
cm[2][2]=mOrthMatrix[2][2];
for(long i=0;i<3;i++)
for(long j=0;j<3;j++)
if(i==j) mOrthMatrixInvert[i][j]=1;
else mOrthMatrixInvert[i][j]=0;
for(long i=0;i<3;i++)
{
float a;
for(long j=i-1;j>=0;j--)
{
a=cm[j][i]/cm[i][i];
for(long k=0;k<3;k++) mOrthMatrixInvert[j][k] -= mOrthMatrixInvert[i][k]*a;
for(long k=0;k<3;k++) cm[j][k] -= cm[i][k]*a;
}
a=cm[i][i];
for(long k=0;k<3;k++) mOrthMatrixInvert[i][k] /= a;
for(long k=0;k<3;k++) cm[i][k] /= a;
}
stringstream ss;
ss <<"Fractional2Cartesian matrix:"<<endl
<<mOrthMatrix[0][0]<<" "<<mOrthMatrix[0][1]<<" "<<mOrthMatrix[0][2]<<endl
<<mOrthMatrix[1][0]<<" "<<mOrthMatrix[1][1]<<" "<<mOrthMatrix[1][2]<<endl
<<mOrthMatrix[2][0]<<" "<<mOrthMatrix[2][1]<<" "<<mOrthMatrix[2][2]<<endl<<endl;
ss <<"Cartesian2Fractional matrix:"<<endl
<<mOrthMatrixInvert[0][0]<<" "<<mOrthMatrixInvert[0][1]<<" "<<mOrthMatrixInvert[0][2]<<endl
<<mOrthMatrixInvert[1][0]<<" "<<mOrthMatrixInvert[1][1]<<" "<<mOrthMatrixInvert[1][2]<<endl
<<mOrthMatrixInvert[2][0]<<" "<<mOrthMatrixInvert[2][1]<<" "<<mOrthMatrixInvert[2][2];
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
void CIFData::f2c(float &x,float &y, float &z)
{
const float x0=x,y0=y,z0=z;
x=mOrthMatrix[0][0]*x0+mOrthMatrix[0][1]*y0+mOrthMatrix[0][2]*z0;
y=mOrthMatrix[1][0]*x0+mOrthMatrix[1][1]*y0+mOrthMatrix[1][2]*z0;
z=mOrthMatrix[2][0]*x0+mOrthMatrix[2][1]*y0+mOrthMatrix[2][2]*z0;
}
void CIFData::c2f(float &x,float &y, float &z)
{
const float x0=x,y0=y,z0=z;
x=mOrthMatrixInvert[0][0]*x0+mOrthMatrixInvert[0][1]*y0+mOrthMatrixInvert[0][2]*z0;
y=mOrthMatrixInvert[1][0]*x0+mOrthMatrixInvert[1][1]*y0+mOrthMatrixInvert[1][2]*z0;
z=mOrthMatrixInvert[2][0]*x0+mOrthMatrixInvert[2][1]*y0+mOrthMatrixInvert[2][2]*z0;
}
void CIFData::Cartesian2FractionalCoord()
{
if(mvLatticePar.size()==0) return; for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos)
{
pos->mCoordFrac.resize(3);
pos->mCoordFrac[0]=pos->mCoordCart.at(0);
pos->mCoordFrac[1]=pos->mCoordCart.at(1);
pos->mCoordFrac[2]=pos->mCoordCart.at(2);
c2f(pos->mCoordFrac[0],pos->mCoordFrac[1],pos->mCoordFrac[2]);
}
}
void CIFData::Fractional2CartesianCoord()
{
if(mvLatticePar.size()==0) return; for(vector<CIFAtom>::iterator pos=mvAtom.begin();pos!=mvAtom.end();++pos)
{
pos->mCoordCart.resize(3);
pos->mCoordCart[0]=pos->mCoordFrac.at(0);
pos->mCoordCart[1]=pos->mCoordFrac.at(1);
pos->mCoordCart[2]=pos->mCoordFrac.at(2);
f2c(pos->mCoordCart[0],pos->mCoordCart[1],pos->mCoordCart[2]);
}
}
CIF::CIF(istream &is, const bool interpret)
{
bool found_atoms=false;
while(!found_atoms)
{
mvData.clear();
this->Parse(is);
if(interpret)
for(map<string,CIFData>::iterator posd=mvData.begin();posd!=mvData.end();++posd)
{
posd->second.ExtractAll();
if(posd->second.mvAtom.size()>0) found_atoms=true;
}
}
}
bool iseol(const char c) { return ((c=='\n')||(c=='\r'));}
string CIFReadValue(istream &in,char &lastc)
{
bool vv=false; string value("");
while(!isgraph(in.peek())) in.get(lastc);
while(in.peek()=='#')
{ string tmp;
getline(in,tmp);
lastc='\r';
while(!isgraph(in.peek())) in.get(lastc);
}
if(in.peek()=='_') {
stringstream errorMsg;
errorMsg << "Warning: Trying to read a value but found a new CIF tag !";
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
return value;
}
if(in.peek()==';')
{ bool warning=!iseol(lastc);
if(warning){
stringstream errorMsg;
errorMsg << "Warning: Trying to read a SemiColonTextField but last char is not an end-of-line char !";
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
}
value="";
in.get(lastc);
while(in.peek()!=';')
{
if (in.peek() == '_') {
stringstream errorMsg;
errorMsg << "Warning: Trying to read a value but found a new CIF tag !";
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obError);
warning = true;
break;
}
string tmp;
getline(in,tmp);
value+=tmp+" ";
}
if (!warning)
in.get(lastc);
if(vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug);
if(warning && !vv) obErrorLog.ThrowError(__FUNCTION__, "SemiColonTextField:"+value, obDebug);
return value;
}
if((in.peek()=='\'') || (in.peek()=='\"'))
{ char delim;
in.get(delim);
value="";
while(!((lastc==delim)&&(!isgraph(in.peek()))) )
{
in.get(lastc);
value+=lastc;
}
if(vv) obErrorLog.ThrowError(__FUNCTION__, "QuotedString:"+value, obDebug);
return value.substr(0,value.size()-1);
}
in>>value;
if(vv) obErrorLog.ThrowError(__FUNCTION__, "NormalValue:"+value, obDebug);
return value;
}
void CIF::Parse(istream &in)
{
bool vv=false; char lastc=' ';
string block=""; while(!in.eof())
{
while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
if(in.peek()=='#')
{ string tmp;
getline(in,tmp);
if(block=="") mvComment.push_back(tmp);
else mvData[block].mvComment.push_back(tmp);
lastc='\r';
continue;
}
if(in.peek()=='_')
{ string tag,value;
in>>tag;
for (string::size_type pos = tag.find('.'); pos != string::npos; pos = tag.find('.', ++ pos))
tag.replace(pos, 1, 1, '_');
value=CIFReadValue(in,lastc);
mvData[block].mvItem[ci_string(tag.c_str())]=value;
if(vv)
{
stringstream ss;
ss<<"New Tag:"<<tag<<" ("<<value.size()<<"):"<<value;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
continue;
}
if((in.peek()=='d') || (in.peek()=='D'))
{ if(!mvData.empty()) return;
string tmp;
in>>tmp;
block=tmp.substr(5);
if(vv)
{
stringstream ss;
ss<<endl<<endl<<"NEW BLOCK DATA: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ->"<<block<<endl<<endl;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
mvData[block]=CIFData();
mvData[block].mDataBlockName=tmp;
continue;
}
if((in.peek()=='l') || (in.peek()=='L'))
{ vector<ci_string> tit;
string tmp;
in>>tmp; if(vv) obErrorLog.ThrowError(__FUNCTION__, "LOOP : "+tmp, obDebug);
while(true)
{ while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
if(in.peek()=='#')
{
getline(in,tmp);
if(block=="") mvComment.push_back(tmp);
else mvData[block].mvComment.push_back(tmp);
continue;
}
if(in.peek()!='_')
{
stringstream ss;
ss << "End of loop titles:"<<(char)in.peek();
if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
break;
}
in>>tmp;
for (string::size_type pos = tmp.find('.'); pos != string::npos; pos = tmp.find('.', ++ pos))
tmp.replace(pos, 1, 1, '_');
tit.push_back(ci_string(tmp.c_str()));
if(vv) obErrorLog.ThrowError(__FUNCTION__, " , "+tmp, obDebug);
}
map<ci_string,vector<string> > lp;
while(true)
{
std::ios::pos_type pos0=in.tellg();
while(!isgraph(in.peek()) && !in.eof()) in.get(lastc);
if(in.eof()) break;
if(in.peek()=='_') break;
if(in.peek()=='#')
{ string tmp;
getline(in,tmp);
pos0=in.tellg();
if(block=="") mvComment.push_back(tmp);
else mvData[block].mvComment.push_back(tmp);
lastc='\r';
if(vv) obErrorLog.ThrowError(__FUNCTION__, "Comment in a loop (?):"+tmp, obDebug);
break;
}
tmp=CIFReadValue(in,lastc);
if(ci_string(tmp.c_str())=="loop_")
{ in.clear();
in.seekg(pos0,std::ios::beg);
stringstream ss;
ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg();
if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
break;
}
if(tmp.size()>=5)
if(ci_string(tmp.substr(0,5).c_str())=="data_")
{ in.clear();
in.seekg(pos0,std::ios::beg);
stringstream ss;
ss <<"END OF LOOP :"<<tmp<<","<<(char)in.peek()<<","<<in.tellg();
if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
break;
}
for(unsigned int i=0;i<tit.size();++i)
{ if(i>0) tmp=CIFReadValue(in,lastc);
lp[tit[i]].push_back(tmp);
stringstream ss;
ss <<" LOOP VALUE #"<<lp[tit[i]].size()<<","<<i<<" : "<<tmp;
if(vv) obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
}
}
set<ci_string> stit;
for(unsigned int i=0;i<tit.size();++i) stit.insert(tit[i]);
mvData[block].mvLoop[stit]=lp;
continue;
}
string junk;
getline(in,junk);
if(junk.size()>0)
{
stringstream errorMsg;
errorMsg << "Warning: one line could not be interpreted while reading a CIF file:"<<endl
<< " -> line contents:" << junk;
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str() , obWarning);
}
}
}
float CIFNumeric2Float(const string &s)
{
if((s==".") || (s=="?")) return 0.0;
float v;
const int n=sscanf(s.c_str(),"%f",&v);
if(n!=1) return 0.0;
return v;
}
int CIFNumeric2Int(const string &s)
{
if((s==".") || (s=="?")) return 0;
int v;
const int n=sscanf(s.c_str(),"%d",&v);
if(n!=1) return 0;
return v;
}
bool is_double(const std::string& s, double& r_double)
{
std::istringstream i(s);
if (i >> r_double)
return true;
r_double = 0.0;
return false;
}
CIFFormat theCIFFormat;
bool CIFisWaterOxygen(OBAtom *atom)
{
if (atom->GetAtomicNum() != OBElements::Oxygen)
return false;
int nonHydrogenCount = 0;
int hydrogenCount = 0;
FOR_NBORS_OF_ATOM(neighbor, *atom) {
if (neighbor->GetAtomicNum() != OBElements::Hydrogen)
nonHydrogenCount++;
else
hydrogenCount++;
}
return (hydrogenCount == 2 && nonHydrogenCount <= 1);
}
void CorrectFormalCharges(OBMol *mol)
{
if (!mol)
return;
FOR_ATOMS_OF_MOL(atom, *mol) {
if ((atom->GetAtomicNum() == 7 || atom->GetAtomicNum() == 15)
&& atom->GetExplicitValence() == 4) {
bool nonMetalNeighbors = true;
FOR_NBORS_OF_ATOM(neighbor, &*atom) {
switch (neighbor->GetAtomicNum()) {
case 1:
case 5: case 6: case 7: case 8: case 9:
case 14: case 15: case 16: case 17:
case 33: case 34: case 35:
case 53:
continue; default:
nonMetalNeighbors = false;
break; }
}
if (nonMetalNeighbors) atom->SetFormalCharge(+1);
}
if (atom->GetFormalCharge() != 0)
continue;
if (atom->GetExplicitDegree() != 0) {
int nonWaterBonds = 0;
FOR_NBORS_OF_ATOM(neighbor, &*atom) {
if (!CIFisWaterOxygen(&*neighbor)) {
nonWaterBonds = 1;
break;
}
}
if (nonWaterBonds)
continue; }
switch(atom->GetAtomicNum()) {
case 3: case 11: case 19: case 37: case 55: case 87:
atom->SetFormalCharge(+1);
break;
case 4: case 12: case 20: case 38: case 56: case 88:
atom->SetFormalCharge(+2);
break;
case 9: case 17: case 35: case 53: case 85:
atom->SetFormalCharge(-1);
break;
}
}
}
bool CIFFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
{
OBFormat *obformat = OBFormat::FindType("mmcif");
if (obformat) { return obformat->ReadMolecule(pOb, pConv); }
obErrorLog.ThrowError(__FUNCTION__, "mmCIF parser not found. Using CIF parser.", obDebug);
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
if (pmol == nullptr)
return false;
CIF cif(*pConv->GetInStream(),true);
for(map<string,CIFData>::iterator pos=cif.mvData.begin();pos!=cif.mvData.end();++pos)
if(pos->second.mvAtom.size()>0)
{
pmol->BeginModify();
if(pos->second.mvLatticePar.size()==6)
{ string spg=pos->second.mSpacegroupSymbolHall;
if(spg=="") spg=pos->second.mSpacegroupHermannMauguin;
if(spg=="") spg=pos->second.mSpacegroupNumberIT;
if(spg=="") spg="P1";
OBUnitCell *pCell=new OBUnitCell;
pCell->SetOrigin(fileformatInput);
pCell->SetData(pos->second.mvLatticePar[0],
pos->second.mvLatticePar[1],
pos->second.mvLatticePar[2],
pos->second.mvLatticePar[3]/DEG_TO_RAD,
pos->second.mvLatticePar[4]/DEG_TO_RAD,
pos->second.mvLatticePar[5]/DEG_TO_RAD);
pCell->SetSpaceGroup(spg);
pCell->SetSpaceGroup(pos->second.mSpaceGroup);
pmol->SetData(pCell);
}
if(pos->second.mName!="") pmol->SetTitle(pos->second.mName);
else
if(pos->second.mFormula!="") pmol->SetTitle(pos->second.mFormula);
else pmol->SetTitle(pConv->GetTitle());
if(pos->second.mFormula!="") pmol->SetFormula(pos->second.mFormula);
std::map<std::string,OBAtom *> vLabelOBatom;
const unsigned int nbatoms=pos->second.mvAtom.size();
pmol->ReserveAtoms(nbatoms);
for(vector<CIFData::CIFAtom>::const_iterator posat=pos->second.mvAtom.begin();posat!=pos->second.mvAtom.end();++posat)
{
string tmpSymbol=posat->mSymbol;
unsigned int nbc=0;
if((tmpSymbol.size()==1) && isalpha(tmpSymbol[0])) nbc=1;
else if(tmpSymbol.size()>=2)
{
if(isalpha(tmpSymbol[0]) && isalpha(tmpSymbol[1])) nbc=2;
else if(isalpha(tmpSymbol[0])) nbc=1;
}
OBAtom *atom = pmol->NewAtom();
vLabelOBatom.insert(make_pair(posat->mLabel,atom));
if(tmpSymbol.size()>nbc)
{ int charge=0;
int sign=0;
for(unsigned int i=nbc;i<tmpSymbol.size();++i)
{ if(isdigit(tmpSymbol[i]) && (charge==0)) charge=atoi(tmpSymbol.substr(i,1).c_str());
if('-'==tmpSymbol[i]) sign-=1;
if('+'==tmpSymbol[i]) sign+=1;
}
if(0!=sign) {
if(charge==0) charge=1;
stringstream ss;
ss << tmpSymbol<<" / symbol="<<tmpSymbol.substr(0,nbc)<<" charge= "<<sign*charge;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
atom->SetFormalCharge(sign*charge);
}
}
if(nbc>0) tmpSymbol=tmpSymbol.substr(0,nbc);
else tmpSymbol="C";
int atomicNum = OBElements::GetAtomicNum(tmpSymbol.c_str());
if (atomicNum == 0 && tmpSymbol[0] == 'O') {
atomicNum = 8; }
atom->SetAtomicNum(atomicNum); atom->SetType(tmpSymbol); atom->SetVector(posat->mCoordCart[0],posat->mCoordCart[1],posat->mCoordCart[2]);
if(posat->mLabel.size()>0)
{
OBPairData *label = new OBPairData;
label->SetAttribute("_atom_site_label");
label->SetValue(posat->mLabel);
label->SetOrigin(fileformatInput);
atom->SetData(label);
}
OBPairFloatingPoint *occup_data = new OBPairFloatingPoint;
occup_data->SetAttribute("_atom_site_occupancy");
occup_data->SetValue(posat->mOccupancy);
occup_data->SetOrigin(fileformatInput);
atom->SetData(occup_data);
if( posat->mCharge != NOCHARGE )
{
OBPairFloatingPoint *charge_data = new OBPairFloatingPoint;
charge_data->SetAttribute("input_charge");
charge_data->SetValue(posat->mCharge);
charge_data->SetOrigin(fileformatInput);
atom->SetData(charge_data);
}
}
if (!pConv->IsOption("b",OBConversion::INOPTIONS))
pmol->ConnectTheDots();
if (pConv->IsOption("B",OBConversion::INOPTIONS))
{
for(vector<CIFData::CIFBond>::const_iterator posbond=pos->second.mvBond.begin();posbond!=pos->second.mvBond.end();++posbond)
{ std::map<std::string,OBAtom *>::iterator posat1,posat2;
posat1=vLabelOBatom.find(posbond->mLabel1);
posat2=vLabelOBatom.find(posbond->mLabel2);
if(posat1!=vLabelOBatom.end() && posat2!=vLabelOBatom.end())
{
stringstream ss;
ss << " Adding cif bond ? "<<posat1->first<<"-"<<posat2->first;
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obDebug);
if (pmol->GetBond(posat1->second, posat2->second) == nullptr)
{
obErrorLog.ThrowError(__FUNCTION__, " :Bond added !", obDebug);
OBBond * bond=pmol->NewBond();
bond->SetBegin(posat1->second);
bond->SetEnd(posat2->second);
bond->SetBondOrder(1);
bond->SetLength(double(posbond->mDistance));
}
else obErrorLog.ThrowError(__FUNCTION__, " :Bond already present.. ", obDebug);
}
}
}
if (!pConv->IsOption("s",OBConversion::INOPTIONS) && !pConv->IsOption("b",OBConversion::INOPTIONS))
pmol->PerceiveBondOrders();
pmol->EndModify();
pmol->SetAutomaticFormalCharge(false); CorrectFormalCharges(pmol); return true;
}
obErrorLog.ThrowError(__FUNCTION__, "Problems reading a CIF file: no structure found !" , obWarning);
return(false);
}
bool CIFFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
{
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
if (pmol == nullptr)
return false;
ostream &ofs = *pConv->GetOutStream();
char buffer[BUFF_SIZE];
ofs <<"# CIF file generated by openbabel "<<BABEL_VERSION<<", see https://openbabel.org"<<endl;
ofs << "data_I"<<endl;
ofs <<"_chemical_name_common '"<<pmol->GetTitle()<<"'"<<endl;
OBUnitCell *pUC = nullptr;
if (pmol->HasData(OBGenericDataType::UnitCell))
{
pUC = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell);
ofs << "_cell_length_a " << pUC->GetA() << endl
<< "_cell_length_b " << pUC->GetB() << endl
<< "_cell_length_c " << pUC->GetC() << endl
<< "_cell_angle_alpha " << pUC->GetAlpha() << endl
<< "_cell_angle_beta " << pUC->GetBeta() << endl
<< "_cell_angle_gamma " << pUC->GetGamma() << endl;
const SpaceGroup* pSG = pUC->GetSpaceGroup();
if (pSG != nullptr)
{
size_t n=pSG->GetHMName().find(":");
if(n==string::npos)
ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName() << "'" << endl;
else
ofs << "_space_group_name_H-M_alt '" << pSG->GetHMName().substr(0,n) << "'" << endl;
ofs << "_space_group_name_Hall '" << pSG->GetHallName() << "'" << endl;
ofs << "loop_" <<endl
<< " _symmetry_equiv_pos_as_xyz" << endl;
transform3dIterator ti;
const transform3d *t = pSG->BeginTransform(ti);
while(t)
{
ofs << " " << t->DescribeAsString() << endl;
t = pSG->NextTransform(ti);
}
}
}
ofs << "loop_" << endl
<< " _atom_site_label" << endl
<< " _atom_site_type_symbol" << endl
<< " _atom_site_fract_x" << endl
<< " _atom_site_fract_y" << endl
<< " _atom_site_fract_z" << endl
<< " _atom_site_occupancy" << endl;
std::map<OBAtom*,std::string> label_table;
unsigned int i = 0;
FOR_ATOMS_OF_MOL(atom, *pmol)
{
double X, Y, Z; vector3 v = atom->GetVector();
if (pUC != nullptr) {
v = pUC->CartesianToFractional(v);
v = pUC->WrapFractionalCoordinate(v);
}
X = v.x();
Y = v.y();
Z = v.z();
string label_str;
double occup;
if (atom->HasData("_atom_site_occupancy"))
{
occup = (dynamic_cast<OBPairFloatingPoint *> (atom->GetData("_atom_site_occupancy")))->GetGenericValue();
}
else occup = 1.0;
if (atom->HasData("_atom_site_label"))
{
OBPairData *label = dynamic_cast<OBPairData *> (atom->GetData("_atom_site_label"));
label_str = label->GetValue().c_str();
}
else
{
label_str = OBElements::GetSymbol(atom->GetAtomicNum()) + to_string(i);
i++;
}
label_table[&*atom] = label_str;
snprintf(buffer, BUFF_SIZE, " %-8s %-5s %10.5f %10.5f %10.5f %8.3f\n",
label_str.c_str(), OBElements::GetSymbol(atom->GetAtomicNum()),
X, Y, Z, occup);
ofs << buffer;
}
if (pConv->IsOption("g", OBConversion::OUTOPTIONS))
{
if (pmol->NumBonds() > 0) {
obErrorLog.ThrowError(__FUNCTION__, "Writing bonds to output CIF", obDebug);
ofs << "loop_" << endl
<< " _geom_bond_atom_site_label_1" << endl
<< " _geom_bond_atom_site_label_2" << endl
<< " _geom_bond_distance" << endl
<< " _geom_bond_site_symmetry_2" << endl
<< " _ccdc_geom_bond_type" << endl;
} else {
obErrorLog.ThrowError(__FUNCTION__, "No bonds defined in molecule for CIF export", obDebug);
}
FOR_BONDS_OF_MOL(bond, *pmol)
{
std::string label_1 = label_table[bond->GetBeginAtom()];
std::string label_2 = label_table[bond->GetEndAtom()];
std::string sym_key;
int symmetry_num = 555;
if (bond->IsPeriodic()) {
OBUnitCell *box = (OBUnitCell*)pmol->GetData(OBGenericDataType::UnitCell);
vector3 begin, end_orig, end_expected, uc_direction;
begin = box->CartesianToFractional(bond->GetBeginAtom()->GetVector());
begin = box->WrapFractionalCoordinate(begin);
end_orig = box->CartesianToFractional(bond->GetEndAtom()->GetVector());
end_orig = box->WrapFractionalCoordinate(end_orig);
end_expected = box->UnwrapFractionalNear(end_orig, begin);
uc_direction = end_expected - end_orig;
std::vector<int> uc;
for (int i = 0; i < 3; ++i) {
double raw_cell = uc_direction[i];
uc.push_back(static_cast<int>(lrint(raw_cell)));
}
symmetry_num += 100*uc[0] + 10*uc[1] + 1*uc[2]; }
if (symmetry_num == 555)
{
sym_key = ".";
}
else
{
stringstream ss;
ss << "1_" << symmetry_num;
sym_key = ss.str();
}
std::string bond_type;
int bond_order = bond->GetBondOrder();
switch (bond_order)
{
case 1:
bond_type = "S";
break;
case 2:
bond_type = "D";
break;
case 3:
bond_type = "T";
break;
case 5: bond_type = "A"; break;
default:
stringstream ss;
ss << "Unexpected bond order " << bond_order
<< " for bond" << label_1 << "-" << label_2 << std::endl
<< "Defaulting to single bond.";
obErrorLog.ThrowError(__FUNCTION__, ss.str(), obWarning);
bond_type = "S";
}
snprintf(buffer, BUFF_SIZE, " %-7s%-7s%10.5f%7s%4s\n",
label_1.c_str(), label_2.c_str(),
bond->GetLength(), sym_key.c_str(),
bond_type.c_str());
ofs << buffer;
}
}
return true;
}}