#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/math/align.h>
#include <algorithm>
using namespace std;
namespace OpenBabel
{
class ConfabReport : public OBMoleculeFormat
{
public:
ConfabReport()
{
OBConversion::RegisterFormat("confabreport",this);
const double binvals_arr[8] = {0.2, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 100.0};
binvals = vector<double>(binvals_arr, binvals_arr+8);
cutoff_passed = 0;
N = 0;
oldtitle = "";
rmsd_cutoff = 0.5;
}
virtual const char* Description() {
return
"Confab report format\n"
"Assess performance of a conformer generator relative to a set of reference structures\n\n"
"Once a file containing conformers has been generated by :ref:`Confab`,\n"
"the result can be compared to the original input structures or a set\n"
"of reference structures using this output format.\n\n"
"Conformers are matched with reference structures using the molecule\n"
"title. For every conformer, there should be a reference structure\n"
"(but not necessarily vice versa).\n\n"
"Further information is available in the section describing :ref:`Confab`.\n\n"
"Write Options e.g. -xr 0.3 \n"
" f <filename> File containing reference structures\n"
" r <rmsd> RMSD cutoff (default 0.5 Angstrom)\n"
" The number of structures with conformers within this RMSD cutoff\n"
" of the reference will be reported.\n"
;
};
virtual unsigned int Flags()
{
return NOTREADABLE;
};
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
private:
void WriteOutput(ostream&);
ifstream rfs;
const char *rfilename;
OBConversion rconv;
vector<double> binvals;
OBAlign align;
OBMol rmol;
unsigned int cutoff_passed;
unsigned int N;
string oldtitle;
vector<double> rmsd;
double rmsd_cutoff;
};
ConfabReport theConfabReport;
void ConfabReport::WriteOutput(ostream& ofs)
{
if (!rmsd.empty()) {
sort(rmsd.begin(), rmsd.end());
ofs << "..minimum rmsd = " << rmsd.at(0) << "\n";
int bin_idx = 0;
vector<int> bins(binvals.size());
for(vector<double>::iterator it=rmsd.begin(); it!=rmsd.end(); ++it) {
while (*it > binvals[bin_idx])
bin_idx++;
bins[bin_idx]++;
}
vector<int> cumbins(bins);
for(int i=1; i<8; ++i)
cumbins[i] += cumbins[i-1];
ofs << "..confs less than cutoffs: ";
ofs << binvals[0];
for (int i=1; i < binvals.size(); i++)
ofs << " " << binvals[i];
ofs << "\n";
ofs << ".." << cumbins[0];
for (int i=1; i < cumbins.size(); i++)
ofs << " " << cumbins[i];
ofs << "\n";
ofs << "..cutoff (" << rmsd_cutoff << ") passed = ";
if (rmsd.at(0) <= rmsd_cutoff) {
ofs << " Yes\n";
cutoff_passed++;
}
else
ofs << " No\n";
ofs << "\n";
}
}
bool ConfabReport::WriteMolecule(OBBase* pOb, OBConversion* pConv)
{
OBMol* pmol = dynamic_cast<OBMol*>(pOb);
if (pmol == nullptr)
return false;
ostream& ofs = *pConv->GetOutStream();
bool firstMol = pConv->GetOutputIndex()==1;
if(firstMol) {
rfilename = pConv->IsOption("f");
if (!rfilename) {
cerr << "Need to specify a reference file\n";
return false;
}
OBFormat *prFormat = nullptr;
if (!prFormat) {
prFormat = rconv.FormatFromExt(rfilename);
if (!prFormat) {
cerr << "Cannot read reference format!" << endl;
return false;
}
}
rfs.open(rfilename);
if (!rfs) {
cerr << "Cannot read reference file!" << endl;
return false;
}
const char* pp = pConv->IsOption("r");
if (pp)
rmsd_cutoff = atof(pp);
rconv.SetInStream(&rfs);
rconv.SetInFormat(prFormat);
ofs << "**Generating Confab Report " << endl;
ofs << "..Reference file = " << rfilename << endl;
ofs << "..Conformer file = " << pConv->GetInFilename() << "\n\n";
}
string title = pmol->GetTitle();
if (oldtitle != title) {
if (!firstMol)
ofs << "..number of confs = " << rmsd.size() << "\n";
WriteOutput(ofs);
bool success = rconv.Read(&rmol);
if (!success) return false;
N++;
while (rmol.GetTitle() != title) {
ofs << "..Molecule " << N
<< "\n..title = " << rmol.GetTitle()
<< "\n..number of confs = 0\n";
N++;
success = rconv.Read(&rmol);
if (!success) return false;
}
align.SetRefMol(rmol);
ofs << "..Molecule " << N << "\n..title = " << rmol.GetTitle() << "\n";
rmsd.clear();
}
align.SetTargetMol(*pmol);
align.Align();
rmsd.push_back(align.GetRMSD());
oldtitle = title;
if (pConv->IsLast()) {
ofs << "..number of confs = " << rmsd.size() << "\n";
WriteOutput(ofs);
ofs << "\n**Summary\n..number of molecules = " << N
<< "\n..less than cutoff(" << rmsd_cutoff << ") = " << cutoff_passed << "\n";
}
return true;
}
}