#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/mol.h>
#include <iostream>
using namespace std;
namespace OpenBabel
{
/// Reader for GenBank / DDBJ / EMBL flat-file sequence records.
///
/// The format is registered under the IDs "gen", "embl" and "ddbj" and is
/// flagged NOTWRITABLE | READONEONLY. Two input options are recognised by
/// ReadMolecule: "s" and "b" (see Description()).
class GenBankFormat : public OBMoleculeFormat
{
public:
  /// Registers the format IDs and the input options with OBConversion.
  GenBankFormat()
  {
    OBConversion::RegisterFormat("gen", this, "chemical/x-genbank");
    OBConversion::RegisterFormat("embl", this);
    OBConversion::RegisterFormat("ddbj", this);

    // "s" and "b" are queried as OBConversion::INOPTIONS by ReadMolecule and
    // documented as read options, so they are registered as input options
    // (the default option type is for output options, which this
    // non-writable format never consults).
    OBConversion::RegisterOptionParam("s", this, 0, OBConversion::INOPTIONS);
    OBConversion::RegisterOptionParam("b", this, 0, OBConversion::INOPTIONS);
  }

  /// Human-readable description, including the supported read options.
  virtual const char* Description()
  {
    return
      "GenBank, DDBJ, EMBL Flat File format\n"
      "Read Options e.g. -as\n"
      " s Output single bonds only\n"
      " b Disable bonding entirely\n\n";
  }

  /// URL of a sample record illustrating the flat-file layout.
  virtual const char* SpecificationURL()
  {
    return "http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html";
  }

  /// MIME type, identical to the one passed to RegisterFormat for "gen".
  virtual const char* GetMIMEType()
  {
    return "chemical/x-genbank";
  }

  /// Format capability flags.
  virtual unsigned int Flags()
  {
    return NOTWRITABLE | READONEONLY;
  }

  /// Parses the record header, then hands the sequence body to
  /// ReadFASTASequence. Defined out of line.
  virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);

  /// Kind of sequence found in a record; the value is passed as an int to
  /// ReadFASTASequence.
  enum SequenceType
  {
    UnknownSequence,
    ProteinSequence,
    DNASequence,
    RNASequence,
    MAXSequence // presumably a count/sentinel value - not used in this file
  };
};
// Global instance of the format. Constructing it runs the GenBankFormat
// constructor, which registers the "gen", "embl" and "ddbj" IDs with
// OBConversion.
GenBankFormat theGenBankFormat;
// Sequence-body reader used by GenBankFormat::ReadMolecule. Only declared
// here; the definition lives outside this file (presumably alongside the
// FASTA reader - confirm). ReadMolecule passes the target molecule, the
// detected sequence type, the input stream positioned just after the
// ORIGIN/SQ line, and the bonding flags derived from the "b" and "s"
// input options. `turns` defaults to a null pointer.
bool ReadFASTASequence(OBMol * pmol, int seq_type, std::istream * in, bool create_bonds, bool bond_orders,
bool singleStrand, const char *turns = 0);
bool GenBankFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
{
OBMol* pmol = pOb->CastAndClear<OBMol>();
if (pmol == 0)
return false;
std::istream * in = pConv->GetInStream();
pmol->BeginModify();
std::string line;
SequenceType sequence_type = UnknownSequence;
getline( * in, line);
while (!in->eof())
{ if (!line.compare(0, 6, "LOCUS", 6) || !line.compare(0, 2, "ID ", 2))
{
if (sequence_type == GenBankFormat::UnknownSequence)
{ if (line.find("RNA") != std::string::npos)
sequence_type = GenBankFormat::RNASequence;
else if (line.find("DNA") != std::string::npos)
sequence_type = GenBankFormat::DNASequence;
}
}
else if (!line.compare(0, 6, "DEFINITION", 6) || !line.compare(0, 2, "DE ", 2))
{
if (sequence_type == GenBankFormat::UnknownSequence)
{ if (line.find("RNA") != std::string::npos)
sequence_type = GenBankFormat::RNASequence;
else if (line.find("DNA") != std::string::npos)
sequence_type = GenBankFormat::DNASequence;
else if (line.find("gene") != std::string::npos)
sequence_type = GenBankFormat::DNASequence;
}
if (pmol->GetTitle()[0] == 0)
{
std::string::size_type fc = line.find(' ');
while (fc != std::string::npos && strchr(" \t\n\r", line[fc]))
++ fc;
if (fc != std::string::npos)
pmol->SetTitle( & (line.c_str()[fc]) );
}
}
else if (!line.compare(0, 6, "ORIGIN", 6) || !line.compare(0, 2, "SQ ", 2))
break;
getline( * in, line);
}
if (sequence_type == GenBankFormat::UnknownSequence)
sequence_type = GenBankFormat::DNASequence;
bool rv = ReadFASTASequence(pmol, sequence_type, in,
!pConv->IsOption("b",OBConversion::INOPTIONS), !pConv->IsOption("s",OBConversion::INOPTIONS), false);
pmol->EndModify();
return rv;
}
}