Commit d0b7e21c by maarten

various fixes in pdb2cif

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@179 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent b1de54f8
// copyright // copyright
#pragma once
#include <unordered_map> #include <unordered_map>
#include "cif++/Structure.h" #include "cif++/Structure.h"
......
...@@ -18,7 +18,9 @@ struct ValidateCategory; ...@@ -18,7 +18,9 @@ struct ValidateCategory;
class ValidationError : public std::exception class ValidationError : public std::exception
{ {
public: public:
ValidationError(const std::string& msg) : mMsg(msg) {} ValidationError(const std::string& msg);
ValidationError(const std::string& cat, const std::string& item,
const std::string& msg);
const char* what() const noexcept { return mMsg.c_str(); } const char* what() const noexcept { return mMsg.c_str(); }
std::string mMsg; std::string mMsg;
}; };
......
...@@ -17,6 +17,18 @@ extern int VERBOSE; ...@@ -17,6 +17,18 @@ extern int VERBOSE;
namespace cif namespace cif
{ {
ValidationError::ValidationError(const string& msg)
: mMsg(msg)
{
}
// Builds a validation error tagged with the category and item being checked.
// The stored message reads: "When validating _<cat>.<item>: <msg>".
ValidationError::ValidationError(const string& cat, const string& item, const string& msg)
: mMsg(string("When validating _")
	.append(cat)
	.append(1, '.')
	.append(item)
	.append(": ")
	.append(msg))
{
}
// --------------------------------------------------------------------
DDL_PrimitiveType mapToPrimitiveType(const string& s) DDL_PrimitiveType mapToPrimitiveType(const string& s)
{ {
DDL_PrimitiveType result; DDL_PrimitiveType result;
...@@ -137,18 +149,15 @@ void ValidateItem::setParent(ValidateItem* parent) ...@@ -137,18 +149,15 @@ void ValidateItem::setParent(ValidateItem* parent)
void ValidateItem::operator()(string value) const void ValidateItem::operator()(string value) const
{ {
if (VERBOSE >= 4)
cout << "validating '" << value << "' for '" << mTag << "'" << endl;
if (not value.empty() and value != "?" and value != ".") if (not value.empty() and value != "?" and value != ".")
{ {
if (mType != nullptr and not boost::regex_match(value, mType->mRx)) if (mType != nullptr and not boost::regex_match(value, mType->mRx))
throw ValidationError("Value '" + value + "' does not match type expression for type " + mType->mName + " in item " + mTag); throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);
if (not mEnums.empty()) if (not mEnums.empty())
{ {
if (mEnums.count(value) == 0) if (mEnums.count(value) == 0)
throw ValidationError("Value '" + value + "' is not in the list of allowed values for item " + mTag); throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
} }
} }
} }
......
...@@ -260,7 +260,7 @@ const Compound* CompoundFactory::create(std::string id) ...@@ -260,7 +260,7 @@ const Compound* CompoundFactory::create(std::string id)
if (type == "single") value = 1; if (type == "single") value = 1;
else if (type == "double") value = 2; else if (type == "double") value = 2;
else if (type == "triple") value = 3; else if (type == "triple") value = 3;
else if (type == "deloc" or type == "aromat") else if (type == "deloc" or type == "aromat" or type == "aromatic")
value = 1.5; value = 1.5;
else else
{ {
......
...@@ -283,4 +283,42 @@ float CalculateEDIA(const Atom& atom, const clipper::Xmap<float>& xmap, ...@@ -283,4 +283,42 @@ float CalculateEDIA(const Atom& atom, const clipper::Xmap<float>& xmap,
return result; return result;
} }
//
//// --------------------------------------------------------------------
//// test function
//
//double shellIntegration(float start, float end)
//{
// double r = 1.35;
//
// PointWeightFunction w(Point(), r);
//
// double volume = 0, positief = 0, negatief = 0;
// const size_t N = 10000;
//
// double h = 1.0 / N;
//
// for (size_t i = 0; i < N; ++i)
// {
// double x = start + i * h;
// auto y = w(Point(x * r, 0, 0));
//
// volume += x * y;
// if (y > 0)
// positief += x * y;
// if (y < 0)
// negatief += x * y;
// }
//
// volume *= 2 * kPI * h;
// positief *= 2 * kPI * h;
// negatief *= 2 * kPI * h;
//
// cout << "Volume: " << volume << endl
// << "Positief: " << positief << endl
// << "Negatief: " << negatief << endl;
//
// return volume;
//}
} }
...@@ -387,9 +387,9 @@ class PDBFileParser ...@@ -387,9 +387,9 @@ class PDBFileParser
string PDBIDCode; string PDBIDCode;
char chainID; char chainID;
int seqBegin; int seqBegin;
char insertBegin; char insertBegin = ' ';
int seqEnd; int seqEnd;
char insertEnd; char insertEnd = ' ';
string database; string database;
string dbAccession; string dbAccession;
string dbIdCode; string dbIdCode;
...@@ -820,6 +820,7 @@ class PDBFileParser ...@@ -820,6 +820,7 @@ class PDBFileParser
cif::Datablock* mDatablock = nullptr; cif::Datablock* mDatablock = nullptr;
string mStructureId; string mStructureId;
string mModelTypeDetails;
string mOriginalDate; string mOriginalDate;
string mExpMethod = "X-RAY DIFFRACTION"; string mExpMethod = "X-RAY DIFFRACTION";
int mCitationAuthorNr = 1, mCitationEditorNr = 1; int mCitationAuthorNr = 1, mCitationEditorNr = 1;
...@@ -1232,7 +1233,8 @@ void PDBFileParser::ParseTitle() ...@@ -1232,7 +1233,8 @@ void PDBFileParser::ParseTitle()
if (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT" if (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT"
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. { // 9 - 10 Continuation continuation Allows concatenation of multiple records.
getCategory("database_PDB_caveat")->emplace({ getCategory("database_PDB_caveat")->emplace({
{ "id", vS(12, 15) }, // 12 - 15 IDcode idCode PDB ID code of this datablock. // { "id", vS(12, 15) }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
{ "id", 1 }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
{ "text", string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT. { "text", string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
}); });
...@@ -1400,8 +1402,7 @@ void PDBFileParser::ParseTitle() ...@@ -1400,8 +1402,7 @@ void PDBFileParser::ParseTitle()
// MDLTYP // MDLTYP
if (mRec->is("MDLTYP")) if (mRec->is("MDLTYP"))
{ {
if (VERBOSE) mModelTypeDetails = vS(11);
Error("skipping unimplemented MDLTYP record");
GetNextRecord(); GetNextRecord();
} }
...@@ -1519,7 +1520,8 @@ void PDBFileParser::ParseCitation(const string& id) ...@@ -1519,7 +1520,8 @@ void PDBFileParser::ParseCitation(const string& id)
string auth, titl, edit, publ, refn, pmid, doi; string auth, titl, edit, publ, refn, pmid, doi;
string pubname, volume, astm, country, issn, csd; string pubname, volume, astm, country, issn, csd;
int pageFirst = 0, pageLast = 0, year = 0; string pageFirst;
int year = 0;
auto extend = [](string& s, const string& p) auto extend = [](string& s, const string& p)
{ {
...@@ -1541,7 +1543,7 @@ void PDBFileParser::ParseCitation(const string& id) ...@@ -1541,7 +1543,7 @@ void PDBFileParser::ParseCitation(const string& id)
extend(pubname, vS(20, 47)); extend(pubname, vS(20, 47));
if (vS(50, 51) == "V.") if (vS(50, 51) == "V.")
volume = ba::trim_copy(vS(52, 55)); volume = ba::trim_copy(vS(52, 55));
pageFirst = vI(57, 61); pageFirst = vS(57, 61);
year = vI(63, 66); year = vI(63, 66);
} }
else else
...@@ -1568,8 +1570,7 @@ void PDBFileParser::ParseCitation(const string& id) ...@@ -1568,8 +1570,7 @@ void PDBFileParser::ParseCitation(const string& id)
{ "title", titl }, { "title", titl },
{ "journal_abbrev", pubname }, { "journal_abbrev", pubname },
{ "journal_volume", volume }, { "journal_volume", volume },
{ "page_first", pageFirst > 0 ? to_string(pageFirst) : "" }, { "page_first", pageFirst },
{ "page_last", pageLast > 0 ? to_string(pageLast) : "" },
{ "year", year > 0 ? to_string(year) : "" }, { "year", year > 0 ? to_string(year) : "" },
{ "journal_id_ASTM", astm }, { "journal_id_ASTM", astm },
{ "country", country }, { "country", country },
...@@ -1889,8 +1890,8 @@ void PDBFileParser::ParseRemarks() ...@@ -1889,8 +1890,8 @@ void PDBFileParser::ParseRemarks()
string res = vS(16, 18); string res = vS(16, 18);
char chain = vC(20); char chain = vC(20);
int seq = vI(21, 25); int seq = vI(21, 24);
char iCode = vC(26); char iCode = vC(25);
vector<string> atoms; vector<string> atoms;
string atomStr = mRec->vS(29); string atomStr = mRec->vS(29);
...@@ -2052,8 +2053,8 @@ void PDBFileParser::ParseRemarks() ...@@ -2052,8 +2053,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13); int model = vI(11, 13);
string resNam1 = vS(15, 17); string resNam1 = vS(15, 17);
string chainID1 { vC(19) }; string chainID1 { vC(19) };
int seqNum1 = vI(20, 24); int seqNum1 = vI(20, 23);
string iCode1 { vC(25) }; string iCode1 { vC(24) };
string alt1 = vS(30, 30); string alt1 = vS(30, 30);
string atm1 = vS(26, 29); string atm1 = vS(26, 29);
...@@ -2106,8 +2107,8 @@ void PDBFileParser::ParseRemarks() ...@@ -2106,8 +2107,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13); int model = vI(11, 13);
string resNam = vS(15, 17); string resNam = vS(15, 17);
string chainID { vC(19) }; string chainID { vC(19) };
int seqNum = vI(20, 24); int seqNum = vI(20, 23);
string iCode { vC(25) }; string iCode { vC(24) };
if (iCode == " ") if (iCode == " ")
iCode.clear(); iCode.clear();
...@@ -2154,8 +2155,8 @@ void PDBFileParser::ParseRemarks() ...@@ -2154,8 +2155,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13); int model = vI(11, 13);
string resNam = vS(15, 17); string resNam = vS(15, 17);
string chainID { vC(19) }; string chainID { vC(19) };
int seqNum = vI(20, 24); int seqNum = vI(20, 23);
string iCode { vC(25) }; string iCode { vC(24) };
if (iCode == " ") if (iCode == " ")
iCode.clear(); iCode.clear();
...@@ -2229,8 +2230,8 @@ void PDBFileParser::ParseRemarks() ...@@ -2229,8 +2230,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13); int model = vI(11, 13);
string resNam = vS(15, 17); string resNam = vS(15, 17);
string chainID { vC(19) }; string chainID { vC(19) };
int seqNum = vI(20, 24); int seqNum = vI(20, 23);
string iCode { vC(25) }; string iCode { vC(24) };
if (iCode == " ") if (iCode == " ")
iCode.clear(); iCode.clear();
...@@ -3530,7 +3531,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3530,7 +3531,7 @@ void PDBFileParser::ConstructEntities()
if (not dbref.database.empty()) if (not dbref.database.empty())
{ {
auto insToStr = [](char i) -> string { return i == ' ' ? "" : string{ i }; }; auto insToStr = [](char i) -> string { return i == ' ' or not isprint(i) ? "" : string{ i }; };
auto& pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme"); auto& pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme");
...@@ -3724,7 +3725,8 @@ void PDBFileParser::ConstructEntities() ...@@ -3724,7 +3725,8 @@ void PDBFileParser::ConstructEntities()
getCategory("struct")->emplace({ getCategory("struct")->emplace({
{ "entry_id", mStructureId }, { "entry_id", mStructureId },
{ "title", ba::join(structTitle, ", ") }, { "title", ba::join(structTitle, ", ") },
{ "pdbx_descriptor", ba::join(structDescription, ", ") } { "pdbx_descriptor", ba::join(structDescription, ", ") },
{ "pdbx_model_type_details", mModelTypeDetails }
}); });
} }
...@@ -4030,7 +4032,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4030,7 +4032,7 @@ void PDBFileParser::ConstructEntities()
{ "label_asym_id", asymId }, { "label_asym_id", asymId },
{ "label_comp_id", compId }, // TODO: change to correct comp_id { "label_comp_id", compId }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? to_string(seqNr) : "" }, { "label_seq_id", seqNr > 0 ? to_string(seqNr) : "" },
{ "labelAtomId", atom } { "label_atom_id", atom }
}); });
} }
...@@ -5246,7 +5248,15 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile) ...@@ -5246,7 +5248,15 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile)
cifFile.loadDictionary("mmcif_pdbx"); cifFile.loadDictionary("mmcif_pdbx");
p.Parse(pdbFile, cifFile); try
{
p.Parse(pdbFile, cifFile);
}
catch (const exception& ex)
{
cerr << "Error parsing PDB file" << endl;
throw;
}
cifFile.validate(); cifFile.validate();
} }
...@@ -221,6 +221,7 @@ PeptideDB& PeptideDB::Instance() ...@@ -221,6 +221,7 @@ PeptideDB& PeptideDB::Instance()
} }
PeptideDB::PeptideDB() PeptideDB::PeptideDB()
: mImpl(nullptr)
{ {
const char* clibdMon = getenv("CLIBD_MON"); const char* clibdMon = getenv("CLIBD_MON");
if (clibdMon == nullptr) if (clibdMon == nullptr)
......
...@@ -42,7 +42,7 @@ void FileImpl::load(fs::path p) ...@@ -42,7 +42,7 @@ void FileImpl::load(fs::path p)
throw runtime_error("No such file: " + p.string()); throw runtime_error("No such file: " + p.string());
io::filtering_stream<io::input> in; io::filtering_stream<io::input> in;
string ext; string ext = p.extension().string();
if (p.extension() == ".bz2") if (p.extension() == ".bz2")
{ {
...@@ -57,43 +57,51 @@ void FileImpl::load(fs::path p) ...@@ -57,43 +57,51 @@ void FileImpl::load(fs::path p)
in.push(inFile); in.push(inFile);
// OK, we've got the file, now create a protein try
if (ext == ".cif")
mData.load(in);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, mData);
else
{ {
try // OK, we've got the file, now create a protein
{ if (ext == ".cif")
if (VERBOSE)
cerr << "unrecognized file extension, trying cif" << endl;
mData.load(in); mData.load(in);
} else if (ext == ".pdb" or ext == ".ent")
catch (const cif::CifParserError& e)
{
if (VERBOSE)
cerr << "Not cif, trying plain old PDB" << endl;
// pffft...
in.reset();
if (inFile.is_open())
inFile.seekg(0);
else
inFile.open(p, ios_base::in | ios::binary);
if (p.extension() == ".bz2")
in.push(io::bzip2_decompressor());
else if (p.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(inFile);
ReadPDBFile(in, mData); ReadPDBFile(in, mData);
else
{
try
{
if (VERBOSE)
cerr << "unrecognized file extension, trying cif" << endl;
mData.load(in);
}
catch (const cif::CifParserError& e)
{
if (VERBOSE)
cerr << "Not cif, trying plain old PDB" << endl;
// pffft...
in.reset();
if (inFile.is_open())
inFile.seekg(0);
else
inFile.open(p, ios_base::in | ios::binary);
if (p.extension() == ".bz2")
in.push(io::bzip2_decompressor());
else if (p.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(inFile);
ReadPDBFile(in, mData);
}
} }
} }
catch (const exception& ex)
{
cerr << "Failed trying to load file " << p << endl;
throw;
}
// Yes, we've parsed the data. Now locate the datablock. // Yes, we've parsed the data. Now locate the datablock.
mDb = &mData.firstDatablock(); mDb = &mData.firstDatablock();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment