Commit d0b7e21c by maarten

various fixes in pdb2cif

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@179 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent b1de54f8
// copyright
#pragma once
#include <unordered_map>
#include "cif++/Structure.h"
......
......@@ -18,7 +18,9 @@ struct ValidateCategory;
// Exception type used to report validation failures. It derives from
// std::exception and keeps its message text in mMsg.
class ValidationError : public std::exception
{
public:
// NOTE(review): this inline constructor has the same signature as the
// declaration on the next line, which cannot coexist in one class. This
// excerpt looks like a diff rendering, so this is presumably the pre-change
// line that the out-of-line declaration replaces -- confirm.
ValidationError(const std::string& msg) : mMsg(msg) {}
// Constructs the error from a message string.
ValidationError(const std::string& msg);
// Constructs the error from a category name, an item name and a message.
ValidationError(const std::string& cat, const std::string& item,
const std::string& msg);
// Returns the stored message as a C string.
const char* what() const noexcept { return mMsg.c_str(); }
// Message text returned by what().
std::string mMsg;
};
......
......@@ -17,6 +17,18 @@ extern int VERBOSE;
namespace cif
{
// Constructs a validation error that stores msg unchanged as its message.
ValidationError::ValidationError(const string& msg)
: mMsg(msg)
{
}
// Constructs a validation error whose message is prefixed with the item being
// validated: "When validating _<cat>.<item>: <msg>".
ValidationError::ValidationError(const string& cat, const string& item, const string& msg)
: mMsg("When validating _" + cat + '.' + item + ": " + msg)
{
}
// --------------------------------------------------------------------
DDL_PrimitiveType mapToPrimitiveType(const string& s)
{
DDL_PrimitiveType result;
......@@ -137,18 +149,15 @@ void ValidateItem::setParent(ValidateItem* parent)
// Validates a single value for this item.
// Values that are empty, "?" or "." are not checked. Any other value must
// match the regular expression of mType (when mType is set) and must be a
// member of mEnums (when mEnums is not empty); otherwise a ValidationError is
// thrown.
void ValidateItem::operator()(string value) const
{
// Trace every value being validated when VERBOSE is 4 or higher.
if (VERBOSE >= 4)
cout << "validating '" << value << "' for '" << mTag << "'" << endl;
if (not value.empty() and value != "?" and value != ".")
{
// Type check: the whole value has to match the type's regular expression.
// NOTE(review): the if below is followed by two throw statements. This
// excerpt looks like a diff rendering, so the first is presumably the removed
// line and the second (category/item/message form) its replacement. Read
// literally, the second throw would be unconditional -- confirm.
// NOTE(review): the second form dereferences mCategory without a null check;
// assumes mCategory is set before validation -- TODO confirm.
if (mType != nullptr and not boost::regex_match(value, mType->mRx))
throw ValidationError("Value '" + value + "' does not match type expression for type " + mType->mName + " in item " + mTag);
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);
// Enumeration check: only applied when the item has a list of allowed values.
if (not mEnums.empty())
{
// NOTE(review): same old/new pair of throw statements as above -- confirm.
if (mEnums.count(value) == 0)
throw ValidationError("Value '" + value + "' is not in the list of allowed values for item " + mTag);
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
}
}
}
......
......@@ -260,7 +260,7 @@ const Compound* CompoundFactory::create(std::string id)
if (type == "single") value = 1;
else if (type == "double") value = 2;
else if (type == "triple") value = 3;
else if (type == "deloc" or type == "aromat")
else if (type == "deloc" or type == "aromat" or type == "aromatic")
value = 1.5;
else
{
......
......@@ -283,4 +283,42 @@ float CalculateEDIA(const Atom& atom, const clipper::Xmap<float>& xmap,
return result;
}
//
//// --------------------------------------------------------------------
//// test function
//
//double shellIntegration(float start, float end)
//{
// double r = 1.35;
//
// PointWeightFunction w(Point(), r);
//
// double volume = 0, positief = 0, negatief = 0;
// const size_t N = 10000;
//
// double h = 1.0 / N;
//
// for (size_t i = 0; i < N; ++i)
// {
// double x = start + i * h;
// auto y = w(Point(x * r, 0, 0));
//
// volume += x * y;
// if (y > 0)
// positief += x * y;
// if (y < 0)
// negatief += x * y;
// }
//
// volume *= 2 * kPI * h;
// positief *= 2 * kPI * h;
// negatief *= 2 * kPI * h;
//
// cout << "Volume: " << volume << endl
// << "Positief: " << positief << endl
// << "Negatief: " << negatief << endl;
//
// return volume;
//}
}
......@@ -387,9 +387,9 @@ class PDBFileParser
string PDBIDCode;
char chainID;
int seqBegin;
char insertBegin;
char insertBegin = ' ';
int seqEnd;
char insertEnd;
char insertEnd = ' ';
string database;
string dbAccession;
string dbIdCode;
......@@ -820,6 +820,7 @@ class PDBFileParser
cif::Datablock* mDatablock = nullptr;
string mStructureId;
string mModelTypeDetails;
string mOriginalDate;
string mExpMethod = "X-RAY DIFFRACTION";
int mCitationAuthorNr = 1, mCitationEditorNr = 1;
......@@ -1232,7 +1233,8 @@ void PDBFileParser::ParseTitle()
if (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT"
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records.
getCategory("database_PDB_caveat")->emplace({
{ "id", vS(12, 15) }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
// { "id", vS(12, 15) }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
{ "id", 1 }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
{ "text", string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
});
......@@ -1400,8 +1402,7 @@ void PDBFileParser::ParseTitle()
// MDLTYP
if (mRec->is("MDLTYP"))
{
if (VERBOSE)
Error("skipping unimplemented MDLTYP record");
mModelTypeDetails = vS(11);
GetNextRecord();
}
......@@ -1519,7 +1520,8 @@ void PDBFileParser::ParseCitation(const string& id)
string auth, titl, edit, publ, refn, pmid, doi;
string pubname, volume, astm, country, issn, csd;
int pageFirst = 0, pageLast = 0, year = 0;
string pageFirst;
int year = 0;
auto extend = [](string& s, const string& p)
{
......@@ -1541,7 +1543,7 @@ void PDBFileParser::ParseCitation(const string& id)
extend(pubname, vS(20, 47));
if (vS(50, 51) == "V.")
volume = ba::trim_copy(vS(52, 55));
pageFirst = vI(57, 61);
pageFirst = vS(57, 61);
year = vI(63, 66);
}
else
......@@ -1568,8 +1570,7 @@ void PDBFileParser::ParseCitation(const string& id)
{ "title", titl },
{ "journal_abbrev", pubname },
{ "journal_volume", volume },
{ "page_first", pageFirst > 0 ? to_string(pageFirst) : "" },
{ "page_last", pageLast > 0 ? to_string(pageLast) : "" },
{ "page_first", pageFirst },
{ "year", year > 0 ? to_string(year) : "" },
{ "journal_id_ASTM", astm },
{ "country", country },
......@@ -1889,8 +1890,8 @@ void PDBFileParser::ParseRemarks()
string res = vS(16, 18);
char chain = vC(20);
int seq = vI(21, 25);
char iCode = vC(26);
int seq = vI(21, 24);
char iCode = vC(25);
vector<string> atoms;
string atomStr = mRec->vS(29);
......@@ -2052,8 +2053,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13);
string resNam1 = vS(15, 17);
string chainID1 { vC(19) };
int seqNum1 = vI(20, 24);
string iCode1 { vC(25) };
int seqNum1 = vI(20, 23);
string iCode1 { vC(24) };
string alt1 = vS(30, 30);
string atm1 = vS(26, 29);
......@@ -2106,8 +2107,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 24);
string iCode { vC(25) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
......@@ -2154,8 +2155,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 24);
string iCode { vC(25) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
......@@ -2229,8 +2230,8 @@ void PDBFileParser::ParseRemarks()
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 24);
string iCode { vC(25) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
......@@ -3530,7 +3531,7 @@ void PDBFileParser::ConstructEntities()
if (not dbref.database.empty())
{
auto insToStr = [](char i) -> string { return i == ' ' ? "" : string{ i }; };
auto insToStr = [](char i) -> string { return i == ' ' or not isprint(i) ? "" : string{ i }; };
auto& pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme");
......@@ -3724,7 +3725,8 @@ void PDBFileParser::ConstructEntities()
getCategory("struct")->emplace({
{ "entry_id", mStructureId },
{ "title", ba::join(structTitle, ", ") },
{ "pdbx_descriptor", ba::join(structDescription, ", ") }
{ "pdbx_descriptor", ba::join(structDescription, ", ") },
{ "pdbx_model_type_details", mModelTypeDetails }
});
}
......@@ -4030,7 +4032,7 @@ void PDBFileParser::ConstructEntities()
{ "label_asym_id", asymId },
{ "label_comp_id", compId }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? to_string(seqNr) : "" },
{ "labelAtomId", atom }
{ "label_atom_id", atom }
});
}
......@@ -5246,7 +5248,15 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile)
cifFile.loadDictionary("mmcif_pdbx");
p.Parse(pdbFile, cifFile);
try
{
p.Parse(pdbFile, cifFile);
}
catch (const exception& ex)
{
cerr << "Error parsing PDB file" << endl;
throw;
}
cifFile.validate();
}
......@@ -221,6 +221,7 @@ PeptideDB& PeptideDB::Instance()
}
PeptideDB::PeptideDB()
: mImpl(nullptr)
{
const char* clibdMon = getenv("CLIBD_MON");
if (clibdMon == nullptr)
......
......@@ -42,7 +42,7 @@ void FileImpl::load(fs::path p)
throw runtime_error("No such file: " + p.string());
io::filtering_stream<io::input> in;
string ext;
string ext = p.extension().string();
if (p.extension() == ".bz2")
{
......@@ -57,43 +57,51 @@ void FileImpl::load(fs::path p)
in.push(inFile);
// OK, we've got the file, now create a protein
if (ext == ".cif")
mData.load(in);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, mData);
else
try
{
try
{
if (VERBOSE)
cerr << "unrecognized file extension, trying cif" << endl;
// OK, we've got the file, now create a protein
if (ext == ".cif")
mData.load(in);
}
catch (const cif::CifParserError& e)
{
if (VERBOSE)
cerr << "Not cif, trying plain old PDB" << endl;
// pffft...
in.reset();
if (inFile.is_open())
inFile.seekg(0);
else
inFile.open(p, ios_base::in | ios::binary);
if (p.extension() == ".bz2")
in.push(io::bzip2_decompressor());
else if (p.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(inFile);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, mData);
else
{
try
{
if (VERBOSE)
cerr << "unrecognized file extension, trying cif" << endl;
mData.load(in);
}
catch (const cif::CifParserError& e)
{
if (VERBOSE)
cerr << "Not cif, trying plain old PDB" << endl;
// pffft...
in.reset();
if (inFile.is_open())
inFile.seekg(0);
else
inFile.open(p, ios_base::in | ios::binary);
if (p.extension() == ".bz2")
in.push(io::bzip2_decompressor());
else if (p.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(inFile);
ReadPDBFile(in, mData);
}
}
}
catch (const exception& ex)
{
cerr << "Failed trying to load file " << p << endl;
throw;
}
// Yes, we've parsed the data. Now locate the datablock.
mDb = &mData.firstDatablock();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment