Commit e0dc9f1c by maarten

pdb2cif fixes

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@181 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent c5d277fb
...@@ -181,6 +181,8 @@ int PDBRecord::vI(int columnFirst, int columnLast) ...@@ -181,6 +181,8 @@ int PDBRecord::vI(int columnFirst, int columnLast)
enum { start, digit, tail } state = start; enum { start, digit, tail } state = start;
bool negate = false; bool negate = false;
try
{
for (const char* p = mValue + columnFirst - 7; p < e; ++p) for (const char* p = mValue + columnFirst - 7; p < e; ++p)
{ {
switch (state) switch (state)
...@@ -217,6 +219,11 @@ int PDBRecord::vI(int columnFirst, int columnLast) ...@@ -217,6 +219,11 @@ int PDBRecord::vI(int columnFirst, int columnLast)
break; break;
} }
} }
}
catch (...)
{
throw_with_nested(runtime_error("Trying to parse '" + string(mValue + columnFirst - 7, mValue + columnLast - 7) + '\''));
}
if (negate) if (negate)
result = -result; result = -result;
...@@ -645,14 +652,6 @@ class PDBFileParser ...@@ -645,14 +652,6 @@ class PDBFileParser
void GetNextRecord(); void GetNextRecord();
void Match(const string& expected); void Match(const string& expected);
// void Error(const string& msg) const
// {
// string lineNr;
// if (mRec != nullptr)
// lineNr = " (at line " + to_string(mRec->mLineNr) + ')';
//
// throw runtime_error("Error parsing PDB file" + lineNr + ": " + msg);
// }
void ParseTitle(); void ParseTitle();
void ParseCitation(const string& id); void ParseCitation(const string& id);
...@@ -695,6 +694,8 @@ class PDBFileParser ...@@ -695,6 +694,8 @@ class PDBFileParser
rx1(R"((\d{2})-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-(\d{2}))"), rx1(R"((\d{2})-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-(\d{2}))"),
rx2(R"((JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-(\d{2}))"); rx2(R"((JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-(\d{2}))");
try
{
if (regex_match(s, m, rx1)) if (regex_match(s, m, rx1))
{ {
using namespace boost::gregorian; using namespace boost::gregorian;
...@@ -726,6 +727,13 @@ class PDBFileParser ...@@ -726,6 +727,13 @@ class PDBFileParser
} }
else else
ec = error::make_error_code(error::pdbErrors::invalidDate); ec = error::make_error_code(error::pdbErrors::invalidDate);
}
catch (const exception& ex)
{
if (VERBOSE)
cerr << ex.what() << endl;
ec = error::make_error_code(error::pdbErrors::invalidDate);
}
return s; return s;
} }
...@@ -768,7 +776,7 @@ class PDBFileParser ...@@ -768,7 +776,7 @@ class PDBFileParser
{ {
smatch m; smatch m;
if (not regex_match(s, m, sgRx)) if (not regex_match(s, m, sgRx))
throw runtime_error("invalid symmetry value"); throw runtime_error("invalid symmetry value '" + s + '\'');
s = m[1].str() + "_" + m[2].str(); s = m[1].str() + "_" + m[2].str();
} }
...@@ -942,8 +950,16 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -942,8 +950,16 @@ void PDBFileParser::PreParseInput(istream& is)
{ {
string cs = lookahead.substr(offset, len); string cs = lookahead.substr(offset, len);
ba::trim(cs); ba::trim(cs);
int result;
int result = cs.empty() ? 0 : stoi(cs); try
{
result = cs.empty() ? 0 : stoi(cs);
}
catch (...)
{
throw runtime_error("Continuation string '" + cs + "' is not valid");
}
return result; return result;
}; };
...@@ -1054,8 +1070,21 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1054,8 +1070,21 @@ void PDBFileParser::PreParseInput(istream& is)
} }
else if (type == "FORMUL") else if (type == "FORMUL")
{ {
int compNr = stoi(value.substr(1, 3)); try
{
int compNr;
try
{
compNr = stoi(value.substr(1, 3));
}
catch (...)
{
throw_with_nested(runtime_error("Invalid component number '" + value.substr(1, 3) + '\''));
}
int n = 2; int n = 2;
try
{
while (lookahead.substr(0, 6) == type and while (lookahead.substr(0, 6) == type and
stoi(lookahead.substr(7, 3)) == compNr and stoi(lookahead.substr(7, 3)) == compNr and
contNr(16, 2) == n) contNr(16, 2) == n)
...@@ -1066,6 +1095,16 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1066,6 +1095,16 @@ void PDBFileParser::PreParseInput(istream& is)
++n; ++n;
} }
} }
catch (const invalid_argument& ex)
{
throw_with_nested(runtime_error("Invalid component number '" + lookahead.substr(7, 3) + '\''));
}
}
catch (...)
{
throw_with_nested(runtime_error("When parsing FORMUL at line " + to_string(lineNr)));
}
}
else if (type == "HETNAM" or else if (type == "HETNAM" or
type == "HETSYN") type == "HETSYN")
{ {
...@@ -1117,7 +1156,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1117,7 +1156,7 @@ void PDBFileParser::PreParseInput(istream& is)
ba::replace_all(k, " ", " "); ba::replace_all(k, " ", " ");
ba::trim(v); ba::trim(v);
if (iequals(v, "NONE")) if (iequals(v, "NONE") or iequals(v, "N/A") or iequals(v, "NAN"))
mRemark200[k] = "."; mRemark200[k] = ".";
else if (not iequals(v, "NULL")) else if (not iequals(v, "NULL"))
mRemark200[k] = v; mRemark200[k] = v;
...@@ -1161,7 +1200,7 @@ void PDBFileParser::GetNextRecord() ...@@ -1161,7 +1200,7 @@ void PDBFileParser::GetNextRecord()
void PDBFileParser::Match(const string& expected) void PDBFileParser::Match(const string& expected)
{ {
if (mRec->mName != expected) if (mRec->mName != expected)
throw runtime_error("At line " + to_string(mRec->mLineNr) + ": expected record " + expected + " but found " + mRec->mName); throw runtime_error("Expected record " + expected + " but found " + mRec->mName);
} }
vector<string> PDBFileParser::SplitCSV(const string& value) vector<string> PDBFileParser::SplitCSV(const string& value)
...@@ -1239,17 +1278,13 @@ void PDBFileParser::ParseTitle() ...@@ -1239,17 +1278,13 @@ void PDBFileParser::ParseTitle()
} }
// SPLIT // SPLIT
if (mRec->is("SPLIT")) if (mRec->is("SPLIT "))
{ {
// 1 - 6 Record name "SPLIT " // 1 - 6 Record name "SPLIT "
// 9 - 10 Continuation continuation Allows concatenation of multiple records. // 9 - 10 Continuation continuation Allows concatenation of multiple records.
// 12 - 15 IDcode idCode ID code of related datablock. // 12 - 15 IDcode idCode ID code of related datablock.
throw runtime_error("SPLIT PDB files are not supported"); throw runtime_error("SPLIT PDB files are not supported");
// if (VERBOSE)
// Error("skipping unimplemented SPLIT record");
// GetNextRecord();
} }
// CAVEAT // CAVEAT
...@@ -1406,7 +1441,13 @@ void PDBFileParser::ParseTitle() ...@@ -1406,7 +1441,13 @@ void PDBFileParser::ParseTitle()
cat = getCategory("exptl"); cat = getCategory("exptl");
for (auto si = ba::make_split_iterator(mExpMethod, ba::token_finder(ba::is_any_of(";"), ba::token_compress_on)); not si.eof(); ++si) vector<string> crystals;
ba::split(crystals, mRemark200["NUMBER OF CRYSTALS USED"], ba::is_any_of("; "));
if (crystals.empty())
crystals.push_back("");
auto ci = crystals.begin();
for (auto si = ba::make_split_iterator(mExpMethod, ba::token_finder(ba::is_any_of(";"), ba::token_compress_on)); not si.eof(); ++si, ++ci)
{ {
string expMethod(si->begin(), si->end()); string expMethod(si->begin(), si->end());
ba::trim(expMethod); ba::trim(expMethod);
...@@ -1417,7 +1458,7 @@ void PDBFileParser::ParseTitle() ...@@ -1417,7 +1458,7 @@ void PDBFileParser::ParseTitle()
cat->emplace({ cat->emplace({
{ "entry_id", mStructureId }, { "entry_id", mStructureId },
{ "method", expMethod }, { "method", expMethod },
{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] } { "crystals_number", ci != crystals.end() ? *ci : "" }
}); });
} }
...@@ -2601,6 +2642,9 @@ void PDBFileParser::ParseRemark200() ...@@ -2601,6 +2642,9 @@ void PDBFileParser::ParseRemark200()
if (ambientTemp.empty()) if (ambientTemp.empty())
break; break;
if (ba::ends_with(ambientTemp, "K"))
ambientTemp.erase(ambientTemp.length() - 1, 1);
getCategory("diffrn")->emplace({ getCategory("diffrn")->emplace({
{ "id", diffrnNr }, { "id", diffrnNr },
{ "ambient_temp", ambientTemp }, { "ambient_temp", ambientTemp },
...@@ -2673,8 +2717,11 @@ void PDBFileParser::ParseRemark200() ...@@ -2673,8 +2717,11 @@ void PDBFileParser::ParseRemark200()
} }
int wavelengthNr = 1; int wavelengthNr = 1;
for (auto& wl: diffrnWaveLengths) for (auto wl: diffrnWaveLengths)
{ {
if (ba::ends_with(wl, "A"))
wl.erase(wl.length() - 1, 1);
getCategory("diffrn_radiation_wavelength")->emplace({ getCategory("diffrn_radiation_wavelength")->emplace({
{ "id", wavelengthNr++ }, { "id", wavelengthNr++ },
{ "wavelength", wl.empty() ? "." : wl }, { "wavelength", wl.empty() ? "." : wl },
...@@ -3208,6 +3255,8 @@ void PDBFileParser::ConstructEntities() ...@@ -3208,6 +3255,8 @@ void PDBFileParser::ConstructEntities()
if (r->is("MODEL ")) if (r->is("MODEL "))
{ {
modelNr = r->vI(11, 14); modelNr = r->vI(11, 14);
if (modelNr != 1)
break;
continue; continue;
} }
...@@ -3311,6 +3360,14 @@ void PDBFileParser::ConstructEntities() ...@@ -3311,6 +3360,14 @@ void PDBFileParser::ConstructEntities()
for (auto r = mData; r != nullptr; r = r->mNext) for (auto r = mData; r != nullptr; r = r->mNext)
{ {
if (r->is("MODEL "))
{
modelNr = r->vI(11, 14);
if (modelNr != 1)
break;
continue;
}
if (r->is("ATOM ") or r->is("HETATM")) if (r->is("ATOM ") or r->is("HETATM"))
{ // 1 - 6 Record name "ATOM " { // 1 - 6 Record name "ATOM "
int serial = r->vI(7, 11); // 7 - 11 Integer serial Atom serial number. int serial = r->vI(7, 11); // 7 - 11 Integer serial Atom serial number.
...@@ -5184,7 +5241,10 @@ void PDBFileParser::PDBChain::AlignResToSeqRes() ...@@ -5184,7 +5241,10 @@ void PDBFileParser::PDBChain::AlignResToSeqRes()
int x, y; int x, y;
const float kGapOpen = 10, gapExtend = 0.1; const float
kMatchReward = 5,
kMismatchCost = -10,
kGapOpen = 10, gapExtend = 0.1;
float high = 0; float high = 0;
size_t highX = 0, highY = 0; size_t highX = 0, highY = 0;
...@@ -5202,9 +5262,9 @@ void PDBFileParser::PDBChain::AlignResToSeqRes() ...@@ -5202,9 +5262,9 @@ void PDBFileParser::PDBChain::AlignResToSeqRes()
// score for alignment // score for alignment
float M; float M;
if (a.mMonId == b.mMonId) if (a.mMonId == b.mMonId)
M = 1; M = kMatchReward;
else else
M = -10000; M = kMismatchCost;
// gap open cost is zero if the PDB ATOM records indicate that a gap // gap open cost is zero if the PDB ATOM records indicate that a gap
// should be here. // should be here.
......
...@@ -1185,7 +1185,11 @@ bool Remark3Parser::parse(const string& expMethod, PDBRecord* r, cif::Datablock& ...@@ -1185,7 +1185,11 @@ bool Remark3Parser::parse(const string& expMethod, PDBRecord* r, cif::Datablock&
string line = getNextLine(); string line = getNextLine();
if (line != "REFINEMENT.") if (line != "REFINEMENT.")
throw runtime_error("Unexpected data in REMARK 3"); {
if (VERBOSE)
cerr << "Unexpected data in REMARK 3" << endl;
return false;
}
line = getNextLine(); line = getNextLine();
...@@ -1193,7 +1197,12 @@ bool Remark3Parser::parse(const string& expMethod, PDBRecord* r, cif::Datablock& ...@@ -1193,7 +1197,12 @@ bool Remark3Parser::parse(const string& expMethod, PDBRecord* r, cif::Datablock&
smatch m; smatch m;
if (not regex_match(line, m, rxp)) if (not regex_match(line, m, rxp))
throw runtime_error("Expected valid PROGRAM line in REMARK 3"); {
if (VERBOSE)
cerr << "Expected valid PROGRAM line in REMARK 3" << endl;
return false;
}
line = m[1].str(); line = m[1].str();
struct programScore struct programScore
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment