Commit c5d277fb by maarten

refactored main using nested exceptions, fixed PDB parser a bit

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@180 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent d0b7e21c
/*
Created by: Maarten L. Hekkelman
Date: dinsdag 07 november, 2017
Copyright 2017 NKI AVL
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <vector>
#include <string>
#include <tuple>
#include "cif++/Cif++.h"
namespace cif
{
extern const int
kResidueNrWildcard,
kNoSeqNum;
struct TLSSelection;
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
struct TLSResidue;
struct TLSSelection
{
virtual ~TLSSelection() {}
virtual void CollectResidues(cif::Datablock& db, std::vector<TLSResidue>& residues, int indentLevel = 0) const = 0;
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::Datablock& db, bool pdbNamespace) const;
};
// Low level: get the selections
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
}
...@@ -1773,7 +1773,7 @@ Row& Row::operator=(const Row& rhs) ...@@ -1773,7 +1773,7 @@ Row& Row::operator=(const Row& rhs)
void Row::assign(const string& name, const string& value, bool emplacing) void Row::assign(const string& name, const string& value, bool emplacing)
{ {
if (mData == nullptr) if (mData == nullptr)
throw logic_error("invalid Row, no data"); throw logic_error("invalid Row, no data assigning value '" + value + "' to " + name);
auto cat = mData->mCategory; auto cat = mData->mCategory;
auto cix = cat->addColumn(name); auto cix = cat->addColumn(name);
...@@ -1905,7 +1905,7 @@ bool Row::empty() const ...@@ -1905,7 +1905,7 @@ bool Row::empty() const
auto Row::begin() const -> const_iterator auto Row::begin() const -> const_iterator
{ {
return const_iterator(mData, mData->mValues); return const_iterator(mData, mData ? mData->mValues : nullptr);
} }
auto Row::end() const -> const_iterator auto Row::end() const -> const_iterator
......
...@@ -264,7 +264,8 @@ const Compound* CompoundFactory::create(std::string id) ...@@ -264,7 +264,8 @@ const Compound* CompoundFactory::create(std::string id)
value = 1.5; value = 1.5;
else else
{ {
cerr << "Unimplemented chem_comp_bond.type " << type << " in file " << resFile << endl; if (VERBOSE)
cerr << "Unimplemented chem_comp_bond.type " << type << " in file " << resFile << endl;
value = 1.0; value = 1.0;
} }
......
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <boost/format.hpp> #include <boost/format.hpp>
#include <boost/numeric/ublas/matrix.hpp> #include <boost/numeric/ublas/matrix.hpp>
#include <clipper/core/spacegroup.h>
#include "cif++/PDB2Cif.h" #include "cif++/PDB2Cif.h"
#include "cif++/AtomType.h" #include "cif++/AtomType.h"
#include "cif++/Compound.h" #include "cif++/Compound.h"
...@@ -31,7 +33,8 @@ namespace error ...@@ -31,7 +33,8 @@ namespace error
{ {
enum pdbErrors enum pdbErrors
{ {
residueNotFound = 1000 residueNotFound = 1000,
invalidDate
}; };
namespace detail namespace detail
...@@ -52,6 +55,9 @@ namespace error ...@@ -52,6 +55,9 @@ namespace error
case residueNotFound: case residueNotFound:
return "Residue not found"; return "Residue not found";
case invalidDate:
return "Invalid date";
default: default:
return "Error in PDB format"; return "Error in PDB format";
} }
...@@ -123,7 +129,6 @@ PDBRecord::PDBRecord(uint32 lineNr, const string& name, const string& value) ...@@ -123,7 +129,6 @@ PDBRecord::PDBRecord(uint32 lineNr, const string& name, const string& value)
PDBRecord::~PDBRecord() PDBRecord::~PDBRecord()
{ {
delete mNext;
} }
void* PDBRecord::operator new(size_t size, size_t vLen) void* PDBRecord::operator new(size_t size, size_t vLen)
...@@ -373,7 +378,13 @@ class PDBFileParser ...@@ -373,7 +378,13 @@ class PDBFileParser
~PDBFileParser() ~PDBFileParser()
{ {
delete mData; PDBRecord* r = mData;
while (r != nullptr)
{
PDBRecord* d = r;
r = d->mNext;
delete d;
}
} }
void Parse(istream& is, cif::File& result); void Parse(istream& is, cif::File& result);
...@@ -489,8 +500,8 @@ class PDBFileParser ...@@ -489,8 +500,8 @@ class PDBFileParser
int mSeqNum; int mSeqNum;
char mIcode; char mIcode;
bool operator==(const AtomRes& rhs) const { return mMonId == rhs.mMonId and mSeqNum == rhs.mSeqNum and mIcode == rhs.mIcode; } bool operator==(const AtomRes& rhs) const { return mSeqNum == rhs.mSeqNum and mIcode == rhs.mIcode; }
bool operator!=(const AtomRes& rhs) const { return mMonId != rhs.mMonId or mSeqNum != rhs.mSeqNum or mIcode != rhs.mIcode; } bool operator!=(const AtomRes& rhs) const { return mSeqNum != rhs.mSeqNum or mIcode != rhs.mIcode; }
}; };
vector<AtomRes> mResiduesSeen; vector<AtomRes> mResiduesSeen;
...@@ -593,16 +604,7 @@ class PDBFileParser ...@@ -593,16 +604,7 @@ class PDBFileParser
int vI(int columnFirst, int columnLast) const int vI(int columnFirst, int columnLast) const
{ {
int result = 0; return mRec->vI(columnFirst, columnLast);
try
{
result = mRec->vI(columnFirst, columnLast);
}
catch (const exception& ex)
{
Error(ex.what());
}
return result;
} }
// ---------------------------------------------------------------- // ----------------------------------------------------------------
...@@ -643,14 +645,14 @@ class PDBFileParser ...@@ -643,14 +645,14 @@ class PDBFileParser
void GetNextRecord(); void GetNextRecord();
void Match(const string& expected); void Match(const string& expected);
void Error(const string& msg) const // void Error(const string& msg) const
{ // {
string lineNr; // string lineNr;
if (mRec != nullptr) // if (mRec != nullptr)
lineNr = " (at line " + to_string(mRec->mLineNr) + ')'; // lineNr = " (at line " + to_string(mRec->mLineNr) + ')';
//
throw runtime_error("Error parsing PDB file" + lineNr + ": " + msg); // throw runtime_error("Error parsing PDB file" + lineNr + ": " + msg);
} // }
void ParseTitle(); void ParseTitle();
void ParseCitation(const string& id); void ParseCitation(const string& id);
...@@ -686,7 +688,7 @@ class PDBFileParser ...@@ -686,7 +688,7 @@ class PDBFileParser
vector<string> SplitCSV(const string& value); vector<string> SplitCSV(const string& value);
string pdb2cifDate(string s) string pdb2cifDate(string s, boost::system::error_code& ec)
{ {
smatch m; smatch m;
const regex const regex
...@@ -700,7 +702,7 @@ class PDBFileParser ...@@ -700,7 +702,7 @@ class PDBFileParser
int day = stoi(m[1].str()); int day = stoi(m[1].str());
auto mi = kMonths.find(m[2].str()); auto mi = kMonths.find(m[2].str());
if (mi == kMonths.end()) if (mi == kMonths.end())
Error("Invalid month"); throw runtime_error("Invalid month: '" + m[2].str() + '\'');
int month = mi->second; int month = mi->second;
int year = 1900 + stoi(m[3].str()); int year = 1900 + stoi(m[3].str());
if (year < 1950) if (year < 1950)
...@@ -714,7 +716,7 @@ class PDBFileParser ...@@ -714,7 +716,7 @@ class PDBFileParser
{ {
auto mi = kMonths.find(m[1].str()); auto mi = kMonths.find(m[1].str());
if (mi == kMonths.end()) if (mi == kMonths.end())
Error("Invalid month"); throw runtime_error("Invalid month: '" + m[1].str() + '\'');
int month = mi->second; int month = mi->second;
int year = 1900 + stoi(m[2].str()); int year = 1900 + stoi(m[2].str());
if (year < 1950) if (year < 1950)
...@@ -722,10 +724,19 @@ class PDBFileParser ...@@ -722,10 +724,19 @@ class PDBFileParser
s = (boost::format("%04d-%02d") % year % month).str(); s = (boost::format("%04d-%02d") % year % month).str();
} }
else
ec = error::make_error_code(error::pdbErrors::invalidDate);
return s; return s;
} }
string pdb2cifDate(string s)
{
boost::system::error_code ec;
pdb2cifDate(s, ec);
return s;
}
string pdb2cifAuth(string author) string pdb2cifAuth(string author)
{ {
ba::trim(author); ba::trim(author);
...@@ -925,7 +936,16 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -925,7 +936,16 @@ void PDBFileParser::PreParseInput(istream& is)
getline(is, lookahead); getline(is, lookahead);
if (ba::starts_with(lookahead, "HEADER") == false) if (ba::starts_with(lookahead, "HEADER") == false)
Error("This does not look like a PDB file, should start with a HEADER line"); throw runtime_error("This does not look like a PDB file, should start with a HEADER line");
auto contNr = [&lookahead](int offset, int len) -> int
{
string cs = lookahead.substr(offset, len);
ba::trim(cs);
int result = cs.empty() ? 0 : stoi(cs);
return result;
};
PDBRecord* last = nullptr; PDBRecord* last = nullptr;
set<string> dropped; set<string> dropped;
...@@ -964,7 +984,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -964,7 +984,7 @@ void PDBFileParser::PreParseInput(istream& is)
type == "TITLE ") type == "TITLE ")
{ {
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n) while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
{ {
value += ba::trim_right_copy(lookahead.substr(10)); value += ba::trim_right_copy(lookahead.substr(10));
getline(is, lookahead); getline(is, lookahead);
...@@ -976,7 +996,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -976,7 +996,7 @@ void PDBFileParser::PreParseInput(istream& is)
{ {
int n = 2; int n = 2;
value += '\n'; value += '\n';
while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n) while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
{ {
value += ba::trim_right_copy(lookahead.substr(10)); value += ba::trim_right_copy(lookahead.substr(10));
value += '\n'; value += '\n';
...@@ -991,7 +1011,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -991,7 +1011,7 @@ void PDBFileParser::PreParseInput(istream& is)
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and while (lookahead.substr(0, 6) == type and
stoi(lookahead.substr(7, 3)) == revNr and stoi(lookahead.substr(7, 3)) == revNr and
stoi(lookahead.substr(10, 2)) == n) contNr(10, 2) == n)
{ {
value += lookahead.substr(38); value += lookahead.substr(38);
getline(is, lookahead); getline(is, lookahead);
...@@ -1002,7 +1022,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1002,7 +1022,7 @@ void PDBFileParser::PreParseInput(istream& is)
else if (type == "CAVEAT") else if (type == "CAVEAT")
{ {
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n) while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
{ {
value += ba::trim_right_copy(lookahead.substr(13)); value += ba::trim_right_copy(lookahead.substr(13));
getline(is, lookahead); getline(is, lookahead);
...@@ -1023,7 +1043,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1023,7 +1043,7 @@ void PDBFileParser::PreParseInput(istream& is)
{ {
value += '\n'; value += '\n';
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n) while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
{ {
value += ba::trim_copy(lookahead.substr(10)); value += ba::trim_copy(lookahead.substr(10));
value += '\n'; value += '\n';
...@@ -1038,8 +1058,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1038,8 +1058,7 @@ void PDBFileParser::PreParseInput(istream& is)
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and while (lookahead.substr(0, 6) == type and
stoi(lookahead.substr(7, 3)) == compNr and stoi(lookahead.substr(7, 3)) == compNr and
lookahead.substr(16, 2) != " " and contNr(16, 2) == n)
stoi(lookahead.substr(16, 2)) == n)
{ {
value += lookahead.substr(19); value += lookahead.substr(19);
getline(is, lookahead); getline(is, lookahead);
...@@ -1051,9 +1070,7 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1051,9 +1070,7 @@ void PDBFileParser::PreParseInput(istream& is)
type == "HETSYN") type == "HETSYN")
{ {
int n = 2; int n = 2;
while (lookahead.substr(0, 6) == type and while (lookahead.substr(0, 6) == type and contNr(8, 2) == n)
lookahead.substr(8, 2) != " " and
stoi(lookahead.substr(8, 2)) == n)
{ {
value += lookahead.substr(16); value += lookahead.substr(16);
getline(is, lookahead); getline(is, lookahead);
...@@ -1116,6 +1133,9 @@ void PDBFileParser::PreParseInput(istream& is) ...@@ -1116,6 +1133,9 @@ void PDBFileParser::PreParseInput(istream& is)
last->mNext = cur; last->mNext = cur;
last = cur; last = cur;
if (type == "END ")
break;
} }
if (not dropped.empty()) if (not dropped.empty())
...@@ -1141,7 +1161,7 @@ void PDBFileParser::GetNextRecord() ...@@ -1141,7 +1161,7 @@ void PDBFileParser::GetNextRecord()
void PDBFileParser::Match(const string& expected) void PDBFileParser::Match(const string& expected)
{ {
if (mRec->mName != expected) if (mRec->mName != expected)
Error("Expected record " + expected + " but found " + mRec->mName); throw runtime_error("At line " + to_string(mRec->mLineNr) + ": expected record " + expected + " but found " + mRec->mName);
} }
vector<string> PDBFileParser::SplitCSV(const string& value) vector<string> PDBFileParser::SplitCSV(const string& value)
...@@ -1224,17 +1244,20 @@ void PDBFileParser::ParseTitle() ...@@ -1224,17 +1244,20 @@ void PDBFileParser::ParseTitle()
// 1 - 6 Record name "SPLIT " // 1 - 6 Record name "SPLIT "
// 9 - 10 Continuation continuation Allows concatenation of multiple records. // 9 - 10 Continuation continuation Allows concatenation of multiple records.
// 12 - 15 IDcode idCode ID code of related datablock. // 12 - 15 IDcode idCode ID code of related datablock.
if (VERBOSE)
Error("skipping unimplemented SPLIT record"); throw runtime_error("SPLIT PDB files are not supported");
GetNextRecord();
// if (VERBOSE)
// Error("skipping unimplemented SPLIT record");
// GetNextRecord();
} }
// CAVEAT // CAVEAT
if (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT" int caveatID = 1;
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. while (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT"
{
getCategory("database_PDB_caveat")->emplace({ getCategory("database_PDB_caveat")->emplace({
// { "id", vS(12, 15) }, // 12 - 15 IDcode idCode PDB ID code of this datablock. { "id", caveatID++ },
{ "id", 1 }, // 12 - 15 IDcode idCode PDB ID code of this datablock.
{ "text", string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT. { "text", string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
}); });
...@@ -1347,7 +1370,7 @@ void PDBFileParser::ParseTitle() ...@@ -1347,7 +1370,7 @@ void PDBFileParser::ParseTitle()
} }
if (source == nullptr) if (source == nullptr)
Error("MOL_ID missing"); throw runtime_error("At line " + to_string(mRec->mLineNr) + ": missing MOL_ID in SOURCE");
(*source)[key] = value; (*source)[key] = value;
} }
...@@ -1380,13 +1403,23 @@ void PDBFileParser::ParseTitle() ...@@ -1380,13 +1403,23 @@ void PDBFileParser::ParseTitle()
if (mRec->is("EXPDTA")) if (mRec->is("EXPDTA"))
{ {
mExpMethod = vS(11); mExpMethod = vS(11);
cat = getCategory("exptl"); cat = getCategory("exptl");
cat->emplace({
{ "entry_id", mStructureId }, for (auto si = ba::make_split_iterator(mExpMethod, ba::token_finder(ba::is_any_of(";"), ba::token_compress_on)); not si.eof(); ++si)
{ "method", mExpMethod }, {
{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] } string expMethod(si->begin(), si->end());
}); ba::trim(expMethod);
if (expMethod.empty())
continue;
cat->emplace({
{ "entry_id", mStructureId },
{ "method", expMethod },
{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] }
});
}
GetNextRecord(); GetNextRecord();
} }
...@@ -1395,7 +1428,7 @@ void PDBFileParser::ParseTitle() ...@@ -1395,7 +1428,7 @@ void PDBFileParser::ParseTitle()
if (mRec->is("NUMMDL")) if (mRec->is("NUMMDL"))
{ {
if (VERBOSE) if (VERBOSE)
Error("skipping unimplemented NUMMDL record"); cerr << "skipping unimplemented NUMMDL record" << endl;
GetNextRecord(); GetNextRecord();
} }
...@@ -1624,835 +1657,844 @@ void PDBFileParser::ParseRemarks() ...@@ -1624,835 +1657,844 @@ void PDBFileParser::ParseRemarks()
{ {
int remarkNr = vI(8, 10); int remarkNr = vI(8, 10);
switch (remarkNr) try
{ {
case 1: switch (remarkNr)
while (mRec->is("REMARK 1"))
{
if (mRec->mVlen > 15 and vS(12, 20) == "REFERENCE")
{
string id = vS(22, 70);
GetNextRecord();
ParseCitation(id);
}
else
GetNextRecord();
}
break;
case 3:
// we skip REMARK 3 until we know the mapping
while (mRec->is("REMARK 3"))
GetNextRecord();
break;
case 4:
// who cares...
while (mRec->is("REMARK 4"))
GetNextRecord();
break;
case 100:
{
const regex rx(R"(THE (\S+) ID CODE IS (\S+?)\.?\s*)");
smatch m;
string r = vS(12);
if (regex_match(r, m, rx))
{
auto cat = getCategory("database_2");
cat->emplace({
{ "database_id", m[1].str() },
{ "database_code", m[2].str() }
});
}
GetNextRecord();
break;
}
case 200:
{ {
// we already parsed most of this remark, but the "REMARK:" part might have been split. case 1:
while (mRec->is("REMARK 1"))
bool remark = false;
do
{
string r = mRec->vS(12);
if (ba::starts_with(r, "REMARK: "))
{ {
mRemark200["REMARK"] = r.substr(8); if (mRec->mVlen > 15 and vS(12, 20) == "REFERENCE")
remark = true; {
} string id = vS(22, 70);
else if (remark) GetNextRecord();
{
if (r.empty()) ParseCitation(id);
remark = false; }
else else
mRemark200["REMARK"] += r; GetNextRecord();
} }
break;
GetNextRecord();
}
while (mRec->is("REMARK 200"));
break;
}
case 280:
{
string density_Matthews, densityPercentSol, conditions;
const regex rx1(R"(SOLVENT CONTENT, VS +\(%\): *(.+))"),
rx2(R"(MATTHEWS COEFFICIENT, VM \(ANGSTROMS\*\*3/DA\): *(.+))");
smatch m; case 3:
// we skip REMARK 3 until we know the mapping
do while (mRec->is("REMARK 3"))
GetNextRecord();
break;
case 4:
// who cares...
while (mRec->is("REMARK 4"))
GetNextRecord();
break;
case 100:
{ {
const regex rx(R"(THE (\S+) ID CODE IS (\S+?)\.?\s*)");
smatch m;
string r = vS(12); string r = vS(12);
if (conditions.empty()) if (regex_match(r, m, rx))
{ {
if (regex_match(r, m, rx1)) auto cat = getCategory("database_2");
densityPercentSol = m[1].str(); cat->emplace({
else if (regex_match(r, m, rx2)) { "database_id", m[1].str() },
density_Matthews = m[1].str(); { "database_code", m[2].str() }
else if (ba::starts_with(r, "CRYSTALLIZATION CONDITIONS: ")) });
conditions = r.substr(28);
} }
else
conditions = conditions + ' ' + r;
GetNextRecord(); GetNextRecord();
break;
} }
while (mRec->is("REMARK 280"));
string desc = mRemark200["REMARK"];
if (desc == "NULL")
desc.clear();
getCategory("exptl_crystal")->emplace({
{ "id", 1 },
{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews },
{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
{ "description", desc }
});
// now try to parse the conditions
const regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))")/*, rx5(R"(\b(\d+)C\b)")*/;
string temp, ph, method; case 200:
for (auto i = make_split_iterator(conditions, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
{ {
string s(i->begin(), i->end()); // we already parsed most of this remark, but the "REMARK:" part might have been split.
ba::trim(s); bool remark = false;
if (regex_search(s, m, rx3)) do
temp = m[1].str();
if (regex_search(s, m, rx4))
ph = m[1].str();
if (s.length() < 60 and
(ba::icontains(s, "drop") or ba::icontains(s, "vapor") or ba::icontains(s, "batch")))
{ {
if (not method.empty()) string r = mRec->vS(12);
method = method + ", " + s;
else if (ba::starts_with(r, "REMARK: "))
method = s; {
mRemark200["REMARK"] = r.substr(8);
remark = true;
}
else if (remark)
{
if (r.empty())
remark = false;
else
mRemark200["REMARK"] += r;
}
GetNextRecord();
} }
while (mRec->is("REMARK 200"));
break;
} }
if (not (method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL"))) case 280:
{
getCategory("exptl_crystal_grow")->emplace({
{ "crystal_id", 1 },
{ "method", method },
{ "temp", temp },
{ "pH", ph },
{ "pdbx_details", conditions }
});
}
break;
}
// case 290:
//
// break;
case 350:
// postponed since we don't have the required information yet
for (; mRec->is("REMARK 350"); GetNextRecord())
;
break;
case 400:
{
stringstream s;
GetNextRecord();
if (vS(12) == "COMPOUND")
GetNextRecord();
while (mRec->is("REMARK 400"))
{
s << vS(12) << endl;
GetNextRecord();
}
compoundDetails = s.str();
break;
}
case 450:
{
stringstream s;
GetNextRecord();
if (vS(12) == "SOURCE")
GetNextRecord();
while (mRec->is("REMARK 450"))
{
s << vS(12) << endl;
GetNextRecord();
}
sourceDetails = s.str();
break;
}
case 465:
{
bool headerSeen = false;
regex rx(R"( *MODELS *(\d+)-(\d+))");
int models[2] = { -1, -1 };
for (; mRec->is("REMARK 465"); GetNextRecord())
{ {
if (not headerSeen) string density_Matthews, densityPercentSol, conditions;
const regex rx1(R"(SOLVENT CONTENT, VS +\(%\): *(.+))"),
rx2(R"(MATTHEWS COEFFICIENT, VM \(ANGSTROMS\*\*3/DA\): *(.+))");
smatch m;
do
{ {
string line = vS(12); string r = vS(12);
smatch m;
if (conditions.empty())
if (regex_match(line, m, rx))
{ {
models[0] = stoi(m[1].str()); if (regex_match(r, m, rx1))
models[1] = stoi(m[2].str()); densityPercentSol = m[1].str();
else if (regex_match(r, m, rx2))
density_Matthews = m[1].str();
else if (ba::starts_with(r, "CRYSTALLIZATION CONDITIONS: "))
conditions = r.substr(28);
} }
else else
headerSeen = ba::contains(line, "RES C SSSEQI"); conditions = conditions + ' ' + r;
continue;
GetNextRecord();
} }
while (mRec->is("REMARK 280"));
string desc = mRemark200["REMARK"];
if (desc == "NULL")
desc.clear();
if (models[0] == models[1]) getCategory("exptl_crystal")->emplace({
models[0] = models[1] = vI(12, 14); { "id", 1 },
{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews },
{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
{ "description", desc }
});
// now try to parse the conditions
const regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))")/*, rx5(R"(\b(\d+)C\b)")*/;
string res = vS(16, 18); string temp, ph, method;
char chain = vC(20);
int seq = vI(22, 26); for (auto i = make_split_iterator(conditions, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
char iCode = vC(27); {
string s(i->begin(), i->end());
ba::trim(s);
if (regex_search(s, m, rx3))
temp = m[1].str();
if (regex_search(s, m, rx4))
ph = m[1].str();
if (s.length() < 60 and
(ba::icontains(s, "drop") or ba::icontains(s, "vapor") or ba::icontains(s, "batch")))
{
if (not method.empty())
method = method + ", " + s;
else
method = s;
}
}
for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr) if (not (method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL")))
mUnobs.push_back({modelNr, res, chain, seq, iCode}); {
getCategory("exptl_crystal_grow")->emplace({
{ "crystal_id", 1 },
{ "method", method },
{ "temp", temp },
{ "pH", ph },
{ "pdbx_details", conditions }
});
}
break;
} }
break; // case 290:
} //
// break;
case 470:
{ case 350:
bool headerSeen = false; // postponed since we don't have the required information yet
regex rx(R"( *MODELS *(\d+)-(\d+))"); for (; mRec->is("REMARK 350"); GetNextRecord())
int models[2] = { -1, -1 }; ;
break;
for (; mRec->is("REMARK 470"); GetNextRecord()) case 400:
{ {
if (not headerSeen) stringstream s;
GetNextRecord();
if (vS(12) == "COMPOUND")
GetNextRecord();
while (mRec->is("REMARK 400"))
{ {
string line = vS(12); s << vS(12) << endl;
smatch m; GetNextRecord();
if (regex_match(line, m, rx))
{
models[0] = stoi(m[1].str());
models[1] = stoi(m[2].str());
}
else
headerSeen = ba::contains(line, "RES CSSEQI ATOMS");
continue;
} }
if (models[0] == models[1]) compoundDetails = s.str();
models[0] = models[1] = vI(12, 14); break;
}
string res = vS(16, 18);
char chain = vC(20); case 450:
int seq = vI(21, 24); {
char iCode = vC(25); stringstream s;
GetNextRecord();
if (vS(12) == "SOURCE")
GetNextRecord();
vector<string> atoms; while (mRec->is("REMARK 450"))
string atomStr = mRec->vS(29); {
for (auto i = make_split_iterator(atomStr, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i) s << vS(12) << endl;
atoms.push_back({ i-> begin(), i->end() }); GetNextRecord();
}
for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr) sourceDetails = s.str();
mUnobs.push_back({modelNr, res, chain, seq, iCode, atoms}); break;
} }
break; case 465:
}
case 500:
{
GetNextRecord();
enum State { eStart, eCCinSAU, eCC, eCBL, eCBA, eTA, eCTg, ePG, eMCP, eChC } state = eStart;
bool headerSeen = false;
int id = 0;
for (; mRec->is("REMARK 500"); GetNextRecord())
{ {
string line = vS(12); bool headerSeen = false;
regex rx(R"( *MODELS *(\d+)-(\d+))");
if (line == "GEOMETRY AND STEREOCHEMISTRY") int models[2] = { -1, -1 };
continue;
switch (state) for (; mRec->is("REMARK 465"); GetNextRecord())
{ {
case eStart: if (not headerSeen)
{
if (line.empty() or not ba::starts_with(line, "SUBTOPIC: "))
continue;
string subtopic = line.substr(10);
if (subtopic == "CLOSE CONTACTS IN SAME ASYMMETRIC UNIT") state = eCCinSAU;
else if (subtopic == "CLOSE CONTACTS") state = eCC;
else if (subtopic == "COVALENT BOND LENGTHS") state = eCBL;
else if (subtopic == "COVALENT BOND ANGLES") state = eCBA;
else if (subtopic == "TORSION ANGLES") state = eTA;
else if (subtopic == "NON-CIS, NON-TRANS") state = eCTg;
else if (subtopic == "PLANAR GROUPS") state = ePG;
else if (subtopic == "MAIN CHAIN PLANARITY") state = eMCP;
else if (subtopic == "CHIRAL CENTERS") state = eChC;
else if (VERBOSE)
Error("Unknown subtopic in REMARK 500: " + subtopic);
headerSeen = false;
id = 0;
break;
}
case eCCinSAU:
{
if (not headerSeen)
headerSeen =
line == "ATM1 RES C SSEQI ATM2 RES C SSEQI DISTANCE";
else if (line.empty())
state = eStart;
else
{
string atom1 = vS(13, 16);
string res1 = vS(19, 21);
string alt1 = vS(17, 17);
char chain1 = vC(23);
int seq1 = vI(25, 29);
string iCode1 = vS(30, 30);
string atom2 = vS(34, 37);
string alt2 = vS(38, 38);
string res2 = vS(40, 42);
char chain2 = vC(44);
int seq2 = vI(46, 50);
string iCode2 = vS(51, 51);
string distance = vF(63, 71);
getCategory("pdbx_validate_close_contact")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", string { chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "dist", distance }
});
}
break;
}
case eCC:
{
if (not headerSeen)
headerSeen = line == "ATM1 RES C SSEQI ATM2 RES C SSEQI SSYMOP DISTANCE";
else if (line.empty())
state = eStart;
else
{
string atom1 = vS(13, 16);
string res1 = vS(19, 21);
char chain1 = vC(23);
int seq1 = vI(25, 29);
string atom2 = vS(34, 37);
string res2 = vS(40, 42);
char chain2 = vC(44);
int seq2 = vI(46, 50);
string symop = pdb2cifSymmetry(vS(54, 59));
string distance = vF(63, 71);
getCategory("pdbx_validate_symm_contact")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
// { "PDB_ins_code_1", "" },
// { "label_alt_id_1", "" },
{ "site_symmetry_1", "1_555" },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", string { chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
// { "PDB_ins_code_2", "" },
// { "label_alt_id_2", "" },
{ "site_symmetry_2", symop },
{ "dist", distance }
});
}
break;
}
case eCBL:
{ {
if (not headerSeen) string line = vS(12);
{ smatch m;
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
Error("Unexpected format in REMARK 500"); if (regex_match(line, m, rx))
{
headerSeen = line == "M RES CSSEQI ATM1 RES CSSEQI ATM2 DEVIATION"; models[0] = stoi(m[1].str());
} models[1] = stoi(m[2].str());
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam1 = vS(15, 17);
string chainID1 { vC(19) };
int seqNum1 = vI(20, 23);
string iCode1 { vC(24) };
string alt1 = vS(30, 30);
string atm1 = vS(26, 29);
string resNam2 = vS(33, 35);
string chainID2 { vC(37) };
int seqNum2 = vI(38, 41);
string iCode2 { vC(42) };
string alt2 = vS(48, 48);
string atm2 = vS(44, 47);
string deviation = vF(51, 57);
if (iCode1 == " ") iCode1.clear();
if (iCode2 == " ") iCode2.clear();
getCategory("pdbx_validate_rmsd_bond")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atm1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atm2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_comp_id_2", resNam2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "bond_deviation",deviation }
});
}
break;
}
case eCBA:
if (not headerSeen)
{
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
Error("Unexpected format in REMARK 500");
headerSeen = line == "M RES CSSEQI ATM1 ATM2 ATM3";
}
else if (line.empty())
state = eStart;
else if (vS(64) == "DEGREES")
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string atoms[3] = { vS(27, 30), vS(34, 37), vS(41, 44) };
string deviation = vF(57, 62);
getCategory("pdbx_validate_rmsd_angle")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atoms[0] },
{ "auth_asym_id_1", chainID },
{ "auth_comp_id_1", resNam },
{ "auth_seq_id_1", seqNum },
{ "PDB_ins_code_1", iCode },
{ "auth_atom_id_2", atoms[1] },
{ "auth_asym_id_2", chainID },
{ "auth_comp_id_2", resNam },
{ "auth_seq_id_2", seqNum },
{ "PDB_ins_code_2", iCode },
{ "auth_atom_id_3", atoms[2] },
{ "auth_asym_id_3", chainID },
{ "auth_comp_id_3", resNam },
{ "auth_seq_id_3", seqNum },
{ "PDB_ins_code_3", iCode },
{ "angle_deviation",deviation }
});
}
break;
case eTA:
if (not headerSeen)
{
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
Error("Unexpected format in REMARK 500");
headerSeen = line == "M RES CSSEQI PSI PHI";
}
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string psi = vF(27, 35);
string phi = vF(37, 45);
getCategory("pdbx_validate_torsion")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "phi", phi },
{ "psi", psi }
});
}
break;
case eCTg:
if (not headerSeen)
headerSeen = line == "MODEL OMEGA";
else if (line.empty())
state = eStart;
else
{
int model = vI(45, 48);
string resNam1 = vS(12, 14);
string chainID1 { vC(16) };
int seqNum1 = vI(17, 21);
string iCode1 { vC(22) };
if (iCode1 == " ")
iCode1.clear();
string resNam2 = vS(27, 29);
string chainID2 { vC(31) };
int seqNum2 = vI(32, 36);
string iCode2 { vC(37) };
if (iCode2 == " ")
iCode2.clear();
string omega = vF(54, 60);
getCategory("pdbx_validate_peptide_omega")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "auth_comp_id_2", resNam2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "omega", omega }
});
}
break;
case ePG:
if (not headerSeen)
headerSeen = line == "M RES CSSEQI RMS TYPE";
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string rmsd = vF(32, 36);
string type = vS(41);
getCategory("pdbx_validate_planes")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "rmsd", rmsd },
{ "type", type }
});
} }
break; else
headerSeen = ba::contains(line, "RES C SSSEQI");
continue;
}
if (models[0] == models[1])
models[0] = models[1] = vI(12, 14);
default: string res = vS(16, 18);
state = eStart; char chain = vC(20);
break; int seq = vI(22, 26);
char iCode = vC(27);
for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
mUnobs.push_back({modelNr, res, chain, seq, iCode});
} }
break;
} }
break; case 470:
}
case 610:
{
bool headerSeen = false;
for (; mRec->is("REMARK 610"); GetNextRecord())
{ {
if (not headerSeen) bool headerSeen = false;
{ regex rx(R"( *MODELS *(\d+)-(\d+))");
string line = vS(12); int models[2] = { -1, -1 };
headerSeen = ba::contains(line, "RES C SSEQI");
continue;
}
int modelNr = vI(12, 14);
if (modelNr == 0)
modelNr = 1;
string res = vS(16, 18);
char chain = vC(20);
int seq = vI(22, 25);
char iCode = vC(26);
auto compound = libcif::Compound::create(res);
if (compound == nullptr)
continue;
vector<string> atoms; for (; mRec->is("REMARK 470"); GetNextRecord())
for (auto atom: compound->atoms())
{ {
if (atom.typeSymbol != libcif::H) if (not headerSeen)
atoms.push_back(atom.id); {
string line = vS(12);
smatch m;
if (regex_match(line, m, rx))
{
models[0] = stoi(m[1].str());
models[1] = stoi(m[2].str());
}
else
headerSeen = ba::contains(line, "RES CSSEQI ATOMS");
continue;
}
if (models[0] == models[1])
models[0] = models[1] = vI(12, 14);
string res = vS(16, 18);
char chain = vC(20);
int seq = vI(21, 24);
char iCode = vC(25);
vector<string> atoms;
string atomStr = mRec->vS(29);
for (auto i = make_split_iterator(atomStr, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i)
atoms.push_back({ i-> begin(), i->end() });
for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
mUnobs.push_back({modelNr, res, chain, seq, iCode, atoms});
} }
mUnobs.push_back({modelNr, res, chain, seq, iCode, { atoms }}); break;
} }
break; case 500:
}
case 800:
{
const regex rx1(R"(SITE_IDENTIFIER: (.+))"),
rx2(R"(EVIDENCE_CODE: (.+))"),
rx3(R"(SITE_DESCRIPTION: (binding site for residue ([[:alnum:]]{1,3}) ([[:alnum:]]) (\d+)|.+))", regex_constants::icase);
string id, evidence, desc;
string pdbxAuthAsymId, pdbxAuthCompId, pdbxAuthSeqId, pdbxAuthInsCode;
smatch m;
enum State { sStart, sId, sEvidence, sDesc, sDesc2 } state = sStart;
auto store = [&]()
{ {
// Find the matching SITE record GetNextRecord();
auto site = FindRecord([id](PDBRecord& r) -> bool
{
return r.is("SITE ") and r.vS(12, 14) == id;
});
if (site == nullptr) enum State { eStart, eCCinSAU, eCC, eCBL, eCBA, eTA, eCTg, ePG, eMCP, eChC } state = eStart;
throw runtime_error("Invalid REMARK 800, no SITE record for id " + id); bool headerSeen = false;
int id = 0;
// next record, store what we have for (; mRec->is("REMARK 500"); GetNextRecord())
getCategory("struct_site")->emplace({
{ "id", id },
{ "details", desc },
{ "pdbx_auth_asym_id", pdbxAuthAsymId },
{ "pdbx_auth_comp_id", pdbxAuthCompId },
{ "pdbx_auth_seq_id", pdbxAuthSeqId },
{ "pdbx_num_residues", site->vI(16, 17) },
{ "pdbx_evidence_code", evidence }
});
};
for ( ; mRec->is("REMARK 800"); GetNextRecord())
{
string s = mRec->vS(12);
if (s.empty())
continue;
switch (state)
{ {
case sStart: string line = vS(12);
if (s == "SITE")
state = sId; if (line == "GEOMETRY AND STEREOCHEMISTRY")
else if (VERBOSE) continue;
Error("Invalid REMARK 800 record, expected SITE");
break;
case sId: switch (state)
if (regex_match(s, m, rx1)) {
case eStart:
{ {
id = m[1].str(); if (line.empty() or not ba::starts_with(line, "SUBTOPIC: "))
state = sEvidence; continue;
string subtopic = line.substr(10);
if (subtopic == "CLOSE CONTACTS IN SAME ASYMMETRIC UNIT") state = eCCinSAU;
else if (subtopic == "CLOSE CONTACTS") state = eCC;
else if (subtopic == "COVALENT BOND LENGTHS") state = eCBL;
else if (subtopic == "COVALENT BOND ANGLES") state = eCBA;
else if (subtopic == "TORSION ANGLES") state = eTA;
else if (subtopic == "NON-CIS, NON-TRANS") state = eCTg;
else if (subtopic == "PLANAR GROUPS") state = ePG;
else if (subtopic == "MAIN CHAIN PLANARITY") state = eMCP;
else if (subtopic == "CHIRAL CENTERS") state = eChC;
else if (VERBOSE)
throw runtime_error("Unknown subtopic in REMARK 500: " + subtopic);
headerSeen = false;
id = 0;
break;
} }
else if (VERBOSE)
Error("Invalid REMARK 800 record, expected SITE_IDENTIFIER"); case eCCinSAU:
break;
case sEvidence:
if (regex_match(s, m, rx2))
{ {
evidence = m[1].str(); if (not headerSeen)
state = sDesc; headerSeen =
line == "ATM1 RES C SSEQI ATM2 RES C SSEQI DISTANCE";
else if (line.empty())
state = eStart;
else
{
string atom1 = vS(13, 16);
string res1 = vS(19, 21);
string alt1 = vS(17, 17);
char chain1 = vC(23);
int seq1 = vI(25, 29);
string iCode1 = vS(30, 30);
string atom2 = vS(34, 37);
string alt2 = vS(38, 38);
string res2 = vS(40, 42);
char chain2 = vC(44);
int seq2 = vI(46, 50);
string iCode2 = vS(51, 51);
string distance = vF(63, 71);
getCategory("pdbx_validate_close_contact")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", string { chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "dist", distance }
});
}
break;
} }
else if (VERBOSE)
Error("Invalid REMARK 800 record, expected SITE_IDENTIFIER"); case eCC:
break;
case sDesc:
if (regex_match(s, m, rx3))
{ {
desc = m[1].str(); if (not headerSeen)
pdbxAuthCompId = m[2].str(); headerSeen = line == "ATM1 RES C SSEQI ATM2 RES C SSEQI SSYMOP DISTANCE";
pdbxAuthAsymId = m[3].str(); else if (line.empty())
pdbxAuthSeqId = m[4].str(); state = eStart;
else
state = sDesc2; {
string atom1 = vS(13, 16);
string res1 = vS(19, 21);
char chain1 = vC(23);
int seq1 = vI(25, 29);
string atom2 = vS(34, 37);
string res2 = vS(40, 42);
char chain2 = vC(44);
int seq2 = vI(46, 50);
string symop = pdb2cifSymmetry(vS(54, 59));
string distance = vF(63, 71);
getCategory("pdbx_validate_symm_contact")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
// { "PDB_ins_code_1", "" },
// { "label_alt_id_1", "" },
{ "site_symmetry_1", "1_555" },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", string { chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
// { "PDB_ins_code_2", "" },
// { "label_alt_id_2", "" },
{ "site_symmetry_2", symop },
{ "dist", distance }
});
}
break;
} }
break;
case eCBL:
case sDesc2:
if (regex_match(s, m, rx1))
{ {
store(); if (not headerSeen)
{
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
throw runtime_error("Unexpected format in REMARK 500");
headerSeen = line == "M RES CSSEQI ATM1 RES CSSEQI ATM2 DEVIATION";
}
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam1 = vS(15, 17);
string chainID1 { vC(19) };
int seqNum1 = vI(20, 23);
string iCode1 { vC(24) };
string alt1 = vS(30, 30);
string atm1 = vS(26, 29);
string resNam2 = vS(33, 35);
string chainID2 { vC(37) };
int seqNum2 = vI(38, 41);
string iCode2 { vC(42) };
string alt2 = vS(48, 48);
string atm2 = vS(44, 47);
string deviation = vF(51, 57);
if (iCode1 == " ") iCode1.clear();
if (iCode2 == " ") iCode2.clear();
getCategory("pdbx_validate_rmsd_bond")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atm1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atm2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_comp_id_2", resNam2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "bond_deviation",deviation }
});
}
id = m[1].str(); break;
state = sEvidence;
evidence.clear();
desc.clear();
} }
else
desc = desc + ' ' + s; case eCBA:
break; if (not headerSeen)
{
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
throw runtime_error("Unexpected format in REMARK 500");
headerSeen = line == "M RES CSSEQI ATM1 ATM2 ATM3";
}
else if (line.empty())
state = eStart;
else if (vS(64) == "DEGREES")
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string atoms[3] = { vS(27, 30), vS(34, 37), vS(41, 44) };
string deviation = vF(57, 62);
if (deviation == "*****")
deviation.clear();
getCategory("pdbx_validate_rmsd_angle")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atoms[0] },
{ "auth_asym_id_1", chainID },
{ "auth_comp_id_1", resNam },
{ "auth_seq_id_1", seqNum },
{ "PDB_ins_code_1", iCode },
{ "auth_atom_id_2", atoms[1] },
{ "auth_asym_id_2", chainID },
{ "auth_comp_id_2", resNam },
{ "auth_seq_id_2", seqNum },
{ "PDB_ins_code_2", iCode },
{ "auth_atom_id_3", atoms[2] },
{ "auth_asym_id_3", chainID },
{ "auth_comp_id_3", resNam },
{ "auth_seq_id_3", seqNum },
{ "PDB_ins_code_3", iCode },
{ "angle_deviation",deviation }
});
}
break;
case eTA:
if (not headerSeen)
{
if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
throw runtime_error("Unexpected format in REMARK 500");
headerSeen = line == "M RES CSSEQI PSI PHI";
}
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string psi = vF(27, 35);
string phi = vF(37, 45);
getCategory("pdbx_validate_torsion")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "phi", phi },
{ "psi", psi }
});
}
break;
case eCTg:
if (not headerSeen)
headerSeen = line == "MODEL OMEGA";
else if (line.empty())
state = eStart;
else
{
int model = vI(45, 48);
string resNam1 = vS(12, 14);
string chainID1 { vC(16) };
int seqNum1 = vI(17, 21);
string iCode1 { vC(22) };
if (iCode1 == " ")
iCode1.clear();
string resNam2 = vS(27, 29);
string chainID2 { vC(31) };
int seqNum2 = vI(32, 36);
string iCode2 { vC(37) };
if (iCode2 == " ")
iCode2.clear();
string omega = vF(54, 60);
getCategory("pdbx_validate_peptide_omega")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "auth_comp_id_2", resNam2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "omega", omega }
});
}
break;
case ePG:
if (not headerSeen)
headerSeen = line == "M RES CSSEQI RMS TYPE";
else if (line.empty())
state = eStart;
else
{
int model = vI(11, 13);
string resNam = vS(15, 17);
string chainID { vC(19) };
int seqNum = vI(20, 23);
string iCode { vC(24) };
if (iCode == " ")
iCode.clear();
string rmsd = vF(32, 36);
string type = vS(41);
getCategory("pdbx_validate_planes")->emplace({
{ "id", to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "rmsd", rmsd },
{ "type", type }
});
}
break;
default:
state = eStart;
break;
}
} }
break;
} }
if (not id.empty()) case 610:
store();
break;
}
case 999:
{
stringstream s;
GetNextRecord();
if (vS(12) == "SEQUENCE")
GetNextRecord();
while (mRec->is("REMARK 999"))
{ {
s << vS(12) << endl; bool headerSeen = false;
GetNextRecord();
for (; mRec->is("REMARK 610"); GetNextRecord())
{
if (not headerSeen)
{
string line = vS(12);
headerSeen = ba::contains(line, "RES C SSEQI");
continue;
}
int modelNr = vI(12, 14);
if (modelNr == 0)
modelNr = 1;
string res = vS(16, 18);
char chain = vC(20);
int seq = vI(22, 25);
char iCode = vC(26);
auto compound = libcif::Compound::create(res);
if (compound == nullptr)
continue;
vector<string> atoms;
for (auto atom: compound->atoms())
{
if (atom.typeSymbol != libcif::H)
atoms.push_back(atom.id);
}
mUnobs.push_back({modelNr, res, chain, seq, iCode, { atoms }});
}
break;
} }
sequenceDetails = s.str(); case 800:
break; {
} const regex rx1(R"(SITE_IDENTIFIER: (.+))"),
rx2(R"(EVIDENCE_CODE: (.+))"),
// these are skipped rx3(R"(SITE_DESCRIPTION: (binding site for residue ([[:alnum:]]{1,3}) ([[:alnum:]]) (\d+)|.+))", regex_constants::icase);
case 2: string id, evidence, desc;
case 290: string pdbxAuthAsymId, pdbxAuthCompId, pdbxAuthSeqId, pdbxAuthInsCode;
case 300: smatch m;
GetNextRecord();
break; enum State { sStart, sId, sEvidence, sDesc, sDesc2 } state = sStart;
default: auto store = [&]()
{ {
string skipped = mRec->mName; // Find the matching SITE record
auto site = FindRecord([id](PDBRecord& r) -> bool
stringstream s; {
return r.is("SITE ") and r.vS(12, 14) == id;
});
if (site == nullptr)
throw runtime_error("Invalid REMARK 800, no SITE record for id " + id);
// next record, store what we have
getCategory("struct_site")->emplace({
{ "id", id },
{ "details", desc },
{ "pdbx_auth_asym_id", pdbxAuthAsymId },
{ "pdbx_auth_comp_id", pdbxAuthCompId },
{ "pdbx_auth_seq_id", pdbxAuthSeqId },
{ "pdbx_num_residues", site->vI(16, 17) },
{ "pdbx_evidence_code", evidence }
});
};
for ( ; mRec->is("REMARK 800"); GetNextRecord())
{
string s = mRec->vS(12);
if (s.empty())
continue;
switch (state)
{
case sStart:
if (s == "SITE")
state = sId;
else if (VERBOSE)
throw runtime_error("Invalid REMARK 800 record, expected SITE");
break;
case sId:
if (regex_match(s, m, rx1))
{
id = m[1].str();
state = sEvidence;
}
else if (VERBOSE)
throw runtime_error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
break;
case sEvidence:
if (regex_match(s, m, rx2))
{
evidence = m[1].str();
state = sDesc;
}
else if (VERBOSE)
throw runtime_error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
break;
case sDesc:
if (regex_match(s, m, rx3))
{
desc = m[1].str();
pdbxAuthCompId = m[2].str();
pdbxAuthAsymId = m[3].str();
pdbxAuthSeqId = m[4].str();
state = sDesc2;
}
break;
case sDesc2:
if (regex_match(s, m, rx1))
{
store();
id = m[1].str();
state = sEvidence;
evidence.clear();
desc.clear();
}
else
desc = desc + ' ' + s;
break;
}
}
if (not id.empty())
store();
break;
}
if (not mRec->vS(11).empty()) case 999:
s << mRec->vS(11) << endl; {
GetNextRecord(); stringstream s;
GetNextRecord();
if (vS(12) == "SEQUENCE")
GetNextRecord();
while (mRec->is("REMARK 999"))
{
s << vS(12) << endl;
GetNextRecord();
}
while (mRec->is(skipped.c_str())) sequenceDetails = s.str();
break;
}
// these are skipped
case 2:
case 290:
case 300:
GetNextRecord();
break;
default:
{ {
s << mRec->vS(11) << endl; string skipped = mRec->mName;
stringstream s;
if (not mRec->vS(11).empty())
s << mRec->vS(11) << endl;
GetNextRecord(); GetNextRecord();
while (mRec->is(skipped.c_str()))
{
s << mRec->vS(11) << endl;
GetNextRecord();
}
getCategory("pdbx_database_remark")->emplace({
{ "id", remarkNr },
{ "text", s.str() }
});
break;
} }
getCategory("pdbx_database_remark")->emplace({
{ "id", remarkNr },
{ "text", s.str() }
});
break;
} }
} }
catch (const exception& ex)
{
throw_with_nested(runtime_error("Error parsing REMARK " + to_string(remarkNr)));
}
} }
if (not (compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty())) if (not (compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty()))
...@@ -2566,11 +2608,24 @@ void PDBFileParser::ParseRemark200() ...@@ -2566,11 +2608,24 @@ void PDBFileParser::ParseRemark200()
{ "crystal_id", 1 } { "crystal_id", 1 }
}); });
string collectionDate;
boost::system::error_code ec;
collectionDate = pdb2cifDate(rm200("DATE OF DATA COLLECTION", diffrnNr), ec);
if (ec)
{
if (VERBOSE)
cerr << ec.message() << " for pdbx_collection_date" << endl;
// The date field can become truncated when multiple values are available
if (diffrnNr != 1)
collectionDate.clear();
}
getCategory("diffrn_detector")->emplace({ getCategory("diffrn_detector")->emplace({
{ "diffrn_id", diffrnNr }, { "diffrn_id", diffrnNr },
{ "detector", rm200("DETECTOR TYPE", diffrnNr) }, { "detector", rm200("DETECTOR TYPE", diffrnNr) },
{ "type", rm200("DETECTOR MANUFACTURER", diffrnNr) }, { "type", rm200("DETECTOR MANUFACTURER", diffrnNr) },
{ "pdbx_collection_date", pdb2cifDate(rm200("DATE OF DATA COLLECTION", diffrnNr)) }, { "pdbx_collection_date", collectionDate },
{ "details", rm200("OPTICS", diffrnNr) } { "details", rm200("OPTICS", diffrnNr) }
}); });
...@@ -2586,7 +2641,7 @@ void PDBFileParser::ParseRemark200() ...@@ -2586,7 +2641,7 @@ void PDBFileParser::ParseRemark200()
vector<string> wavelengths; vector<string> wavelengths;
string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr); string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr);
ba::split(wavelengths, wl, ba::is_any_of(", "), ba::token_compress_on); ba::split(wavelengths, wl, ba::is_any_of(", -"), ba::token_compress_on);
diffrnWaveLengths.insert(wavelengths.begin(), wavelengths.end()); diffrnWaveLengths.insert(wavelengths.begin(), wavelengths.end());
...@@ -2997,7 +3052,7 @@ void PDBFileParser::ParsePrimaryStructure() ...@@ -2997,7 +3052,7 @@ void PDBFileParser::ParsePrimaryStructure()
else if (mRec->is("DBREF2")) // 1 - 6 Record name "DBREF2" else if (mRec->is("DBREF2")) // 1 - 6 Record name "DBREF2"
{ // 8 - 11 IDcode idCode ID code of this datablock. { // 8 - 11 IDcode idCode ID code of this datablock.
if (vC(13) != cur.chainID) // 13 Character chainID Chain identifier. if (vC(13) != cur.chainID) // 13 Character chainID Chain identifier.
Error("Chain ID's for DBREF1/DBREF2 records do not match"); throw runtime_error("Chain ID's for DBREF1/DBREF2 records do not match");
cur.dbAccession = vS(19, 40); // 19 - 40 LString dbAccession Sequence database accession code, cur.dbAccession = vS(19, 40); // 19 - 40 LString dbAccession Sequence database accession code,
// left justified. // left justified.
cur.dbSeqBegin = vI(46, 55); // 46 - 55 Integer seqBegin Initial sequence number of the cur.dbSeqBegin = vI(46, 55); // 46 - 55 Integer seqBegin Initial sequence number of the
...@@ -3096,25 +3151,32 @@ void PDBFileParser::ParseHeterogen() ...@@ -3096,25 +3151,32 @@ void PDBFileParser::ParseHeterogen()
GetNextRecord(); GetNextRecord();
} }
while (mRec->is("HETNAM")) // 1 - 6 Record name "HETNAM" for (;;)
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. {
string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified. if (mRec->is("HETNAM")) // 1 - 6 Record name "HETNAM"
string text = vS(16); // 16 - 70 String text Chemical name. { // 9 - 10 Continuation continuation Allows concatenation of multiple records.
string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified.
mHetnams[hetID] = text; string text = vS(16); // 16 - 70 String text Chemical name.
InsertChemComp(hetID);
mHetnams[hetID] = text;
InsertChemComp(hetID);
GetNextRecord();
continue;
}
if (mRec->is("HETSYN")) // 1 - 6 Record name "HETSYN"
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records.
string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified.
string syn = vS(16); // 16 - 70 SList hetSynonyms List of synonyms.
mHetsyns[hetID] = syn;
GetNextRecord();
continue;
}
GetNextRecord(); break;
}
while (mRec->is("HETSYN")) // 1 - 6 Record name "HETSYN"
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records.
string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified.
string syn = vS(16); // 16 - 70 SList hetSynonyms List of synonyms.
mHetsyns[hetID] = syn;
GetNextRecord();
} }
while (mRec->is("FORMUL")) // 1 - 6 Record name "FORMUL" while (mRec->is("FORMUL")) // 1 - 6 Record name "FORMUL"
...@@ -3145,7 +3207,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3145,7 +3207,7 @@ void PDBFileParser::ConstructEntities()
{ {
if (r->is("MODEL ")) if (r->is("MODEL "))
{ {
modelNr = vI(11, 14); modelNr = r->vI(11, 14);
continue; continue;
} }
...@@ -3245,6 +3307,8 @@ void PDBFileParser::ConstructEntities() ...@@ -3245,6 +3307,8 @@ void PDBFileParser::ConstructEntities()
} }
} }
set<char> terminatedChains;
for (auto r = mData; r != nullptr; r = r->mNext) for (auto r = mData; r != nullptr; r = r->mNext)
{ {
if (r->is("ATOM ") or r->is("HETATM")) if (r->is("ATOM ") or r->is("HETATM"))
...@@ -3295,7 +3359,8 @@ void PDBFileParser::ConstructEntities() ...@@ -3295,7 +3359,8 @@ void PDBFileParser::ConstructEntities()
} }
} }
if (r->is("HETATM")) // There appears to be a program that writes out HETATM records as ATOM records....
if (r->is("HETATM") or terminatedChains.count(chainID))
{ {
if (isWater(resName)) if (isWater(resName))
mWaterHetId = resName; mWaterHetId = resName;
...@@ -3317,6 +3382,12 @@ void PDBFileParser::ConstructEntities() ...@@ -3317,6 +3382,12 @@ void PDBFileParser::ConstructEntities()
continue; continue;
} }
if (r->is("TER "))
{
char chainID = r->vC(22); // 22 Character chainID Chain identifier.
terminatedChains.insert(chainID);
}
} }
// Create missing compounds // Create missing compounds
...@@ -3371,32 +3442,38 @@ void PDBFileParser::ConstructEntities() ...@@ -3371,32 +3442,38 @@ void PDBFileParser::ConstructEntities()
int seqNr = 1; int seqNr = 1;
for (auto& res: chain.mSeqres) for (auto& res: chain.mSeqres)
{ {
string authMonId, authSeqNum;
if (res.mSeen)
{
authMonId = res.mMonId;
authSeqNum = to_string(res.mSeqNum);
}
mChainSeq2AsymSeq[make_tuple(chain.mDbref.chainID, res.mSeqNum, res.mIcode)] = make_tuple(asymId, seqNr, true); mChainSeq2AsymSeq[make_tuple(chain.mDbref.chainID, res.mSeqNum, res.mIcode)] = make_tuple(asymId, seqNr, true);
string seqId = to_string(seqNr); string seqId = to_string(seqNr);
++seqNr; ++seqNr;
cat->emplace({ set<string> monIds = { res.mMonId };
{ "asym_id", asymId }, monIds.insert(res.mAlts.begin(), res.mAlts.end());
{ "entity_id", mMolID2EntityID[chain.mMolId] },
{ "seq_id", seqId }, for (string monId: monIds)
{ "mon_id", res.mMonId }, {
{ "ndb_seq_num", seqId }, string authMonId, authSeqNum;
{ "pdb_seq_num", res.mSeqNum }, if (res.mSeen)
{ "auth_seq_num", authSeqNum }, {
{ "pdb_mon_id", authMonId }, authMonId = monId;
{ "auth_mon_id", authMonId }, authSeqNum = to_string(res.mSeqNum);
{ "pdb_strand_id", string{chain.mDbref.chainID} }, }
{ "pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : string{res.mIcode} },
{ "hetero", res.mAlts.empty() ? "n" : "y" } cat->emplace({
}); { "asym_id", asymId },
{ "entity_id", mMolID2EntityID[chain.mMolId] },
{ "seq_id", seqId },
{ "mon_id", monId },
{ "ndb_seq_num", seqId },
{ "pdb_seq_num", res.mSeqNum },
{ "auth_seq_num", authSeqNum },
{ "pdb_mon_id", authMonId },
{ "auth_mon_id", authMonId },
{ "pdb_strand_id", string{chain.mDbref.chainID} },
{ "pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : string{res.mIcode} },
{ "hetero", res.mAlts.empty() ? "n" : "y" }
});
}
} }
} }
...@@ -3431,9 +3508,12 @@ void PDBFileParser::ConstructEntities() ...@@ -3431,9 +3508,12 @@ void PDBFileParser::ConstructEntities()
{ "gene_src_common_name", cmp.mSource["ORGANISM_COMMON"] }, { "gene_src_common_name", cmp.mSource["ORGANISM_COMMON"] },
{ "pdbx_gene_src_gene", cmp.mSource["GENE"] }, { "pdbx_gene_src_gene", cmp.mSource["GENE"] },
{ "gene_src_strain", cmp.mSource["STRAIN"] }, { "gene_src_strain", cmp.mSource["STRAIN"] },
{ "gene_src_tissue", cmp.mSource["TISSUE"] },
{ "gene_src_tissue_fraction", cmp.mSource["TISSUE_FRACTION"] },
{ "pdbx_gene_src_cell_line", cmp.mSource["CELL_LINE"] }, { "pdbx_gene_src_cell_line", cmp.mSource["CELL_LINE"] },
{ "pdbx_gene_src_organelle", cmp.mSource["ORGANELLE"] }, { "pdbx_gene_src_organelle", cmp.mSource["ORGANELLE"] },
{ "pdbx_gene_src_cellular_location", cmp.mSource["CELLULAR_LOCATION"] }, { "pdbx_gene_src_cellular_location", cmp.mSource["CELLULAR_LOCATION"] },
{ "host_org_common_name", cmp.mSource["EXPRESSION_SYSTEM_COMMON"] },
{ "pdbx_gene_src_scientific_name", cmp.mSource["ORGANISM_SCIENTIFIC"] }, { "pdbx_gene_src_scientific_name", cmp.mSource["ORGANISM_SCIENTIFIC"] },
{ "pdbx_gene_src_ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] }, { "pdbx_gene_src_ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
{ "pdbx_host_org_scientific_name", cmp.mSource["EXPRESSION_SYSTEM"] }, { "pdbx_host_org_scientific_name", cmp.mSource["EXPRESSION_SYSTEM"] },
...@@ -4613,9 +4693,21 @@ void PDBFileParser::ParseCrystallographic() ...@@ -4613,9 +4693,21 @@ void PDBFileParser::ParseCrystallographic()
{ "Z_PDB", vF(67, 70) } // 67 - 70 Integer z Z value. { "Z_PDB", vF(67, 70) } // 67 - 70 Integer z Z value.
}); });
string spageGroup, intTablesNr;
try
{
spageGroup = vS(56, 66);
clipper::Spacegroup sg(clipper::Spgr_descr{spageGroup});
intTablesNr = to_string(sg.spacegroup_number());
}
catch (...)
{
}
getCategory("symmetry")->emplace({ getCategory("symmetry")->emplace({
{ "entry_id", mStructureId }, { "entry_id", mStructureId },
{ "space_group_name_H-M", vS(56, 66) } { "space_group_name_H-M", spageGroup },
{ "Int_Tables_number", intTablesNr }
}); });
GetNextRecord(); GetNextRecord();
...@@ -4901,7 +4993,7 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -4901,7 +4993,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
int u23 = vI(64, 70); // 64 - 70 Integer u[1][2] U(2,3) int u23 = vI(64, 70); // 64 - 70 Integer u[1][2] U(2,3)
if (vS(7, 11) + vS(77, 80) != check) if (vS(7, 11) + vS(77, 80) != check)
Error("ANISOU record should follow corresponding ATOM record"); throw runtime_error("ANISOU record should follow corresponding ATOM record");
auto f = [](float f) -> string { return (boost::format("%6.4f") % f).str(); }; auto f = [](float f) -> string { return (boost::format("%6.4f") % f).str(); };
...@@ -4912,7 +5004,7 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -4912,7 +5004,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
{ "pdbx_label_alt_id", altLoc != ' ' ? string { altLoc } : "." }, { "pdbx_label_alt_id", altLoc != ' ' ? string { altLoc } : "." },
{ "pdbx_label_comp_id", resName }, { "pdbx_label_comp_id", resName },
{ "pdbx_label_asym_id", asymId }, { "pdbx_label_asym_id", asymId },
{ "pdbx_label_seq_id", seqId }, { "pdbx_label_seq_id", (isResseq and seqId > 0) ? to_string(seqId) : "." },
{ "U[1][1]", f(u11 / 10000.f) }, { "U[1][1]", f(u11 / 10000.f) },
{ "U[2][2]", f(u22 / 10000.f) }, { "U[2][2]", f(u22 / 10000.f) },
{ "U[3][3]", f(u33 / 10000.f) }, { "U[3][3]", f(u33 / 10000.f) },
...@@ -5028,12 +5120,9 @@ void PDBFileParser::Parse(istream& is, cif::File& result) ...@@ -5028,12 +5120,9 @@ void PDBFileParser::Parse(istream& is, cif::File& result)
} }
catch (const exception& ex) catch (const exception& ex)
{ {
cerr << "Error parsing REMARK 3: " << endl throw_with_nested(runtime_error("Error parsing REMARK 3"));
<< ex.what() << endl;
} }
// //
//
//
// auto cat = getCategory("pdbx_refine_tls_group"); // auto cat = getCategory("pdbx_refine_tls_group");
// for (Row r: *cat) // for (Row r: *cat)
// { // {
...@@ -5062,7 +5151,10 @@ void PDBFileParser::Parse(istream& is, cif::File& result) ...@@ -5062,7 +5151,10 @@ void PDBFileParser::Parse(istream& is, cif::File& result)
} }
catch (const exception& ex) catch (const exception& ex)
{ {
Error(ex.what()); if (mRec != nullptr)
throw_with_nested(runtime_error("Error parsing PDB at line " + to_string(mRec->mLineNr)));
else
throw;
} }
} }
...@@ -5175,36 +5267,75 @@ void PDBFileParser::PDBChain::AlignResToSeqRes() ...@@ -5175,36 +5267,75 @@ void PDBFileParser::PDBChain::AlignResToSeqRes()
x = highX; x = highX;
y = highY; y = highY;
while (x >= 0 and y >= 0) try
{ {
switch (tb(x, y)) while (x >= 0 and y >= 0)
{ {
case -1: switch (tb(x, y))
throw runtime_error("A residue found in the ATOM records (" + ry[y].mMonId + {
" @ " + string{mDbref.chainID} + ":" + to_string(ry[y].mSeqNum) + case -1:
") was not found in the SEQRES records"); throw runtime_error("A residue found in the ATOM records (" + ry[y].mMonId +
--y; " @ " + string{mDbref.chainID} + ":" + to_string(ry[y].mSeqNum) +
break; ((ry[y].mIcode == ' ' or ry[y].mIcode == 0) ? "" : string{ ry[y].mIcode })+
") was not found in the SEQRES records");
--y;
break;
case 1:
if (VERBOSE > 3)
cerr << "Missing residue in ATOM records: " << rx[x].mMonId << " at " << rx[x].mSeqNum << endl;
--x;
break;
case 0:
if (VERBOSE > 3 and rx[x].mMonId != ry[y].mMonId)
cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonId << '/' << ry[y].mMonId << ')' << endl;
else if (VERBOSE > 4)
cerr << rx[x].mMonId << " -> " << ry[y].mMonId << " (" << ry[y].mSeqNum << ')' << endl;
rx[x].mSeqNum = ry[y].mSeqNum;
rx[x].mIcode = ry[y].mIcode;
--x;
--y;
}
}
}
catch (const exception& ex)
{
if (VERBOSE)
{
std::vector<pair<string,string>> alignment;
case 1: for (x = highX, y = highY; x >= 0 and y >= 0; )
if (VERBOSE > 3) {
cerr << "Missing residue in ATOM records: " << rx[x].mMonId << " at " << rx[x].mSeqNum << endl; switch (tb(x, y))
{
--x; case -1:
break; alignment.push_back(make_pair("...", ry[y].mMonId));
--y;
break;
case 1:
alignment.push_back(make_pair(rx[x].mMonId, "..."));
--x;
break;
case 0:
alignment.push_back(make_pair(rx[x].mMonId, ry[y].mMonId));
--x;
--y;
break;
}
}
case 0: reverse(alignment.begin(), alignment.end());
if (VERBOSE > 3 and rx[x].mMonId != ry[y].mMonId) for (auto a: alignment)
cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonId << '/' << ry[y].mMonId << ')' << endl; cerr << " " << a.first << " -- " << a.second << endl;
else if (VERBOSE > 4)
cerr << rx[x].mMonId << " -> " << ry[y].mMonId << " (" << ry[y].mSeqNum << ')' << endl;
rx[x].mSeqNum = ry[y].mSeqNum;
rx[x].mIcode = ry[y].mIcode;
--x;
--y;
} }
throw;
} }
// assign numbers to the residues that don't have them yet // assign numbers to the residues that don't have them yet
...@@ -5248,15 +5379,7 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile) ...@@ -5248,15 +5379,7 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile)
cifFile.loadDictionary("mmcif_pdbx"); cifFile.loadDictionary("mmcif_pdbx");
try p.Parse(pdbFile, cifFile);
{
p.Parse(pdbFile, cifFile);
}
catch (const exception& ex)
{
cerr << "Error parsing PDB file" << endl;
throw;
}
cifFile.validate(); cifFile.validate();
} }
...@@ -978,7 +978,10 @@ float Remark3Parser::parse() ...@@ -978,7 +978,10 @@ float Remark3Parser::parse()
} }
if (not remarks.empty() and not iequals(remarks, "NULL")) if (not remarks.empty() and not iequals(remarks, "NULL"))
mDb["refine"].front()["details"] = remarks; {
if (not mDb["refine"].empty())
mDb["refine"].front()["details"] = remarks;
}
float score = float(lineCount - dropped) / lineCount; float score = float(lineCount - dropped) / lineCount;
......
/*
Created by: Maarten L. Hekkelman
Date: dinsdag 07 november, 2017
Copyright 2017 NKI AVL
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "cif++/Config.h"
#include <termios.h>
#include <sys/ioctl.h>
#include <iostream>
#include <iomanip>
#include <boost/program_options.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem/path.hpp>
#include "cif++/CifUtils.h"
#include "cif++/Structure.h"
#include "cif++/TlsParser.h"
using namespace std;
namespace po = boost::program_options;
namespace ba = boost::algorithm;
namespace fs = boost::filesystem;
namespace c = libcif;
namespace cif
{
static const char* kRedOn = "\033[37;1;41m";
static const char* kRedOff = "\033[0m";
const int
kResidueNrWildcard = numeric_limits<int>::min(),
kNoSeqNum = numeric_limits<int>::max() - 1;
using namespace std;
// --------------------------------------------------------------------
// We parse selection statements and create a selection expression tree
// which is then interpreted by setting the selected flag for the
// residues. After that, the selected ranges are collected and printed.
struct TLSResidue
{
string chainID;
int seqNr;
char iCode;
string name;
bool selected;
string asymID;
int seqID;
bool operator==(const TLSResidue& rhs) const
{
return chainID == rhs.chainID and
seqNr == rhs.seqNr and
iCode == rhs.iCode and
iequals(name, rhs.name) and
selected == rhs.selected;
}
};
void DumpSelection(const vector<TLSResidue>& selected, int indentLevel)
{
string indent(indentLevel * 2, ' ');
auto i = selected.begin();
bool first = true;
// First print in PDB space
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
if (b == selected.end())
break;
if (first)
cout << indent << "PDB:" << endl;
first = false;
auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.chainID != b->chainID or not s.selected; });
cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << endl;
i = e;
}
// Then in mmCIF space
if (not first)
cout << indent << "mmCIF:" << endl;
i = selected.begin();
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
if (b == selected.end())
break;
auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.asymID != b->asymID or not s.selected; });
string asymID = b->asymID;
int from = b->seqID, to = from;
for (auto j = b + 1; j != e; ++j)
{
if (j->seqID == to + 1)
to = j->seqID;
else if (j->seqID != to) // probably an insertion code
{
if (from == kNoSeqNum or to == kNoSeqNum)
cout << indent << " >> " << asymID << endl;
else
cout << indent << " >> " << asymID << ' ' << from << ':' << to << endl;
asymID = b->asymID;
from = to = b->seqID;
}
}
if (from == kNoSeqNum or to == kNoSeqNum)
cout << indent << " >> " << asymID << endl;
else
cout << indent << " >> " << asymID << ' ' << from << ':' << to << endl;
i = e;
}
if (first)
{
if (isatty(STDOUT_FILENO))
cout << indent << kRedOn << "Empty selection" << kRedOff << endl;
else
cout << indent << kRedOn << "Empty selection" << kRedOff << endl;
}
}
vector<tuple<string,int,int>> TLSSelection::GetRanges(Datablock& db, bool pdbNamespace) const
{
vector<TLSResidue> selected;
// Collect the residues from poly seq scheme...
for (auto r: db["pdbx_poly_seq_scheme"])
{
string chain, seqNr, iCode, name;
string asymID;
int seqID;
if (pdbNamespace)
cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_comp_id", "asym_id", "seq_id");
else
{
cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");
asymID = chain;
seqID = stoi(seqNr);
}
if (seqNr.empty())
continue;
if (iCode.length() > 1)
throw runtime_error("invalid iCode");
selected.push_back({chain, stoi(seqNr), iCode[0], name, false, asymID, seqID});
}
// ... those from the nonpoly scheme
for (auto r: db["pdbx_nonpoly_scheme"])
{
string chain, seqNr, iCode, name, asymID;
if (pdbNamespace)
{
cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
if (seqNr.empty())
continue;
}
else
{
cif::tie(chain, name) = r.get("asym_id", "mon_id");
asymID = chain;
seqNr = "0";
}
if (iequals(name, "HOH") or iequals(name, "H2O"))
continue;
if (iCode.length() > 1)
throw runtime_error("invalid iCode");
selected.push_back({chain, stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum});
}
// selected might consist of multiple ranges
// output per chain
stable_sort(selected.begin(), selected.end(), [](auto& a, auto& b) -> bool
{
int d = a.chainID.compare(b.chainID);
if (d == 0)
d = a.seqNr - b.seqNr;
return d < 0;
});
CollectResidues(db, selected);
vector<tuple<string,int,int>> result;
auto i = selected.begin();
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
if (b == selected.end())
break;
auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.asymID != b->asymID or not s.selected; });
// return ranges with strict increasing sequence numbers.
// So when there's a gap in the sequence we split the range.
// Beware of iCodes though
result.push_back(make_tuple(b->asymID, b->seqID, b->seqID));
for (auto j = b + 1; j != e; ++j)
{
if (j->seqID == get<2>(result.back()) + 1)
get<2>(result.back()) = j->seqID;
else if (j->seqID != get<2>(result.back())) // probably an insertion code
result.push_back(make_tuple(b->asymID, j->seqID, j->seqID));
}
i = e;
}
return result;
}
struct TLSSelectionNot : public TLSSelection
{
TLSSelectionNot(TLSSelectionPtr selection)
: selection(selection.release()) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
selection->CollectResidues(db, residues, indentLevel + 1);
for (auto& r: residues)
r.selected = not r.selected;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "NOT" << endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr selection;
};
struct TLSSelectionAll : public TLSSelection
{
TLSSelectionAll() {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
for (auto& r: residues)
r.selected = true;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "ALL" << endl;
DumpSelection(residues, indentLevel);
}
}
};
struct TLSSelectionChain : public TLSSelectionAll
{
TLSSelectionChain(const string& chainID)
: m_chain(chainID) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
bool allChains = m_chain == "*";
for (auto& r: residues)
r.selected = allChains or r.chainID == m_chain;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "CHAIN " << m_chain << endl;
DumpSelection(residues, indentLevel);
}
}
string m_chain;
};
struct TLSSelectionResID : public TLSSelectionAll
{
TLSSelectionResID(int seqNr, char iCode)
: m_seq_nr(seqNr), m_icode(iCode) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
for (auto& r: residues)
r.selected = r.seqNr == m_seq_nr and r.iCode == m_icode;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << (m_icode ? string { m_icode} : "") << endl;
DumpSelection(residues, indentLevel);
}
}
int m_seq_nr;
char m_icode;
};
struct TLSSelectionRangeSeq : public TLSSelectionAll
{
TLSSelectionRangeSeq(int first, int last)
: m_first(first), m_last(last) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
for (auto& r: residues)
{
r.selected = ((r.seqNr >= m_first or m_first == kResidueNrWildcard) and
(r.seqNr <= m_last or m_last == kResidueNrWildcard));
}
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Range " << m_first << ':' << m_last << endl;
DumpSelection(residues, indentLevel);
}
}
int m_first, m_last;
};
struct TLSSelectionRangeID : public TLSSelectionAll
{
TLSSelectionRangeID(int first, int last, char icodeFirst = 0, char icodeLast = 0)
: m_first(first), m_last(last), m_icode_first(icodeFirst), m_icode_last(icodeLast) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
// need to do this per chain
set<string> chains;
for (auto& r: residues)
chains.insert(r.chainID);
for (string chain: chains)
{
auto f = find_if(residues.begin(), residues.end(),
[=](auto r) -> bool {
return r.chainID == chain and r.seqNr == m_first and r.iCode == m_icode_first;
});
auto l = find_if(residues.begin(), residues.end(),
[=](auto r) -> bool {
return r.chainID == chain and r.seqNr == m_last and r.iCode == m_icode_last;
});
if (f != residues.end() and l != residues.end() and f <= l)
{
++l;
for (; f != l; ++f)
f->selected = true;
}
}
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Through " << m_first << ':' << m_last << endl;
DumpSelection(residues, indentLevel);
}
}
int m_first, m_last;
char m_icode_first, m_icode_last;
};
struct TLSSelectionUnion : public TLSSelection
{
TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
lhs->CollectResidues(db, a, indentLevel + 1);
rhs->CollectResidues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected or bi->selected;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Union" << endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr lhs;
TLSSelectionPtr rhs;
};
struct TLSSelectionIntersection : public TLSSelection
{
TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
lhs->CollectResidues(db, a, indentLevel + 1);
rhs->CollectResidues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected and bi->selected;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Intersection" << endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr lhs;
TLSSelectionPtr rhs;
};
struct TLSSelectionByName : public TLSSelectionAll
{
public:
TLSSelectionByName(const string& resname)
: m_name(resname) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
for (auto& r: residues)
r.selected = r.name == m_name;
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Name " << m_name << endl;
DumpSelection(residues, indentLevel);
}
}
string m_name;
};
struct TLSSelectionByElement : public TLSSelectionAll
{
public:
TLSSelectionByElement(const string& element)
: m_element(element) {}
virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
{
// rationale... We want to select residues only. So we select
// residues that have just a single atom of type m_element.
// And we assume these have as residue name... m_element.
// ... Right?
for (auto& r: residues)
r.selected = iequals(r.name, m_element);
if (VERBOSE)
{
cout << string(indentLevel * 2, ' ') << "Element " << m_element << endl;
DumpSelection(residues, indentLevel);
}
}
string m_element;
};
// --------------------------------------------------------------------
class TLSSelectionParserImpl
{
public:
TLSSelectionParserImpl(const string& selection)
: m_selection(selection), m_p(m_selection.begin()), m_end(m_selection.end()) {}
virtual TLSSelectionPtr Parse() = 0;
protected:
virtual int GetNextToken() = 0;
virtual void Match(int token);
virtual string ToString(int token) = 0;
string m_selection;
string::iterator m_p, m_end;
int m_lookahead;
string m_token;
};
void TLSSelectionParserImpl::Match(int token)
{
if (m_lookahead == token)
m_lookahead = GetNextToken();
else
{
string expected;
if (token >= 256)
expected = ToString(token);
else
expected = { char(token) };
string found;
if (m_lookahead >= 256)
found = ToString(m_lookahead) + " (" + m_token + ')';
else
found = { char(m_lookahead) };
throw runtime_error("Expected " + expected + " but found " + found);
}
}
// --------------------------------------------------------------------
class TLSSelectionParserImplPhenix : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplPhenix(const string& selection)
: TLSSelectionParserImpl(selection)
{
m_lookahead = GetNextToken();
}
virtual TLSSelectionPtr Parse();
private:
TLSSelectionPtr ParseAtomSelection();
TLSSelectionPtr ParseTerm();
TLSSelectionPtr ParseFactor();
enum TOKEN {
pt_NONE = 0,
pt_IDENT = 256,
pt_STRING,
pt_NUMBER,
pt_RESID,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_ICODE,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int GetNextToken();
virtual string ToString(int token);
int m_value_i;
string m_value_s;
char m_icode;
};
int TLSSelectionParserImplPhenix::GetNextToken()
{
int result = pt_NONE;
enum STATE {
st_START,
st_RESID = 200,
st_NUM = 300,
st_IDENT = 400,
st_QUOTED = 500,
st_DQUOTED = 550,
st_OTHER = 600
};
int state = st_START;
m_value_i = 0;
m_icode = 0;
m_value_s.clear();
auto s = m_p;
auto start = state;
m_token.clear();
auto restart = [&]()
{
switch (start)
{
case st_START: state = start = st_RESID; break;
case st_RESID: state = start = st_NUM; break;
case st_NUM: state = start = st_IDENT; break;
case st_IDENT: state = start = st_QUOTED; break;
case st_QUOTED: state = start = st_DQUOTED; break;
case st_DQUOTED:state = start = st_OTHER; break;
}
m_token.clear();
m_p = s;
};
auto retract = [&]()
{
--m_p;
m_token.pop_back();
};
while (result == pt_NONE)
{
char ch = *m_p++;
if (m_p > m_end)
ch = 0;
else
m_token += ch;
switch (state)
{
// start block
case st_START:
if (ch == 0)
result = pt_EOLN;
else if (isspace(ch))
{
m_token.clear();
++s;
}
else
restart();
break;
// RESID block
case st_RESID:
if (ch == '-')
state = st_RESID + 1;
else if (isdigit(ch))
{
m_value_i = (ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 2:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (isalpha(ch))
{
m_icode = ch;
state = st_RESID + 3;
}
else
restart();
break;
case st_RESID + 3:
if (isalnum(ch))
restart();
else
{
retract();
result = pt_RESID;
}
break;
// NUM block
case st_NUM:
if (ch == '-')
state = st_NUM + 1;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 2:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (not isalpha(ch))
{
result = pt_NUMBER;
retract();
}
else
restart();
break;
// IDENT block
case st_IDENT:
if (isalnum(ch))
{
m_value_s = { ch };
state = st_IDENT + 1;
}
else
restart();
break;
case st_IDENT + 1:
if (isalnum(ch) or ch == '\'')
m_value_s += ch;
else
{
--m_p;
result = pt_IDENT;
}
break;
// QUOTED block
case st_QUOTED:
if (ch == '\'')
{
m_value_s.clear();
state = st_QUOTED + 1;
}
else
restart();
break;
case st_QUOTED + 1:
if (ch == '\'')
result = pt_STRING;
else if (ch == 0)
throw runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// QUOTED block
case st_DQUOTED:
if (ch == '\"')
{
m_value_s.clear();
state = st_DQUOTED + 1;
}
else
restart();
break;
case st_DQUOTED + 1:
if (ch == '\"')
result = pt_STRING;
else if (ch == 0)
throw runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// OTHER block
case st_OTHER:
result = ch;
break;
}
}
if (result == pt_IDENT)
{
if (iequals(m_value_s, "CHAIN"))
result = pt_KW_CHAIN;
else if (iequals(m_value_s, "ALL"))
result = pt_KW_ALL;
else if (iequals(m_value_s, "AND"))
result = pt_KW_AND;
else if (iequals(m_value_s, "OR"))
result = pt_KW_OR;
else if (iequals(m_value_s, "NOT"))
result = pt_KW_NOT;
else if (iequals(m_value_s, "RESSEQ"))
result = pt_KW_RESSEQ;
else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
result = pt_KW_RESID;
else if (iequals(m_value_s, "RESNAME"))
result = pt_KW_RESNAME;
else if (iequals(m_value_s, "ELEMENT"))
result = pt_KW_ELEMENT;
else if (iequals(m_value_s, "PDB"))
result = pt_KW_PDB;
else if (iequals(m_value_s, "ENTRY"))
result = pt_KW_ENTRY;
else if (iequals(m_value_s, "THROUGH"))
result = pt_KW_THROUGH;
}
return result;
}
string TLSSelectionParserImplPhenix::ToString(int token)
{
switch (token)
{
case pt_IDENT: return "identifier";
case pt_STRING: return "string";
case pt_NUMBER: return "number";
case pt_RESID: return "resid";
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default: return "character";
}
}
TLSSelectionPtr TLSSelectionParserImplPhenix::Parse()
{
if (m_lookahead == pt_KW_PDB)
{
Match(pt_KW_PDB);
// Match(pt_KW_ENTRY);
throw runtime_error("Unimplemented PDB ENTRY specification");
}
TLSSelectionPtr result = ParseAtomSelection();
bool extraParenthesis = false;
if (m_lookahead == ')')
{
extraParenthesis = true;
m_lookahead = GetNextToken();
}
Match(pt_EOLN);
if (extraParenthesis)
cerr << "WARNING: too many closing parenthesis in TLS selection statement" << endl;
return result;
}
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseAtomSelection()
{
TLSSelectionPtr result = ParseTerm();
while (m_lookahead == pt_KW_OR)
{
Match(pt_KW_OR);
result.reset(new TLSSelectionUnion(result, ParseTerm()));
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseTerm()
{
TLSSelectionPtr result = ParseFactor();
while (m_lookahead == pt_KW_AND)
{
Match(pt_KW_AND);
result.reset(new TLSSelectionIntersection(result, ParseFactor()));
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseFactor()
{
TLSSelectionPtr result;
switch (m_lookahead)
{
case '(':
Match('(');
result = ParseAtomSelection();
if (m_lookahead == pt_EOLN)
cerr << "WARNING: missing closing parenthesis in TLS selection statement" << endl;
else
Match(')');
break;
case pt_KW_NOT:
Match(pt_KW_NOT);
result.reset(new TLSSelectionNot(ParseAtomSelection()));
break;
case pt_KW_CHAIN:
{
Match(pt_KW_CHAIN);
string chainID = m_value_s;
if (m_lookahead == pt_NUMBER) // sigh
{
chainID = to_string(m_value_i);
Match(pt_NUMBER);
}
else
Match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
result.reset(new TLSSelectionChain(chainID));
break;
}
case pt_KW_RESNAME:
{
Match(pt_KW_RESNAME);
string name = m_value_s;
Match(pt_IDENT);
result.reset(new TLSSelectionByName(name));
break;
}
case pt_KW_ELEMENT:
{
Match(pt_KW_ELEMENT);
string element = m_value_s;
Match(pt_IDENT);
result.reset(new TLSSelectionByElement(element));
break;
}
case pt_KW_RESSEQ:
{
Match(pt_KW_RESSEQ);
int from = m_value_i;
Match(pt_NUMBER);
int to = from;
if (m_lookahead == ':')
{
Match(':');
to = m_value_i;
Match(pt_NUMBER);
}
result.reset(new TLSSelectionRangeSeq(from, to));
break;
}
case pt_KW_RESID:
{
Match(pt_KW_RESID);
int from, to;
char icode_from = 0, icode_to = 0;
bool through = false;
from = to = m_value_i;
if (m_lookahead == pt_NUMBER)
Match(pt_NUMBER);
else
{
icode_from = m_icode;
Match(pt_RESID);
}
if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
{
through = m_lookahead == pt_KW_THROUGH;
Match(m_lookahead);
to = m_value_i;
if (m_lookahead == pt_NUMBER)
Match(pt_NUMBER);
else
{
icode_to = m_icode;
Match(pt_RESID);
}
if (through)
result.reset(new TLSSelectionRangeID(from, to, icode_from, icode_to));
else
{
if (VERBOSE and (icode_from or icode_to))
cerr << "Warning, ignoring insertion codes" << endl;
result.reset(new TLSSelectionRangeSeq(from, to));
}
}
else
result.reset(new TLSSelectionResID(from, icode_from));
break;
}
case pt_KW_ALL:
Match(pt_KW_ALL);
result.reset(new TLSSelectionAll());
break;
default:
throw runtime_error("Unexpected token " + ToString(m_lookahead) + " (" + m_token + ')');
}
return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBuster : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplBuster(const string& selection);
virtual TLSSelectionPtr Parse();
protected:
enum TOKEN {
bt_NONE = 0,
bt_IDENT = 256,
bt_NUMBER,
bt_EOLN,
};
virtual int GetNextToken();
virtual string ToString(int token);
TLSSelectionPtr ParseGroup();
tuple<string,int> ParseAtom();
TLSSelectionPtr ParseOldGroup();
int m_value_i;
string m_value_s;
bool m_parsing_old_style = false;
};
TLSSelectionParserImplBuster::TLSSelectionParserImplBuster(const string& selection)
: TLSSelectionParserImpl(selection)
{
m_lookahead = GetNextToken();
}
int TLSSelectionParserImplBuster::GetNextToken()
{
int result = bt_NONE;
enum STATE { st_START, st_NEGATE, st_NUM, st_IDENT } state = st_START;
m_value_i = 0;
m_value_s.clear();
bool negative = false;
while (result == bt_NONE)
{
char ch = *m_p++;
if (m_p > m_end)
ch = 0;
switch (state)
{
case st_START:
if (ch == 0)
result = bt_EOLN;
else if (isspace(ch))
continue;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
}
else if (isalpha(ch))
{
m_value_s = { ch };
state = st_IDENT;
}
else if (ch == '-')
{
state = st_NEGATE;
}
else
result = ch;
break;
case st_NEGATE:
if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
negative = true;
}
else
{
--m_p;
result = '-';
}
break;
case st_NUM:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (ch - '0');
else
{
if (negative)
m_value_i = -m_value_i;
result = bt_NUMBER;
--m_p;
}
break;
case st_IDENT:
if (isalnum(ch))
m_value_s += ch;
else
{
--m_p;
result = bt_IDENT;
}
break;
}
}
return result;
}
string TLSSelectionParserImplBuster::ToString(int token)
{
switch (token)
{
case bt_IDENT: return "identifier (" + m_value_s + ')';
case bt_NUMBER: return "number (" + to_string(m_value_i) + ')';
case bt_EOLN: return "end of line";
default:
assert(false);
return "unknown token";
}
}
TLSSelectionPtr TLSSelectionParserImplBuster::ParseGroup()
{
TLSSelectionPtr result;
auto add = [&result](const string& chainID, int from, int to)
{
TLSSelectionPtr sc(new TLSSelectionChain(chainID));
TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
TLSSelectionPtr s(new TLSSelectionIntersection(sc, sr));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new TLSSelectionUnion{result, s });
};
Match('{');
do
{
string chain1;
int seqNr1;
std::tie(chain1, seqNr1) = ParseAtom();
if (m_lookahead == '-')
{
string chain2;
int seqNr2 = seqNr1;
Match('-');
if (m_lookahead == bt_NUMBER)
{
seqNr2 = m_value_i;
Match(bt_NUMBER);
}
else
{
std::tie(chain2, seqNr2) = ParseAtom();
if (chain1 != chain2)
{
cerr << "Warning, ranges over multiple chains detected" << endl;
TLSSelectionPtr sc1(new TLSSelectionChain(chain1));
TLSSelectionPtr sr1(new TLSSelectionRangeSeq(seqNr1, kResidueNrWildcard));
TLSSelectionPtr s1(new TLSSelectionIntersection(sc1, sr1));
TLSSelectionPtr sc2(new TLSSelectionChain(chain2));
TLSSelectionPtr sr2(new TLSSelectionRangeSeq(kResidueNrWildcard, seqNr2));
TLSSelectionPtr s2(new TLSSelectionIntersection(sc2, sr2));
TLSSelectionPtr s(new TLSSelectionUnion(s1, s2));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new TLSSelectionUnion{result, s });
chain1.clear();
}
}
if (not chain1.empty())
add(chain1, seqNr1, seqNr2);
}
else
add(chain1, seqNr1, seqNr1);
}
while (m_lookahead != '}');
Match('}');
return result;
}
tuple<string,int> TLSSelectionParserImplBuster::ParseAtom()
{
string chain = m_value_s;
int seqNr = kResidueNrWildcard;
if (m_lookahead == '*')
Match('*');
else
Match(bt_IDENT);
Match('|');
if (m_lookahead == '*')
Match('*');
else
{
seqNr = m_value_i;
Match(bt_NUMBER);
if (m_lookahead == ':')
{
Match(':');
string atom = m_value_s;
if (VERBOSE)
cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << endl;
Match(bt_IDENT);
}
}
return make_tuple(chain, seqNr);
}
TLSSelectionPtr TLSSelectionParserImplBuster::Parse()
{
TLSSelectionPtr result = ParseGroup();
Match(bt_EOLN);
return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBusterOld : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplBusterOld(const string& selection)
: TLSSelectionParserImpl(selection)
{
m_lookahead = GetNextToken();
}
virtual TLSSelectionPtr Parse();
private:
TLSSelectionPtr ParseAtomSelection();
TLSSelectionPtr ParseTerm();
TLSSelectionPtr ParseFactor();
TLSSelectionPtr ParseResid();
TLSSelectionPtr ParseChainResid();
enum TOKEN {
pt_NONE = 0,
pt_IDENT = 256,
pt_CHAINRESID,
pt_STRING,
pt_NUMBER,
pt_RANGE,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int GetNextToken();
virtual string ToString(int token);
int m_value_i;
string m_value_s;
int m_value_r[2];
};
int TLSSelectionParserImplBusterOld::GetNextToken()
{
int result = pt_NONE;
enum STATE { st_START, st_NEGATE, st_NUM, st_RANGE, st_IDENT_1, st_IDENT, st_CHAINRESID, st_QUOTED_1, st_QUOTED_2 } state = st_START;
m_value_i = 0;
m_value_s.clear();
bool negative = false;
while (result == pt_NONE)
{
char ch = *m_p++;
if (m_p > m_end)
ch = 0;
switch (state)
{
case st_START:
if (ch == 0)
result = pt_EOLN;
else if (isspace(ch))
continue;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
}
else if (isalpha(ch))
{
m_value_s = { ch };
state = st_IDENT_1;
}
else if (ch == '-')
{
state = st_NEGATE;
}
else if (ch == '\'')
{
state = st_QUOTED_1;
}
else
result = ch;
break;
case st_NEGATE:
if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
negative = true;
}
else
{
--m_p;
result = '-';
}
break;
case st_NUM:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (ch - '0');
else if (ch == '-' or ch == ':')
{
if (negative)
m_value_i = -m_value_i;
m_value_r[0] = m_value_i;
m_value_r[1] = 0;
state = st_RANGE;
}
else
{
if (negative)
m_value_i = -m_value_i;
result = pt_NUMBER;
--m_p;
}
break;
case st_RANGE: // TODO: question, is "-2--1" a valid range? We do not support that, yet
if (isdigit(ch))
m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
else if (m_value_r[1] != 0)
{
result = pt_RANGE;
--m_p;
}
else
{
--m_p;
--m_p;
result = pt_NUMBER;
}
break;
case st_IDENT_1:
if (isalpha(ch))
{
m_value_s += ch;
state = st_IDENT;
}
else if (isdigit(ch))
{
m_value_i = (ch - '0');
state = st_CHAINRESID;
}
else
{
--m_p;
result = pt_IDENT;
}
break;
case st_CHAINRESID:
if (isalpha(ch))
{
m_value_s += to_string(m_value_i);
m_value_s += ch;
state = st_IDENT;
}
else if (isdigit(ch))
m_value_i = 10 * m_value_i + (ch - '0');
else
{
--m_p;
result = pt_CHAINRESID;
}
break;
case st_IDENT:
if (isalnum(ch))
m_value_s += ch;
else
{
--m_p;
result = pt_IDENT;
}
break;
case st_QUOTED_1:
if (ch == '\'')
{
--m_p;
result = '\'';
}
else
{
m_value_s = { ch };
state = st_QUOTED_2;
}
break;
case st_QUOTED_2:
if (ch == '\'')
result = pt_STRING;
else if (ch == 0)
throw runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
}
}
if (result == pt_IDENT)
{
if (iequals(m_value_s, "CHAIN"))
result = pt_KW_CHAIN;
else if (iequals(m_value_s, "ALL"))
result = pt_KW_ALL;
else if (iequals(m_value_s, "AND"))
result = pt_KW_AND;
else if (iequals(m_value_s, "OR"))
result = pt_KW_OR;
else if (iequals(m_value_s, "NOT"))
result = pt_KW_NOT;
else if (iequals(m_value_s, "RESSEQ"))
result = pt_KW_RESSEQ;
else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
result = pt_KW_RESID;
else if (iequals(m_value_s, "RESNAME"))
result = pt_KW_RESNAME;
else if (iequals(m_value_s, "PDB"))
result = pt_KW_PDB;
else if (iequals(m_value_s, "ENTRY"))
result = pt_KW_ENTRY;
else if (iequals(m_value_s, "THROUGH"))
result = pt_KW_THROUGH;
}
return result;
}
string TLSSelectionParserImplBusterOld::ToString(int token)
{
switch (token)
{
case pt_IDENT: return "identifier (" + m_value_s + ')';
case pt_STRING: return "string (" + m_value_s + ')';
case pt_NUMBER: return "number (" + to_string(m_value_i) + ')';
case pt_RANGE: return "range (" + to_string(m_value_r[0]) + ':' + to_string(m_value_r[1]) + ')';
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default:
assert(false);
return "unknown token";
}
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::Parse()
{
if (m_lookahead == pt_KW_PDB)
{
Match(pt_KW_PDB);
// Match(pt_KW_ENTRY);
throw runtime_error("Unimplemented PDB ENTRY specification");
}
TLSSelectionPtr result = ParseAtomSelection();
Match(pt_EOLN);
return result;
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseAtomSelection()
{
TLSSelectionPtr result = ParseTerm();
while (m_lookahead == pt_KW_OR)
{
Match(pt_KW_OR);
result.reset(new TLSSelectionUnion(result, ParseTerm()));
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseTerm()
{
TLSSelectionPtr result = ParseFactor();
while (m_lookahead == pt_KW_AND)
{
Match(pt_KW_AND);
result.reset(new TLSSelectionIntersection(result, ParseFactor()));
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseFactor()
{
TLSSelectionPtr result;
switch (m_lookahead)
{
case '(':
Match('(');
result = ParseAtomSelection();
Match(')');
break;
case pt_KW_NOT:
Match(pt_KW_NOT);
result.reset(new TLSSelectionNot(ParseAtomSelection()));
break;
case pt_KW_CHAIN:
{
Match(pt_KW_CHAIN);
string chainID = m_value_s;
if (m_lookahead == pt_NUMBER) // sigh
{
chainID = to_string(m_value_i);
Match(pt_NUMBER);
}
else
Match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
result.reset(new TLSSelectionChain(chainID));
break;
}
case pt_KW_RESNAME:
{
Match(pt_KW_RESNAME);
string name = m_value_s;
Match(pt_IDENT);
result.reset(new TLSSelectionByName(name));
break;
}
case pt_KW_RESSEQ:
Match(pt_KW_RESSEQ);
result = ParseResid();
break;
case pt_KW_RESID:
Match(pt_KW_RESID);
result = ParseResid();
break;
case pt_KW_ALL:
Match(pt_KW_ALL);
result.reset(new TLSSelectionAll());
break;
case pt_CHAINRESID:
result = ParseChainResid();
break;
default:
throw runtime_error("Unexpected token " + ToString(m_lookahead));
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseResid()
{
TLSSelectionPtr result;
for (;;)
{
int from, to;
if (m_lookahead == pt_RANGE)
{
from = m_value_r[0];
to = m_value_r[1];
Match(pt_RANGE);
}
else
{
from = m_value_i;
Match(pt_NUMBER);
to = from;
if (m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH)
{
Match(m_lookahead);
to = m_value_i;
Match(pt_NUMBER);
}
}
TLSSelectionPtr range(new TLSSelectionRangeSeq(from, to));
if (result)
result.reset(new TLSSelectionUnion(result, range));
else
result.reset(range.release());
if (m_lookahead == ',')
{
Match(',');
continue;
}
break;
}
return result;
}
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseChainResid()
{
TLSSelectionPtr result;
for (;;)
{
int from, to;
from = to = m_value_i;
string chainID = m_value_s;
Match(pt_CHAINRESID);
if (m_lookahead == '-')
{
Match(m_lookahead);
to = m_value_i;
if (m_value_s != chainID)
throw runtime_error("Cannot have two different chainIDs in a range selection");
Match(pt_CHAINRESID);
}
TLSSelectionPtr sc(new TLSSelectionChain(chainID));
TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
TLSSelectionPtr range(new TLSSelectionIntersection(sc, sr));
if (result)
result.reset(new TLSSelectionUnion(result, range));
else
result.reset(range.release());
if (m_lookahead == ',')
{
Match(',');
continue;
}
break;
}
return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserBase
{
public:
virtual TLSSelectionPtr Parse(const string& selection) const = 0;
virtual ~TLSSelectionParserBase() {}
};
template<typename IMPL>
class TLSSelectionParser
{
public:
virtual TLSSelectionPtr Parse(const string& selection) const
{
TLSSelectionPtr result;
try
{
IMPL p(selection);
result = p.Parse();
}
catch (const exception& ex)
{
cerr << "ParseError: " << ex.what() << endl;
}
return result;
}
};
// --------------------------------------------------------------------
TLSSelectionPtr ParseSelectionDetails(const string& program, const string& selection)
{
TLSSelectionParser<TLSSelectionParserImplPhenix> phenix;
TLSSelectionParser<TLSSelectionParserImplBuster> buster;
TLSSelectionParser<TLSSelectionParserImplBusterOld> busterOld;
TLSSelectionPtr result;
if (ba::icontains(program, "buster"))
{
result = buster.Parse(selection);
if (not result)
{
if (VERBOSE)
cerr << "Falling back to old BUSTER" << endl;
result = busterOld.Parse(selection);
}
if (not result)
{
if (VERBOSE)
cerr << "Falling back to PHENIX" << endl;
result = phenix.Parse(selection);
}
}
else if (ba::icontains(program, "phenix"))
{
result = phenix.Parse(selection);
if (not result)
{
if (VERBOSE)
cerr << "Falling back to BUSTER" << endl;
result = buster.Parse(selection);
}
if (not result)
{
if (VERBOSE)
cerr << "Falling back to old BUSTER" << endl;
result = busterOld.Parse(selection);
}
}
else
{
if (VERBOSE)
cerr << "No known program specified, trying PHENIX" << endl;
result = phenix.Parse(selection);
if (not result)
{
if (VERBOSE)
cerr << "Falling back to BUSTER" << endl;
result = buster.Parse(selection);
}
if (not result)
{
if (VERBOSE)
cerr << "Falling back to old BUSTER" << endl;
result = busterOld.Parse(selection);
}
}
return result;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment