Commit 821c1ef9 by maarten

last fixes for pdb2cif

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@189 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent ba0e39c0
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "cif++/Compound.h" #include "cif++/Compound.h"
#include "cif++/PeptideDB.h" #include "cif++/PeptideDB.h"
#include "cif++/PDB2CifRemark3.h" #include "cif++/PDB2CifRemark3.h"
#include "cif++/CifUtils.h"
using namespace std; using namespace std;
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
...@@ -3272,8 +3273,11 @@ void PDBFileParser::ConstructEntities() ...@@ -3272,8 +3273,11 @@ void PDBFileParser::ConstructEntities()
PDBChain::AtomRes ar{ resName, resSeq, iCode }; PDBChain::AtomRes ar{ resName, resSeq, iCode };
if (chain.mResiduesSeen.empty() or chain.mResiduesSeen.back() != ar) if ((chain.mResiduesSeen.empty() or chain.mResiduesSeen.back() != ar) and
(PeptideDB::Instance().isKnownPeptide(resName) or PeptideDB::Instance().isKnownBase(resName)))
{
chain.mResiduesSeen.push_back(ar); chain.mResiduesSeen.push_back(ar);
}
// now that we're iterating atoms anyway, clean up the mUnobs array // now that we're iterating atoms anyway, clean up the mUnobs array
mUnobs.erase(remove_if(mUnobs.begin(), mUnobs.end(), [=](UNOBS& a) mUnobs.erase(remove_if(mUnobs.begin(), mUnobs.end(), [=](UNOBS& a)
...@@ -3324,27 +3328,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3324,27 +3328,7 @@ void PDBFileParser::ConstructEntities()
if (chain.mTerIndex > 0) if (chain.mTerIndex > 0)
chain.mResiduesSeen.erase(chain.mResiduesSeen.begin() + chain.mTerIndex, chain.mResiduesSeen.end()); chain.mResiduesSeen.erase(chain.mResiduesSeen.begin() + chain.mTerIndex, chain.mResiduesSeen.end());
int lastResidueIndex; int lastResidueIndex = chain.AlignResToSeqRes();
try
{
lastResidueIndex = chain.AlignResToSeqRes();
}
catch (const exception& ex)
{
if (VERBOSE > 1)
cerr << "First alignment attempt failed, trying to strip seen residues of non-polymers" << endl;
chain.mResiduesSeen.erase(
remove_if(chain.mResiduesSeen.begin(), chain.mResiduesSeen.end(),
[](PDBChain::AtomRes& r)
{
return not (PeptideDB::Instance().isKnownPeptide(r.mMonId) or PeptideDB::Instance().isKnownBase(r.mMonId));
}),
chain.mResiduesSeen.end());
lastResidueIndex = chain.AlignResToSeqRes();
}
if (lastResidueIndex > 0 and lastResidueIndex + 1 < static_cast<int>(chain.mResiduesSeen.size())) if (lastResidueIndex > 0 and lastResidueIndex + 1 < static_cast<int>(chain.mResiduesSeen.size()))
{ {
...@@ -5368,6 +5352,62 @@ int PDBFileParser::PDBChain::AlignResToSeqRes() ...@@ -5368,6 +5352,62 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()
// assign numbers // assign numbers
x = highX; x = highX;
y = highY; y = highY;
// C++ is getting closer to Pascal :-)
auto printAlignment = [=]()
{
cerr << string(cif::get_terminal_width(), '-') << endl
<< "Alignment for chain " << mDbref.chainID << endl
<< endl;
std::vector<pair<string,string>> alignment;
int x = highX;
int y = highY;
for (x = highX, y = highY; x >= 0 and y >= 0; )
{
switch (tb(x, y))
{
case -1:
alignment.push_back(make_pair("...", ry[y].mMonId));
--y;
break;
case 1:
alignment.push_back(make_pair(rx[x].mMonId, "..."));
--x;
break;
case 0:
alignment.push_back(make_pair(rx[x].mMonId, ry[y].mMonId));
--x;
--y;
break;
}
}
while (x >= 0)
{
alignment.push_back(make_pair(rx[x].mMonId, "..."));
--x;
}
while (y >= 0)
{
alignment.push_back(make_pair("...", ry[y].mMonId));
--y;
}
reverse(alignment.begin(), alignment.end());
for (auto a: alignment)
cerr << " " << a.first << " -- " << a.second << endl;
cerr << endl;
};
if (VERBOSE > 1)
printAlignment();
try try
{ {
...@@ -5412,48 +5452,8 @@ int PDBFileParser::PDBChain::AlignResToSeqRes() ...@@ -5412,48 +5452,8 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()
} }
catch (const exception& ex) catch (const exception& ex)
{ {
if (VERBOSE) if (VERBOSE == 1)
{ printAlignment();
std::vector<pair<string,string>> alignment;
for (x = highX, y = highY; x >= 0 and y >= 0; )
{
switch (tb(x, y))
{
case -1:
alignment.push_back(make_pair("...", ry[y].mMonId));
--y;
break;
case 1:
alignment.push_back(make_pair(rx[x].mMonId, "..."));
--x;
break;
case 0:
alignment.push_back(make_pair(rx[x].mMonId, ry[y].mMonId));
--x;
--y;
break;
}
}
while (x >= 0)
{
alignment.push_back(make_pair(rx[x].mMonId, "..."));
--x;
}
while (y >= 0)
{
alignment.push_back(make_pair("...", ry[y].mMonId));
--y;
}
reverse(alignment.begin(), alignment.end());
for (auto a: alignment)
cerr << " " << a.first << " -- " << a.second << endl;
}
throw; throw;
} }
......
...@@ -118,15 +118,18 @@ struct PeptideDBImpl ...@@ -118,15 +118,18 @@ struct PeptideDBImpl
PeptideDBImpl::PeptideDBImpl(istream& data, PeptideDBImpl* next) PeptideDBImpl::PeptideDBImpl(istream& data, PeptideDBImpl* next)
: mFile(data), mChemComp(mFile.firstDatablock()["chem_comp"]), mNext(next) : mFile(data), mChemComp(mFile.firstDatablock()["chem_comp"]), mNext(next)
{ {
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
for (auto& chemComp: mChemComp) for (auto& chemComp: mChemComp)
{ {
string group, threeLetterCode; string group, threeLetterCode;
cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code"); cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code");
if (group == "peptide" or group == "M-peptide" or group == "P-peptide") if (std::regex_match(group, peptideRx))
// if (ba::iequals(group, "peptide") or ba::iequals(group, "M-peptide") or ba::iequals(group, "P-peptide"))
mKnownPeptides.insert(threeLetterCode); mKnownPeptides.insert(threeLetterCode);
else if (group == "DNA" or group == "RNA") else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
mKnownBases.insert(threeLetterCode); mKnownBases.insert(threeLetterCode);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment