Commit 77fc4080 by Maarten L. Hekkelman

dssp work

parent 599d0cb5
...@@ -75,6 +75,7 @@ LIBCIF_SRC = AtomShape.cpp \ ...@@ -75,6 +75,7 @@ LIBCIF_SRC = AtomShape.cpp \
CifValidator.cpp \ CifValidator.cpp \
Compound.cpp \ Compound.cpp \
DistanceMap.cpp \ DistanceMap.cpp \
FixDMC.cpp \
MapMaker.cpp \ MapMaker.cpp \
PDB2Cif.cpp \ PDB2Cif.cpp \
PDB2CifRemark3.cpp \ PDB2CifRemark3.cpp \
......
...@@ -17,3 +17,8 @@ ...@@ -17,3 +17,8 @@
#pragma warning (disable : 4996) #pragma warning (disable : 4996)
#pragma warning (disable : 4355) #pragma warning (disable : 4355)
#endif #endif
namespace cif
{
extern int VERBOSE;
}
\ No newline at end of file
...@@ -128,6 +128,13 @@ class Atom ...@@ -128,6 +128,13 @@ class Atom
int compare(const Atom& b) const; int compare(const Atom& b) const;
bool operator<(const Atom& rhs) const
{
return compare(rhs) < 0;
}
friend std::ostream& operator<<(std::ostream& os, const Atom& atom);
private: private:
friend class Structure; friend class Structure;
void setID(int id); void setID(int id);
...@@ -178,6 +185,9 @@ class Residue ...@@ -178,6 +185,9 @@ class Residue
const Compound& compound() const; const Compound& compound() const;
const AtomView& atoms() const; const AtomView& atoms() const;
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
AtomView unique_atoms() const;
Atom atomByID(const std::string& atomID) const; Atom atomByID(const std::string& atomID) const;
...@@ -209,6 +219,8 @@ class Residue ...@@ -209,6 +219,8 @@ class Residue
// some routines for 3d work // some routines for 3d work
std::tuple<Point,float> centerAndRadius() const; std::tuple<Point,float> centerAndRadius() const;
friend std::ostream& operator<<(std::ostream& os, const Residue& res);
protected: protected:
Residue() {} Residue() {}
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <set> #include <set>
#include <cif++/Structure.h> #include <cif++/Structure.hpp>
namespace mmcif namespace mmcif
{ {
...@@ -36,22 +36,23 @@ void CreateMissingBackboneAtoms(Structure& structure, bool simplified) ...@@ -36,22 +36,23 @@ void CreateMissingBackboneAtoms(Structure& structure, bool simplified)
if (mon.isComplete() or mon.hasAlternateBackboneAtoms()) if (mon.isComplete() or mon.hasAlternateBackboneAtoms())
continue; continue;
std::set<std::string> missing; auto atomC = mon.atomByID("C");
if (not mon.hasAtomWithID("C")) missing.insert("C"); auto atomCA = mon.atomByID("CA");
if (not mon.hasAtomWithID("CA")) missing.insert("CA"); auto atomN = mon.atomByID("N");
if (not mon.hasAtomWithID("N")) missing.insert("N"); auto atomO = mon.atomByID("O");
if (not mon.hasAtomWithID("O")) missing.insert("O");
switch (missing.size()) int missing = (atomC ? 0 : 1) + (atomCA ? 0 : 1) + (atomN ? 0 : 1) + (atomO ? 0 : 1);
switch (missing)
{ {
case 1: case 1:
if (missing.count("O")) if (not atomO)
addO(mon); addO(mon);
else if (missing.count("N")) else if (not atomN)
addN(mon); addN(mon);
else if (missing.count("CA")) else if (not atomCA)
addCA(mon); addCA(mon);
else if (missing.count("C")) else if (not atomC)
addC(mon); addC(mon);
break; break;
} }
......
...@@ -153,9 +153,10 @@ const float ...@@ -153,9 +153,10 @@ const float
struct Res struct Res
{ {
Res(const Monomer& m, int nr) Res(const Monomer& m, int nr, ChainBreak brk)
: mM(m), mNumber(nr) : mM(m), mNumber(nr)
, mType(MapResidue(m.compoundID())) , mType(MapResidue(m.compoundID()))
, mChainBreak(brk)
{ {
// update the box containing all atoms // update the box containing all atoms
mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<double>::max(); mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<double>::max();
...@@ -163,7 +164,7 @@ struct Res ...@@ -163,7 +164,7 @@ struct Res
mH = mmcif::Point{ std::numeric_limits<float>::max(), std::numeric_limits<float>::max(), std::numeric_limits<float>::max() }; mH = mmcif::Point{ std::numeric_limits<float>::max(), std::numeric_limits<float>::max(), std::numeric_limits<float>::max() };
for (auto& a: mM.atoms()) for (auto& a: mM.unique_atoms())
{ {
if (a.labelAtomID() == "CA") if (a.labelAtomID() == "CA")
{ {
...@@ -185,7 +186,7 @@ struct Res ...@@ -185,7 +186,7 @@ struct Res
mO = a.location(); mO = a.location();
ExtendBox(mO, kRadiusO + 2 * kRadiusWater); ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
} }
else else if (a.type() != AtomType::H)
{ {
mSideChain.push_back(a.location()); mSideChain.push_back(a.location());
ExtendBox(a.location(), kRadiusSideAtom + 2 * kRadiusWater); ExtendBox(a.location(), kRadiusSideAtom + 2 * kRadiusWater);
...@@ -329,6 +330,7 @@ struct Res ...@@ -329,6 +330,7 @@ struct Res
uint32_t mSheet = 0; uint32_t mSheet = 0;
Helix mHelixFlags[3] = { Helix::None, Helix::None, Helix::None }; // Helix mHelixFlags[3] = { Helix::None, Helix::None, Helix::None }; //
bool mBend = false; bool mBend = false;
ChainBreak mChainBreak = ChainBreak::None;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -1020,7 +1022,6 @@ DSSPImpl::DSSPImpl(const Structure& s) ...@@ -1020,7 +1022,6 @@ DSSPImpl::DSSPImpl(const Structure& s)
size_t nRes = accumulate(mPolymers.begin(), mPolymers.end(), size_t nRes = accumulate(mPolymers.begin(), mPolymers.end(),
0.0, [](double s, auto& p) { return s + p.size(); }); 0.0, [](double s, auto& p) { return s + p.size(); });
mStats.nrOfResidues = nRes;
mStats.nrOfChains = mPolymers.size(); mStats.nrOfChains = mPolymers.size();
mResidues.reserve(nRes); mResidues.reserve(nRes);
...@@ -1028,6 +1029,8 @@ DSSPImpl::DSSPImpl(const Structure& s) ...@@ -1028,6 +1029,8 @@ DSSPImpl::DSSPImpl(const Structure& s)
for (auto& p: mPolymers) for (auto& p: mPolymers)
{ {
ChainBreak brk = ChainBreak::NewChain;
for (auto& m: p) for (auto& m: p)
{ {
if (not m.isComplete()) if (not m.isComplete())
...@@ -1038,14 +1041,22 @@ DSSPImpl::DSSPImpl(const Structure& s) ...@@ -1038,14 +1041,22 @@ DSSPImpl::DSSPImpl(const Structure& s)
if (not mResidues.empty() and if (not mResidues.empty() and
Distance(mResidues.back().mC, m.atomByID("N").location()) > kMaxPeptideBondLength) Distance(mResidues.back().mC, m.atomByID("N").location()) > kMaxPeptideBondLength)
{ {
++mStats.nrOfChains; if (mResidues.back().mM.asymID() == m.asymID())
{
++mStats.nrOfChains;
brk = ChainBreak::Gap;
}
++resNumber; ++resNumber;
} }
mResidues.emplace_back(m, resNumber); mResidues.emplace_back(m, resNumber, brk);
brk = ChainBreak::None;
} }
} }
mStats.nrOfResidues = mResidues.size();
for (size_t i = 0; i + 1 < mResidues.size(); ++i) for (size_t i = 0; i + 1 < mResidues.size(); ++i)
{ {
mResidues[i].mNext = &mResidues[i + 1]; mResidues[i].mNext = &mResidues[i + 1];
...@@ -1117,10 +1128,16 @@ DSSPImpl::DSSPImpl(const Structure& s) ...@@ -1117,10 +1128,16 @@ DSSPImpl::DSSPImpl(const Structure& s)
mStats.nrOfSSBridges = mSSBonds.size(); mStats.nrOfSSBridges = mSSBonds.size();
mStats.nrOfIntraChainSSBridges = 0; mStats.nrOfIntraChainSSBridges = 0;
int ssBondNr = 0;
for (const auto& [a, b]: mSSBonds) for (const auto& [a, b]: mSSBonds)
{ {
if (a->mM.asymID() != b->mM.asymID()) if (a == b)
throw std::runtime_error("first and second residue are the same");
if (a->mM.asymID() == b->mM.asymID() and NoChainBreak(a, b))
++mStats.nrOfIntraChainSSBridges; ++mStats.nrOfIntraChainSSBridges;
a->mSSBridgeNr = b->mSSBridgeNr = ++ssBondNr;
} }
mStats.nrOfHBonds = 0; mStats.nrOfHBonds = 0;
...@@ -1157,7 +1174,7 @@ const Monomer& DSSP::ResidueInfo::residue() const ...@@ -1157,7 +1174,7 @@ const Monomer& DSSP::ResidueInfo::residue() const
ChainBreak DSSP::ResidueInfo::chainBreak() const ChainBreak DSSP::ResidueInfo::chainBreak() const
{ {
return ChainBreak::None; return mImpl->mChainBreak;
} }
int DSSP::ResidueInfo::nr() const int DSSP::ResidueInfo::nr() const
...@@ -1172,7 +1189,7 @@ SecondaryStructureType DSSP::ResidueInfo::ss() const ...@@ -1172,7 +1189,7 @@ SecondaryStructureType DSSP::ResidueInfo::ss() const
int DSSP::ResidueInfo::ssBridgeNr() const int DSSP::ResidueInfo::ssBridgeNr() const
{ {
return 0; return mImpl->mSSBridgeNr;
} }
Helix DSSP::ResidueInfo::helix(int stride) const Helix DSSP::ResidueInfo::helix(int stride) const
......
...@@ -19,6 +19,7 @@ namespace fs = boost::filesystem; ...@@ -19,6 +19,7 @@ namespace fs = boost::filesystem;
#include <boost/iostreams/filter/gzip.hpp> #include <boost/iostreams/filter/gzip.hpp>
#include <boost/format.hpp> #include <boost/format.hpp>
#include "cif++/Config.hpp"
#include "cif++/PDB2Cif.hpp" #include "cif++/PDB2Cif.hpp"
#include "cif++/CifParser.hpp" #include "cif++/CifParser.hpp"
#include "cif++/Cif2PDB.hpp" #include "cif++/Cif2PDB.hpp"
...@@ -683,6 +684,18 @@ void Atom::setID(int id) ...@@ -683,6 +684,18 @@ void Atom::setID(int id)
impl()->mID = to_string(id); impl()->mID = to_string(id);
} }
std::ostream& operator<<(std::ostream& os, const Atom& atom)
{
os << atom.labelCompID() << ' ' << atom.labelAsymID() << ':' << atom.labelSeqID();
if (atom.isAlternate())
os << '(' << atom.labelAltID() << ')';
if (atom.authAsymID() != atom.labelAsymID() or atom.authSeqID() != std::to_string(atom.labelSeqID()) or atom.pdbxAuthInsCode().empty() == false)
os << " [" << atom.authAsymID() << ':' << atom.authSeqID() << atom.pdbxAuthInsCode() << ']';
return os;
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// residue // residue
...@@ -828,6 +841,38 @@ const AtomView& Residue::atoms() const ...@@ -828,6 +841,38 @@ const AtomView& Residue::atoms() const
return mAtoms; return mAtoms;
} }
AtomView Residue::unique_atoms() const
{
if (mStructure == nullptr)
throw runtime_error("Invalid Residue object");
AtomView result;
std::string firstAlt;
for (auto& atom: mAtoms)
{
auto alt = atom.labelAltID();
if (alt.empty())
{
result.push_back(atom);
continue;
}
if (firstAlt.empty())
firstAlt = alt;
else if (alt != firstAlt)
{
if (cif::VERBOSE)
std::cerr << "skipping alternate atom " << atom << std::endl;
continue;
}
result.push_back(atom);
}
return result;
}
Atom Residue::atomByID(const string& atomID) const Atom Residue::atomByID(const string& atomID) const
{ {
Atom result; Atom result;
...@@ -915,6 +960,16 @@ bool Residue::hasAlternateAtoms() const ...@@ -915,6 +960,16 @@ bool Residue::hasAlternateAtoms() const
return std::find_if(mAtoms.begin(), mAtoms.end(), [](const Atom& atom) { return atom.isAlternate(); }) != mAtoms.end(); return std::find_if(mAtoms.begin(), mAtoms.end(), [](const Atom& atom) { return atom.isAlternate(); }) != mAtoms.end();
} }
std::ostream& operator<<(std::ostream& os, const Residue& res)
{
os << res.compoundID() << ' ' << res.asymID() << ':' << res.seqID();
if (res.authAsymID() != res.asymID() or res.authSeqID() != std::to_string(res.seqID()))
os << " [" << res.authAsymID() << ':' << res.authSeqID() << ']';
return os;
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// monomer // monomer
...@@ -1374,10 +1429,19 @@ Polymer::Polymer(const Structure& s, const string& entityID, const string& asymI ...@@ -1374,10 +1429,19 @@ Polymer::Polymer(const Structure& s, const string& entityID, const string& asymI
string compoundID; string compoundID;
cif::tie(seqID, compoundID) = r.get("seq_id", "mon_id"); cif::tie(seqID, compoundID) = r.get("seq_id", "mon_id");
auto index = size(); uint32_t index = size();
ix[seqID] = index; // store only the first
emplace_back(*this, index, seqID, compoundID); if (not ix.count(seqID))
{
ix[seqID] = index;
emplace_back(*this, index, seqID, compoundID);
}
else if (cif::VERBOSE)
{
Monomer m{*this, index, seqID, compoundID};
std::cerr << "Dropping alternate residue " << m << std::endl;
}
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment