Commit 6274ee73 by Maarten L. Hekkelman

Merge branch 'develop' of github.com:PDB-REDO/dssp into develop

parents 05c339e9 4112306a
...@@ -249,13 +249,16 @@ const float ...@@ -249,13 +249,16 @@ const float
struct dssp::residue struct dssp::residue
{ {
residue(residue &&) = default;
residue &operator=(residue &&) = default;
residue(int model_nr, residue(int model_nr,
std::string_view pdb_strand_id, int pdb_seq_num, std::string_view pdb_ins_code, std::string_view pdb_strand_id, int pdb_seq_num, std::string_view pdb_ins_code)
const std::vector<cif::row_handle> &atoms)
: mPDBStrandID(pdb_strand_id) : mPDBStrandID(pdb_strand_id)
, mPDBSeqNum(pdb_seq_num) , mPDBSeqNum(pdb_seq_num)
, mPDBInsCode(pdb_ins_code) , mPDBInsCode(pdb_ins_code)
, mChainBreak(chain_break_type::None) , mChainBreak(chain_break_type::None)
, m_model_nr(model_nr)
{ {
// update the box containing all atoms // update the box containing all atoms
mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<float>::max(); mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<float>::max();
...@@ -263,13 +266,11 @@ struct dssp::residue ...@@ -263,13 +266,11 @@ struct dssp::residue
mH = point{ std::numeric_limits<float>::max(), std::numeric_limits<float>::max(), std::numeric_limits<float>::max() }; mH = point{ std::numeric_limits<float>::max(), std::numeric_limits<float>::max(), std::numeric_limits<float>::max() };
int seen = 0; for (auto &p : m_chiralAtoms)
std::array<point, 4> chiralAtoms;
for (auto &p : chiralAtoms)
p = {}; p = {};
}
for (auto atom : atoms) void addAtom(cif::row_handle atom)
{ {
std::string asymID, compID, atomID, type, authAsymID; std::string asymID, compID, atomID, type, authAsymID;
std::optional<std::string> altID; std::optional<std::string> altID;
...@@ -282,10 +283,10 @@ struct dssp::residue ...@@ -282,10 +283,10 @@ struct dssp::residue
"pdbx_PDB_model_num", "Cartn_x", "Cartn_y", "Cartn_z", "pdbx_PDB_model_num", "Cartn_x", "Cartn_y", "Cartn_z",
"auth_asym_id", "auth_seq_id"); "auth_asym_id", "auth_seq_id");
if (model and model != model_nr) if (model and model != m_model_nr)
continue; return;
if (seen == 0) if (m_seen == 0)
{ {
mAsymID = asymID; mAsymID = asymID;
mCompoundID = compID; mCompoundID = compID;
...@@ -302,66 +303,68 @@ struct dssp::residue ...@@ -302,66 +303,68 @@ struct dssp::residue
if (atomID == "CA") if (atomID == "CA")
{ {
seen |= 1; m_seen |= 1;
mCAlpha = { x, y, z }; mCAlpha = { x, y, z };
ExtendBox(mCAlpha, kRadiusCA + 2 * kRadiusWater); ExtendBox(mCAlpha, kRadiusCA + 2 * kRadiusWater);
if (mType == kValine) if (mType == kValine)
chiralAtoms[1] = mCAlpha; m_chiralAtoms[1] = mCAlpha;
} }
else if (atomID == "C") else if (atomID == "C")
{ {
seen |= 2; m_seen |= 2;
mC = { x, y, z }; mC = { x, y, z };
ExtendBox(mC, kRadiusC + 2 * kRadiusWater); ExtendBox(mC, kRadiusC + 2 * kRadiusWater);
} }
else if (atomID == "N") else if (atomID == "N")
{ {
seen |= 4; m_seen |= 4;
mH = mN = { x, y, z }; mH = mN = { x, y, z };
ExtendBox(mN, kRadiusN + 2 * kRadiusWater); ExtendBox(mN, kRadiusN + 2 * kRadiusWater);
} }
else if (atomID == "O") else if (atomID == "O")
{ {
seen |= 8; m_seen |= 8;
mO = { x, y, z }; mO = { x, y, z };
ExtendBox(mO, kRadiusO + 2 * kRadiusWater); ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
} }
else if (type != "H") else if (type != "H")
{ {
seen |= 16; m_seen |= 16;
mSideChain.emplace_back(atomID, point{ x, y, z }); mSideChain.emplace_back(atomID, point{ x, y, z });
ExtendBox(point{ x, y, z }, kRadiusSideAtom + 2 * kRadiusWater); ExtendBox(point{ x, y, z }, kRadiusSideAtom + 2 * kRadiusWater);
if (mType == kLeucine) if (mType == kLeucine)
{ {
if (atomID == "CG") if (atomID == "CG")
chiralAtoms[0] = { x, y, z }; m_chiralAtoms[0] = { x, y, z };
else if (atomID == "CB") else if (atomID == "CB")
chiralAtoms[1] = { x, y, z }; m_chiralAtoms[1] = { x, y, z };
else if (atomID == "CD1") else if (atomID == "CD1")
chiralAtoms[2] = { x, y, z }; m_chiralAtoms[2] = { x, y, z };
else if (atomID == "CD2") else if (atomID == "CD2")
chiralAtoms[3] = { x, y, z }; m_chiralAtoms[3] = { x, y, z };
} }
else if (mType == kValine) else if (mType == kValine)
{ {
if (atomID == "CB") if (atomID == "CB")
chiralAtoms[0] = { x, y, z }; m_chiralAtoms[0] = { x, y, z };
else if (atomID == "CG1") else if (atomID == "CG1")
chiralAtoms[2] = { x, y, z }; m_chiralAtoms[2] = { x, y, z };
else if (atomID == "CG2") else if (atomID == "CG2")
chiralAtoms[3] = { x, y, z }; m_chiralAtoms[3] = { x, y, z };
} }
} }
} }
void finish()
{
const int kSeenAll = (1 bitor 2 bitor 4 bitor 8); const int kSeenAll = (1 bitor 2 bitor 4 bitor 8);
mComplete = (seen bitand kSeenAll) == kSeenAll; mComplete = (m_seen bitand kSeenAll) == kSeenAll;
if (mType == kValine or mType == kLeucine) if (mType == kValine or mType == kLeucine)
mChiralVolume = dot_product(chiralAtoms[1] - chiralAtoms[0], mChiralVolume = dot_product(m_chiralAtoms[1] - m_chiralAtoms[0],
cross_product(chiralAtoms[2] - chiralAtoms[0], chiralAtoms[3] - chiralAtoms[0])); cross_product(m_chiralAtoms[2] - m_chiralAtoms[0], m_chiralAtoms[3] - m_chiralAtoms[0]));
mRadius = mBox[1].mX - mBox[0].mX; mRadius = mBox[1].mX - mBox[0].mX;
if (mRadius < mBox[1].mY - mBox[0].mY) if (mRadius < mBox[1].mY - mBox[0].mY)
...@@ -543,6 +546,9 @@ struct dssp::residue ...@@ -543,6 +546,9 @@ struct dssp::residue
helix_position_type mHelixFlags[4] = { helix_position_type::None, helix_position_type::None, helix_position_type::None, helix_position_type::None }; // helix_position_type mHelixFlags[4] = { helix_position_type::None, helix_position_type::None, helix_position_type::None, helix_position_type::None }; //
bool mBend = false; bool mBend = false;
chain_break_type mChainBreak = chain_break_type::None; chain_break_type mChainBreak = chain_break_type::None;
int m_seen = 0, m_model_nr = 0;
std::array<point, 4> m_chiralAtoms;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -680,7 +686,7 @@ float residue::CalculateSurface(const std::vector<residue> &inResidues) ...@@ -680,7 +686,7 @@ float residue::CalculateSurface(const std::vector<residue> &inResidues)
point center = r.mCenter; point center = r.mCenter;
float radius = r.mRadius; float radius = r.mRadius;
if (distance(mCenter, center) < mRadius + radius) if (distance_sq(mCenter, center) < (mRadius + radius) * (mRadius + radius))
neighbours.push_back(const_cast<residue *>(&r)); neighbours.push_back(const_cast<residue *>(&r));
} }
...@@ -761,7 +767,15 @@ double CalculateHBondEnergy(residue &inDonor, residue &inAcceptor) ...@@ -761,7 +767,15 @@ double CalculateHBondEnergy(residue &inDonor, residue &inAcceptor)
void CalculateHBondEnergies(std::vector<residue> &inResidues) void CalculateHBondEnergies(std::vector<residue> &inResidues)
{ {
if (cif::VERBOSE)
std::cerr << "calculating hbond energies" << std::endl;
// Calculate the HBond energies // Calculate the HBond energies
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE == 0)
progress.reset(new cif::progress_bar((inResidues.size() * (inResidues.size() - 1) / 2), "calculate hbond energies"));
for (uint32_t i = 0; i + 1 < inResidues.size(); ++i) for (uint32_t i = 0; i + 1 < inResidues.size(); ++i)
{ {
auto &ri = inResidues[i]; auto &ri = inResidues[i];
...@@ -770,12 +784,15 @@ void CalculateHBondEnergies(std::vector<residue> &inResidues) ...@@ -770,12 +784,15 @@ void CalculateHBondEnergies(std::vector<residue> &inResidues)
{ {
auto &rj = inResidues[j]; auto &rj = inResidues[j];
if (distance(ri.mCAlpha, rj.mCAlpha) < kMinimalCADistance) if (distance_sq(ri.mCAlpha, rj.mCAlpha) < kMinimalCADistance * kMinimalCADistance)
{ {
CalculateHBondEnergy(ri, rj); CalculateHBondEnergy(ri, rj);
if (j != i + 1) if (j != i + 1)
CalculateHBondEnergy(rj, ri); CalculateHBondEnergy(rj, ri);
} }
if (progress)
progress->consumed(1);
} }
} }
} }
...@@ -854,9 +871,16 @@ bool Linked(const bridge &a, const bridge &b) ...@@ -854,9 +871,16 @@ bool Linked(const bridge &a, const bridge &b)
void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats) void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats)
{ {
if (cif::VERBOSE)
std::cerr << "calculating beta sheets" << std::endl;
// Calculate Bridges // Calculate Bridges
std::vector<bridge> bridges; std::vector<bridge> bridges;
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE == 0)
progress.reset(new cif::progress_bar(((inResidues.size() - 5) * (inResidues.size() - 4) / 2), "calculate beta sheets"));
for (uint32_t i = 1; i + 4 < inResidues.size(); ++i) for (uint32_t i = 1; i + 4 < inResidues.size(); ++i)
{ {
auto &ri = inResidues[i]; auto &ri = inResidues[i];
...@@ -1089,6 +1113,9 @@ void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats) ...@@ -1089,6 +1113,9 @@ void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats)
void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, bool inPreferPiHelices = true) void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, bool inPreferPiHelices = true)
{ {
if (cif::VERBOSE)
std::cerr << "calculating alpha helices" << std::endl;
// Helix and Turn // Helix and Turn
for (helix_type helixType : { helix_type::_3_10, helix_type::alpha, helix_type::pi }) for (helix_type helixType : { helix_type::_3_10, helix_type::alpha, helix_type::pi })
{ {
...@@ -1180,7 +1207,7 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, ...@@ -1180,7 +1207,7 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats,
std::string asym; std::string asym;
size_t helixLength = 0; size_t helixLength = 0;
for (auto r : inResidues) for (auto &r : inResidues)
{ {
if (r.mAsymID != asym) if (r.mAsymID != asym)
{ {
...@@ -1205,6 +1232,9 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, ...@@ -1205,6 +1232,9 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats,
void CalculatePPHelices(std::vector<residue> &inResidues, statistics &stats, int stretch_length) void CalculatePPHelices(std::vector<residue> &inResidues, statistics &stats, int stretch_length)
{ {
if (cif::VERBOSE)
std::cerr << "calculating pp helices" << std::endl;
size_t N = inResidues.size(); size_t N = inResidues.size();
const float epsilon = 29; const float epsilon = 29;
...@@ -1368,41 +1398,63 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin ...@@ -1368,41 +1398,63 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
: mDB(db) : mDB(db)
, m_min_poly_proline_stretch_length(min_poly_proline_stretch_length) , m_min_poly_proline_stretch_length(min_poly_proline_stretch_length)
{ {
using namespace cif::literals;
if (cif::VERBOSE)
std::cerr << "loading residues" << std::endl;
int resNumber = 0; int resNumber = 0;
auto &pdbx_poly_seq_scheme = mDB["pdbx_poly_seq_scheme"]; auto &pdbx_poly_seq_scheme = mDB["pdbx_poly_seq_scheme"];
auto &atom_site = mDB["atom_site"]; auto &atom_site = mDB["atom_site"];
for (auto r : pdbx_poly_seq_scheme) using key_type = std::tuple<std::string,int>;
{ using index_type = std::map<key_type, size_t>;
std::string pdb_strand_id, pdb_ins_code;
int pdb_seq_num;
cif::tie(pdb_strand_id, pdb_seq_num, pdb_ins_code) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code"); index_type index;
chain_break_type brk = chain_break_type::NewChain; for (const auto &[asym_id, seq_id, pdb_strand_id, pdb_seq_num, pdb_ins_code]
: pdbx_poly_seq_scheme.rows<std::string,int, std::string, int, std::string>("asym_id", "seq_id", "pdb_strand_id", "pdb_seq_num", "pdb_ins_code"))
{
index[{asym_id, seq_id}] = mResidues.size();
mResidues.emplace_back(model_nr, pdb_strand_id, pdb_seq_num, pdb_ins_code);
}
residue residue(model_nr, pdb_strand_id, pdb_seq_num, pdb_ins_code, pdbx_poly_seq_scheme.get_children(r, atom_site)); for (auto atom : atom_site)
{
std::string asym_id;
int seq_id;
if (not residue.mComplete) cif::tie(asym_id, seq_id) = atom.get("label_asym_id", "label_seq_id");
auto i = index.find({asym_id, seq_id});
if (i == index.end())
continue; continue;
++resNumber; mResidues[i->second].addAtom(atom);
}
for (auto &residue : mResidues)
residue.finish();
if (mResidues.empty()) mResidues.erase(std::remove_if(mResidues.begin(), mResidues.end(), [](const dssp::residue &r) { return not r.mComplete; }), mResidues.end());
mStats.count.chains = 1; mStats.count.chains = 1;
else
chain_break_type brk = chain_break_type::NewChain;
for (size_t i = 0; i < mResidues.size(); ++i)
{ {
if (mResidues.back().mAsymID != residue.mAsymID) auto &residue = mResidues[i];
++mStats.count.chains; ++resNumber;
if (distance(mResidues.back().mC, residue.mN) > kMaxPeptideBondLength) if (i > 0)
{ {
if (mResidues.back().mAsymID == residue.mAsymID) if (distance(mResidues[i - 1].mC, mResidues[i].mN) > kMaxPeptideBondLength)
{ {
++mStats.count.chains; ++mStats.count.chains;
if (mResidues[i - 1].mAsymID == mResidues[i].mAsymID)
brk = chain_break_type::Gap; brk = chain_break_type::Gap;
} else
brk = chain_break_type::NewChain;
++resNumber; ++resNumber;
} }
} }
...@@ -1410,8 +1462,6 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin ...@@ -1410,8 +1462,6 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
residue.mChainBreak = brk; residue.mChainBreak = brk;
residue.mNumber = resNumber; residue.mNumber = resNumber;
mResidues.emplace_back(std::move(residue));
brk = chain_break_type::None; brk = chain_break_type::None;
} }
...@@ -1482,6 +1532,9 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin ...@@ -1482,6 +1532,9 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
void DSSP_impl::calculateSecondaryStructure() void DSSP_impl::calculateSecondaryStructure()
{ {
if (cif::VERBOSE)
std::cerr << "calculating secondary structure" << std::endl;
using namespace cif::literals; using namespace cif::literals;
for (auto [asym1, seq1, asym2, seq2] : mDB["struct_conn"].find<std::string, int, std::string, int>("conn_type_id"_key == "disulf", for (auto [asym1, seq1, asym2, seq2] : mDB["struct_conn"].find<std::string, int, std::string, int>("conn_type_id"_key == "disulf",
......
...@@ -93,16 +93,10 @@ int d_main(int argc, const char *argv[]) ...@@ -93,16 +93,10 @@ int d_main(int argc, const char *argv[])
exit(0); exit(0);
} }
if (config.has("help")) if (config.has("help") or config.operands().empty())
{ {
std::cerr << config << std::endl; std::cerr << config << std::endl;
exit(0); exit(config.has("help") ? 0 : 1);
}
if (config.operands().empty())
{
std::cerr << "Input file not specified" << std::endl;
exit(1);
} }
if (config.has("output-format") and config.get<std::string>("output-format") != "dssp" and config.get<std::string>("output-format") != "mmcif") if (config.has("output-format") and config.get<std::string>("output-format") != "dssp" and config.get<std::string>("output-format") != "mmcif")
...@@ -130,8 +124,6 @@ int d_main(int argc, const char *argv[]) ...@@ -130,8 +124,6 @@ int d_main(int argc, const char *argv[])
} }
cif::file f = cif::pdb::read(in); cif::file f = cif::pdb::read(in);
if (cif::VERBOSE > 0 and not f.is_valid())
std::cerr << "Warning, the input file is not valid. Run with --verbose to see why." << std::endl;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment