Commit 8aa74b98 by rjoosten

Merge branch 'trunk' of github.com:PDB-REDO/dssp into trunk

parents 1ef68b65 b87ef206
......@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.15)
# set the project name
project(mkdssp VERSION 4.3 LANGUAGES CXX)
project(mkdssp VERSION 4.3.1 LANGUAGES CXX)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
......@@ -105,7 +105,7 @@ find_package(Threads)
if(NOT PDB_REDO_META)
find_package(libmcfp REQUIRED)
find_package(cifpp 5.0.8 REQUIRED)
find_package(cifpp 5.1.0 REQUIRED)
if(BUILD_WEBSERVER)
find_package(zeep REQUIRED)
......@@ -135,7 +135,8 @@ target_link_libraries(mkdssp PRIVATE dssp cifpp::cifpp libmcfp::libmcfp dssp::ds
if(USE_RSRC)
mrc_target_resources(mkdssp
${CIFPP_SHARE_DIR}/mmcif_pdbx.dic
${CIFPP_SHARE_DIR}/mmcif_ddl.dic)
${CIFPP_SHARE_DIR}/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/mmcif_pdbx/dssp-extension.dic)
endif()
if(BUILD_WEBSERVER)
......
Version 4.3.1
- Optimised for speed
- Fix in sheet numbering (mmCIF output)
Version 4.3
- Write new output by default
- Fix some issues with this new output, typo e.g.
......
......@@ -173,7 +173,7 @@ save__dssp_struct_bridge_pairs.acceptor_1_label_asym_id
;
_item.name '_dssp_struct_bridge_pairs.acceptor_1_label_asym_id'
_item.category_id dssp_struct_bridge_pairs
_item.mandatory_code yes
_item.mandatory_code no
_item_type.code code
save_
......@@ -250,7 +250,7 @@ save__dssp_struct_bridge_pairs.acceptor_2_label_asym_id
;
_item.name '_dssp_struct_bridge_pairs.acceptor_2_label_asym_id'
_item.category_id dssp_struct_bridge_pairs
_item.mandatory_code yes
_item.mandatory_code no
_item_type.code code
save_
......@@ -327,7 +327,7 @@ save__dssp_struct_bridge_pairs.donor_1_label_asym_id
;
_item.name '_dssp_struct_bridge_pairs.donor_1_label_asym_id'
_item.category_id dssp_struct_bridge_pairs
_item.mandatory_code yes
_item.mandatory_code no
_item_type.code code
save_
......@@ -404,7 +404,7 @@ save__dssp_struct_bridge_pairs.donor_2_label_asym_id
;
_item.name '_dssp_struct_bridge_pairs.donor_2_label_asym_id'
_item.category_id dssp_struct_bridge_pairs
_item.mandatory_code yes
_item.mandatory_code no
_item_type.code code
save_
......
......@@ -28,6 +28,7 @@
#include "revision.hpp"
#include <cif++/pdb/io.hpp>
#include <cif++/dictionary_parser.hpp>
#include <exception>
#include <filesystem>
......@@ -224,8 +225,6 @@ void writeDSSP(const dssp &dssp, std::ostream &os)
void writeBridgePairs(cif::datablock &db, const dssp &dssp)
{
using ResidueInfo = dssp::residue_info;
auto &hb = db["dssp_struct_bridge_pairs"];
hb.add_column("id");
......@@ -248,23 +247,16 @@ void writeBridgePairs(cif::datablock &db, const dssp &dssp)
for (auto &res : dssp)
{
auto write_res = [&](const std::string &prefix, ResidueInfo const &r, double energy)
{
hb.back().assign({ { prefix + "label_comp_id", r.compound_id() },
{ prefix + "label_seq_id", r.seq_id() },
{ prefix + "label_asym_id", r.asym_id() },
// { prefix + "auth_comp_id", r.compound_id() },
{ prefix + "auth_seq_id", r.auth_seq_id() },
{ prefix + "auth_asym_id", r.auth_asym_id() },
{ prefix + "pdbx_PDB_ins_code", r.pdb_ins_code() } });
if (not prefix.empty())
hb.back().assign({ { prefix + "energy", energy, 1 } });
};
hb.emplace({ { "id", hb.get_unique_id("") } });
write_res("", res, 0);
cif::row_initializer data({
{ "id", hb.get_unique_id("") },
{ "label_comp_id", res.compound_id() },
{ "label_seq_id", res.seq_id() },
{ "label_asym_id", res.asym_id() },
// { "auth_comp_id", res.compound_id() },
{ "auth_seq_id", res.auth_seq_id() },
{ "auth_asym_id", res.auth_asym_id() },
{ "pdbx_PDB_ins_code", res.pdb_ins_code() }
});
for (int i : { 0, 1 })
{
......@@ -272,11 +264,59 @@ void writeBridgePairs(cif::datablock &db, const dssp &dssp)
const auto &&[donor, donorEnergy] = res.donor(i);
if (acceptor)
write_res(i ? "acceptor_2_" : "acceptor_1_", acceptor, acceptorEnergy);
{
if (i == 0)
{
data.emplace_back("acceptor_1_label_comp_id", acceptor.compound_id());
data.emplace_back("acceptor_1_label_seq_id", acceptor.seq_id());
data.emplace_back("acceptor_1_label_asym_id", acceptor.asym_id());
// data.emplace_back("acceptor_1_auth_comp_id", acceptor.compound_id());
data.emplace_back("acceptor_1_auth_seq_id", acceptor.auth_seq_id());
data.emplace_back("acceptor_1_auth_asym_id", acceptor.auth_asym_id());
data.emplace_back("acceptor_1_pdbx_PDB_ins_code", acceptor.pdb_ins_code());
data.emplace_back("acceptor_1_energy", acceptorEnergy, 1);
}
else
{
data.emplace_back("acceptor_2_label_comp_id", acceptor.compound_id());
data.emplace_back("acceptor_2_label_seq_id", acceptor.seq_id());
data.emplace_back("acceptor_2_label_asym_id", acceptor.asym_id());
// data.emplace_back("acceptor_2_auth_comp_id", acceptor.compound_id());
data.emplace_back("acceptor_2_auth_seq_id", acceptor.auth_seq_id());
data.emplace_back("acceptor_2_auth_asym_id", acceptor.auth_asym_id());
data.emplace_back("acceptor_2_pdbx_PDB_ins_code", acceptor.pdb_ins_code());
data.emplace_back("acceptor_2_energy", acceptorEnergy, 1);
}
}
if (donor)
write_res(i ? "donor_2_" : "donor_1_", donor, donorEnergy);
{
if (i == 0)
{
data.emplace_back("donor_1_label_comp_id", donor.compound_id());
data.emplace_back("donor_1_label_seq_id", donor.seq_id());
data.emplace_back("donor_1_label_asym_id", donor.asym_id());
// data.emplace_back("donor_1_auth_comp_id", donor.compound_id());
data.emplace_back("donor_1_auth_seq_id", donor.auth_seq_id());
data.emplace_back("donor_1_auth_asym_id", donor.auth_asym_id());
data.emplace_back("donor_1_pdbx_PDB_ins_code", donor.pdb_ins_code());
data.emplace_back("donor_1_energy", donorEnergy, 1);
}
else
{
data.emplace_back("donor_2_label_comp_id", donor.compound_id());
data.emplace_back("donor_2_label_seq_id", donor.seq_id());
data.emplace_back("donor_2_label_asym_id", donor.asym_id());
// data.emplace_back("donor_2_auth_comp_id", donor.compound_id());
data.emplace_back("donor_2_auth_seq_id", donor.auth_seq_id());
data.emplace_back("donor_2_auth_asym_id", donor.auth_asym_id());
data.emplace_back("donor_2_pdbx_PDB_ins_code", donor.pdb_ins_code());
data.emplace_back("donor_2_energy", donorEnergy, 1);
}
}
}
hb.emplace(std::move(data));
}
}
......@@ -319,7 +359,7 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
if (not sheetMap.count(sheetID))
sheetMap[sheetID] = static_cast<int>(sheetMap.size());
strands.emplace_back(std::make_tuple(sheetMap[sheetID], res_list{ res }));
strands.emplace_back(sheetMap[sheetID], res_list{ res });
}
// sort the strands vector
......@@ -341,9 +381,13 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
{
if (sheetNr != lastSheet)
{
struct_sheet.emplace({ { "id", cif::cif_id_for_number(sheetNr) },
{ "number_strands", std::count_if(strands.begin(), strands.end(), [nr = sheetNr](std::tuple<int, res_list> const &s)
{ return std::get<0>(s) == nr; }) } });
struct_sheet.emplace({
{ "id", cif::cif_id_for_number(sheetNr) },
{ "number_strands",
std::count_if(strands.begin(), strands.end(), [nr = sheetNr](std::tuple<int, res_list> const &s)
{ return std::get<0>(s) == nr; })
}
});
lastSheet = sheetNr;
}
......@@ -352,26 +396,20 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
// Each residue resides in a single strand which is part of a single sheet
// this function returns the sequence number inside the sheet for the strand
// containing res
auto strandNrForResidue = [&strands, &sheetMap](dssp::residue_info const &res)
std::map<int,int> strandMap;
int strandNr = 0;
for (auto &&[sheet, strand] : strands)
{
for (const auto &[k, iSheet] : sheetMap)
for (auto &r : strand)
{
int result = 0;
for (auto &&[sheet, strand] : strands)
{
if (sheet != iSheet)
continue;
if (std::find(strand.begin(), strand.end(), res) != strand.end())
return result;
++result;
}
if (r.type() != ss_type::Strand)
continue;
strandMap[r.nr()] = strandNr;
}
assert(false);
return -1;
};
++strandNr;
}
// This map is used to record the sense of a ladder, and can be used
// to detect ladders already seen.
......@@ -382,7 +420,7 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
if (res.type() != ss_type::Strand)
continue;
int s1 = strandNrForResidue(res);
int s1 = strandMap[res.nr()];
for (int i : { 0, 1 })
{
......@@ -390,7 +428,7 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
if (not p or p.asym_id() != res.asym_id() or p.sheet() != res.sheet() or p.type() != ss_type::Strand)
continue;
int s2 = strandNrForResidue(p);
int s2 = strandMap[p.nr()];
// assert(s1 != s2);
if (s2 == s1)
continue;
......@@ -428,10 +466,9 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
res_list strand1, strand2;
int strandIx = 0;
for (auto const &s : strands)
for (int strandIx = 0; static_cast<size_t>(strandIx) < strands.size(); ++strandIx)
{
const auto &[sSheet, strand] = s;
const auto &[sSheet, strand] = strands[strandIx];
if (sSheet != sheet)
continue;
......@@ -442,8 +479,6 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
strand2 = strand;
break;
}
++strandIx;
}
assert(not(strand1.empty() or strand2.empty()));
......@@ -723,8 +758,9 @@ void writeSheets(cif::datablock &db, const dssp &dssp)
auto &beg = strand.front();
auto &end = strand.back();
struct_sheet_range.emplace({ { "sheet_id", cif::cif_id_for_number(sheet) },
{ "id", strandNrForResidue(strand.front()) + 1 },
struct_sheet_range.emplace({
{ "sheet_id", cif::cif_id_for_number(sheet) },
{ "id", strandMap[strand.front().nr()] + 1 },
{ "beg_label_comp_id", beg.compound_id() },
{ "beg_label_asym_id", beg.asym_id() },
{ "beg_label_seq_id", beg.seq_id() },
......@@ -850,14 +886,15 @@ void writeStatistics(cif::datablock &db, const dssp &dssp)
auto &dssp_statistics = db["dssp_statistics"];
dssp_statistics.emplace({ { "entry_id", db.name() },
auto stats_i = dssp_statistics.emplace({ { "entry_id", db.name() },
{ "nr_of_residues", stats.count.residues },
{ "nr_of_chains", stats.count.chains },
{ "nr_of_ss_bridges_total", stats.count.SS_bridges },
{ "nr_of_ss_bridges_intra_chain", stats.count.intra_chain_SS_bridges },
{ "nr_of_ss_bridges_inter_chain", stats.count.SS_bridges - stats.count.intra_chain_SS_bridges },
{ "accessible_surface_of_protein", stats.accessible_surface } });
{ "nr_of_ss_bridges_inter_chain", stats.count.SS_bridges - stats.count.intra_chain_SS_bridges } });
if (stats.accessible_surface > 0)
(*stats_i)["accessible_surface_of_protein"] = stats.accessible_surface;
auto &dssp_struct_hbonds = db["dssp_statistics_hbond"];
......@@ -930,6 +967,8 @@ void writeStatistics(cif::datablock &db, const dssp &dssp)
void writeSummary(cif::datablock &db, const dssp &dssp)
{
bool writeAccessibility = dssp.get_statistics().accessible_surface > 0;
// A approximation of the old format
auto &dssp_struct_summary = db["dssp_struct_summary"];
......@@ -1028,13 +1067,14 @@ void writeSummary(cif::datablock &db, const dssp &dssp)
{ "sheet", res.sheet() ? cif::cif_id_for_number(res.sheet() - 1) : "." },
{ "accessibility", res.accessibility(), 1 },
{ "x_ca", cax, 1 },
{ "y_ca", cay, 1 },
{ "z_ca", caz, 1 },
};
if (writeAccessibility)
data.emplace_back("accessibility", res.accessibility(), 1);
if (res.tco().has_value())
data.emplace_back("TCO", *res.tco(), 3);
else
......@@ -1068,6 +1108,14 @@ void annotateDSSP(cif::datablock &db, const dssp &dssp, bool writeOther, bool wr
{
using namespace std::literals;
auto &validator = const_cast<cif::validator &>(*db.get_validator());
if (validator.get_validator_for_category("dssp_struct_summary") == nullptr)
{
auto dssp_extension = cif::load_resource("dssp-extension.dic");
if (dssp_extension)
cif::extend_dictionary(validator, *dssp_extension);
}
if (dssp.empty())
{
if (cif::VERBOSE > 0)
......
......@@ -27,6 +27,7 @@
// Calculate DSSP-like secondary structure information
#include "dssp.hpp"
#include "queue.hpp"
#include <deque>
#include <iomanip>
......@@ -40,6 +41,8 @@ using helix_type = dssp::helix_type;
using helix_position_type = dssp::helix_position_type;
using chain_break_type = dssp::chain_break_type;
using queue_type = blocking_queue<std::tuple<uint32_t,uint32_t>>;
// --------------------------------------------------------------------
const double
......@@ -249,13 +252,16 @@ const float
struct dssp::residue
{
residue(residue &&) = default;
residue &operator=(residue &&) = default;
residue(int model_nr,
std::string_view pdb_strand_id, int pdb_seq_num, std::string_view pdb_ins_code,
const std::vector<cif::row_handle> &atoms)
std::string_view pdb_strand_id, int pdb_seq_num, std::string_view pdb_ins_code)
: mPDBStrandID(pdb_strand_id)
, mPDBSeqNum(pdb_seq_num)
, mPDBInsCode(pdb_ins_code)
, mChainBreak(chain_break_type::None)
, m_model_nr(model_nr)
{
// update the box containing all atoms
mBox[0].mX = mBox[0].mY = mBox[0].mZ = std::numeric_limits<float>::max();
......@@ -263,105 +269,105 @@ struct dssp::residue
mH = point{ std::numeric_limits<float>::max(), std::numeric_limits<float>::max(), std::numeric_limits<float>::max() };
int seen = 0;
std::array<point, 4> chiralAtoms;
for (auto &p : chiralAtoms)
for (auto &p : m_chiralAtoms)
p = {};
}
void addAtom(cif::row_handle atom)
{
std::string asymID, compID, atomID, type, authAsymID;
std::optional<std::string> altID;
int seqID, authSeqID;
std::optional<int> model;
float x, y, z;
cif::tie(asymID, compID, atomID, altID, type, seqID, model, x, y, z, authAsymID, authSeqID) =
atom.get("label_asym_id", "label_comp_id", "label_atom_id", "label_alt_id", "type_symbol", "label_seq_id",
"pdbx_PDB_model_num", "Cartn_x", "Cartn_y", "Cartn_z",
"auth_asym_id", "auth_seq_id");
if (model and model != m_model_nr)
return;
for (auto atom : atoms)
if (m_seen == 0)
{
std::string asymID, compID, atomID, type, authAsymID;
std::optional<std::string> altID;
int seqID, authSeqID;
std::optional<int> model;
float x, y, z;
cif::tie(asymID, compID, atomID, altID, type, seqID, model, x, y, z, authAsymID, authSeqID) =
atom.get("label_asym_id", "label_comp_id", "label_atom_id", "label_alt_id", "type_symbol", "label_seq_id",
"pdbx_PDB_model_num", "Cartn_x", "Cartn_y", "Cartn_z",
"auth_asym_id", "auth_seq_id");
if (model and model != model_nr)
continue;
mAsymID = asymID;
mCompoundID = compID;
mSeqID = seqID;
if (seen == 0)
{
mAsymID = asymID;
mCompoundID = compID;
mSeqID = seqID;
mAuthSeqID = authSeqID;
mAuthAsymID = authAsymID;
mAuthSeqID = authSeqID;
mAuthAsymID = authAsymID;
mType = MapResidue(mCompoundID);
mType = MapResidue(mCompoundID);
if (altID)
mAltID = *altID;
}
if (altID)
mAltID = *altID;
}
if (atomID == "CA")
{
m_seen |= 1;
mCAlpha = { x, y, z };
ExtendBox(mCAlpha, kRadiusCA + 2 * kRadiusWater);
if (atomID == "CA")
{
seen |= 1;
mCAlpha = { x, y, z };
ExtendBox(mCAlpha, kRadiusCA + 2 * kRadiusWater);
if (mType == kValine)
m_chiralAtoms[1] = mCAlpha;
}
else if (atomID == "C")
{
m_seen |= 2;
mC = { x, y, z };
ExtendBox(mC, kRadiusC + 2 * kRadiusWater);
}
else if (atomID == "N")
{
m_seen |= 4;
mH = mN = { x, y, z };
ExtendBox(mN, kRadiusN + 2 * kRadiusWater);
}
else if (atomID == "O")
{
m_seen |= 8;
mO = { x, y, z };
ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
}
else if (type != "H")
{
m_seen |= 16;
mSideChain.emplace_back(atomID, point{ x, y, z });
ExtendBox(point{ x, y, z }, kRadiusSideAtom + 2 * kRadiusWater);
if (mType == kValine)
chiralAtoms[1] = mCAlpha;
}
else if (atomID == "C")
{
seen |= 2;
mC = { x, y, z };
ExtendBox(mC, kRadiusC + 2 * kRadiusWater);
}
else if (atomID == "N")
if (mType == kLeucine)
{
seen |= 4;
mH = mN = { x, y, z };
ExtendBox(mN, kRadiusN + 2 * kRadiusWater);
if (atomID == "CG")
m_chiralAtoms[0] = { x, y, z };
else if (atomID == "CB")
m_chiralAtoms[1] = { x, y, z };
else if (atomID == "CD1")
m_chiralAtoms[2] = { x, y, z };
else if (atomID == "CD2")
m_chiralAtoms[3] = { x, y, z };
}
else if (atomID == "O")
else if (mType == kValine)
{
seen |= 8;
mO = { x, y, z };
ExtendBox(mO, kRadiusO + 2 * kRadiusWater);
}
else if (type != "H")
{
seen |= 16;
mSideChain.emplace_back(atomID, point{ x, y, z });
ExtendBox(point{ x, y, z }, kRadiusSideAtom + 2 * kRadiusWater);
if (mType == kLeucine)
{
if (atomID == "CG")
chiralAtoms[0] = { x, y, z };
else if (atomID == "CB")
chiralAtoms[1] = { x, y, z };
else if (atomID == "CD1")
chiralAtoms[2] = { x, y, z };
else if (atomID == "CD2")
chiralAtoms[3] = { x, y, z };
}
else if (mType == kValine)
{
if (atomID == "CB")
chiralAtoms[0] = { x, y, z };
else if (atomID == "CG1")
chiralAtoms[2] = { x, y, z };
else if (atomID == "CG2")
chiralAtoms[3] = { x, y, z };
}
if (atomID == "CB")
m_chiralAtoms[0] = { x, y, z };
else if (atomID == "CG1")
m_chiralAtoms[2] = { x, y, z };
else if (atomID == "CG2")
m_chiralAtoms[3] = { x, y, z };
}
}
}
void finish()
{
const int kSeenAll = (1 bitor 2 bitor 4 bitor 8);
mComplete = (seen bitand kSeenAll) == kSeenAll;
mComplete = (m_seen bitand kSeenAll) == kSeenAll;
if (mType == kValine or mType == kLeucine)
mChiralVolume = dot_product(chiralAtoms[1] - chiralAtoms[0],
cross_product(chiralAtoms[2] - chiralAtoms[0], chiralAtoms[3] - chiralAtoms[0]));
mChiralVolume = dot_product(m_chiralAtoms[1] - m_chiralAtoms[0],
cross_product(m_chiralAtoms[2] - m_chiralAtoms[0], m_chiralAtoms[3] - m_chiralAtoms[0]));
mRadius = mBox[1].mX - mBox[0].mX;
if (mRadius < mBox[1].mY - mBox[0].mY)
......@@ -543,6 +549,9 @@ struct dssp::residue
helix_position_type mHelixFlags[4] = { helix_position_type::None, helix_position_type::None, helix_position_type::None, helix_position_type::None }; //
bool mBend = false;
chain_break_type mChainBreak = chain_break_type::None;
int m_seen = 0, m_model_nr = 0;
std::array<point, 4> m_chiralAtoms;
};
// --------------------------------------------------------------------
......@@ -680,7 +689,7 @@ float residue::CalculateSurface(const std::vector<residue> &inResidues)
point center = r.mCenter;
float radius = r.mRadius;
if (distance(mCenter, center) < mRadius + radius)
if (distance_sq(mCenter, center) < (mRadius + radius) * (mRadius + radius))
neighbours.push_back(const_cast<residue *>(&r));
}
......@@ -759,24 +768,23 @@ double CalculateHBondEnergy(residue &inDonor, residue &inAcceptor)
// --------------------------------------------------------------------
void CalculateHBondEnergies(std::vector<residue> &inResidues)
void CalculateHBondEnergies(std::vector<residue> &inResidues, std::vector<std::tuple<uint32_t, uint32_t>> &q)
{
// Calculate the HBond energies
for (uint32_t i = 0; i + 1 < inResidues.size(); ++i)
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE == 0 or cif::VERBOSE == 1)
progress.reset(new cif::progress_bar(q.size(), "calculate hbond energies"));
for (const auto &[i, j] : q)
{
auto &ri = inResidues[i];
for (uint32_t j = i + 1; j < inResidues.size(); ++j)
{
auto &rj = inResidues[j];
if (distance(ri.mCAlpha, rj.mCAlpha) < kMinimalCADistance)
{
CalculateHBondEnergy(ri, rj);
if (j != i + 1)
CalculateHBondEnergy(rj, ri);
}
}
auto &rj = inResidues[j];
CalculateHBondEnergy(ri, rj);
if (j != i + 1)
CalculateHBondEnergy(rj, ri);
if (progress)
progress->consumed(1);
}
}
......@@ -852,59 +860,65 @@ bool Linked(const bridge &a, const bridge &b)
// --------------------------------------------------------------------
void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats)
void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats, std::vector<std::tuple<uint32_t, uint32_t>> &q)
{
// if (cif::VERBOSE)
// std::cerr << "calculating beta sheets" << std::endl;
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE == 0 or cif::VERBOSE == 1)
progress.reset(new cif::progress_bar(q.size(), "calculate beta sheets"));
// Calculate Bridges
std::vector<bridge> bridges;
for (uint32_t i = 1; i + 4 < inResidues.size(); ++i)
for (const auto &[i, j] : q)
{
if (progress)
progress->consumed(1);
auto &ri = inResidues[i];
auto &rj = inResidues[j];
for (uint32_t j = i + 3; j + 1 < inResidues.size(); ++j)
{
auto &rj = inResidues[j];
bridge_type type = TestBridge(ri, rj);
if (type == bridge_type::None)
continue;
bridge_type type = TestBridge(ri, rj);
if (type == bridge_type::None)
bool found = false;
for (bridge &bridge : bridges)
{
if (type != bridge.type or i != bridge.i.back() + 1)
continue;
bool found = false;
for (bridge &bridge : bridges)
if (type == bridge_type::Parallel and bridge.j.back() + 1 == j)
{
if (type != bridge.type or i != bridge.i.back() + 1)
continue;
if (type == bridge_type::Parallel and bridge.j.back() + 1 == j)
{
bridge.i.push_back(i);
bridge.j.push_back(j);
found = true;
break;
}
if (type == bridge_type::AntiParallel and bridge.j.front() - 1 == j)
{
bridge.i.push_back(i);
bridge.j.push_front(j);
found = true;
break;
}
bridge.i.push_back(i);
bridge.j.push_back(j);
found = true;
break;
}
if (not found)
if (type == bridge_type::AntiParallel and bridge.j.front() - 1 == j)
{
bridge bridge = {};
bridge.type = type;
bridge.i.push_back(i);
bridge.chainI = ri.mAsymID;
bridge.j.push_back(j);
bridge.chainJ = rj.mAsymID;
bridges.push_back(bridge);
bridge.j.push_front(j);
found = true;
break;
}
}
if (not found)
{
bridge bridge = {};
bridge.type = type;
bridge.i.push_back(i);
bridge.chainI = ri.mAsymID;
bridge.j.push_back(j);
bridge.chainJ = rj.mAsymID;
bridges.push_back(bridge);
}
}
// extend ladders
......@@ -1089,6 +1103,9 @@ void CalculateBetaSheets(std::vector<residue> &inResidues, statistics &stats)
void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats, bool inPreferPiHelices = true)
{
if (cif::VERBOSE)
std::cerr << "calculating alpha helices" << std::endl;
// Helix and Turn
for (helix_type helixType : { helix_type::_3_10, helix_type::alpha, helix_type::pi })
{
......@@ -1180,7 +1197,7 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats,
std::string asym;
size_t helixLength = 0;
for (auto r : inResidues)
for (auto &r : inResidues)
{
if (r.mAsymID != asym)
{
......@@ -1205,6 +1222,9 @@ void CalculateAlphaHelices(std::vector<residue> &inResidues, statistics &stats,
void CalculatePPHelices(std::vector<residue> &inResidues, statistics &stats, int stretch_length)
{
if (cif::VERBOSE)
std::cerr << "calculating pp helices" << std::endl;
size_t N = inResidues.size();
const float epsilon = 29;
......@@ -1368,41 +1388,65 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
: mDB(db)
, m_min_poly_proline_stretch_length(min_poly_proline_stretch_length)
{
using namespace cif::literals;
if (cif::VERBOSE)
std::cerr << "loading residues" << std::endl;
int resNumber = 0;
auto &pdbx_poly_seq_scheme = mDB["pdbx_poly_seq_scheme"];
auto &atom_site = mDB["atom_site"];
for (auto r : pdbx_poly_seq_scheme)
{
std::string pdb_strand_id, pdb_ins_code;
int pdb_seq_num;
using key_type = std::tuple<std::string,int>;
using index_type = std::map<key_type, size_t>;
index_type index;
cif::tie(pdb_strand_id, pdb_seq_num, pdb_ins_code) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code");
mResidues.reserve(pdbx_poly_seq_scheme.size());
chain_break_type brk = chain_break_type::NewChain;
for (const auto &[asym_id, seq_id, pdb_strand_id, pdb_seq_num, pdb_ins_code]
: pdbx_poly_seq_scheme.rows<std::string,int, std::string, int, std::string>("asym_id", "seq_id", "pdb_strand_id", "pdb_seq_num", "pdb_ins_code"))
{
index[{asym_id, seq_id}] = mResidues.size();
mResidues.emplace_back(model_nr, pdb_strand_id, pdb_seq_num, pdb_ins_code);
}
residue residue(model_nr, pdb_strand_id, pdb_seq_num, pdb_ins_code, pdbx_poly_seq_scheme.get_children(r, atom_site));
for (auto atom : atom_site)
{
std::string asym_id;
int seq_id;
if (not residue.mComplete)
cif::tie(asym_id, seq_id) = atom.get("label_asym_id", "label_seq_id");
auto i = index.find({asym_id, seq_id});
if (i == index.end())
continue;
mResidues[i->second].addAtom(atom);
}
for (auto &residue : mResidues)
residue.finish();
mResidues.erase(std::remove_if(mResidues.begin(), mResidues.end(), [](const dssp::residue &r) { return not r.mComplete; }), mResidues.end());
mStats.count.chains = 1;
chain_break_type brk = chain_break_type::NewChain;
for (size_t i = 0; i < mResidues.size(); ++i)
{
auto &residue = mResidues[i];
++resNumber;
if (mResidues.empty())
mStats.count.chains = 1;
else
if (i > 0)
{
if (mResidues.back().mAsymID != residue.mAsymID)
++mStats.count.chains;
if (distance(mResidues.back().mC, residue.mN) > kMaxPeptideBondLength)
if (distance(mResidues[i - 1].mC, mResidues[i].mN) > kMaxPeptideBondLength)
{
if (mResidues.back().mAsymID == residue.mAsymID)
{
++mStats.count.chains;
++mStats.count.chains;
if (mResidues[i - 1].mAsymID == mResidues[i].mAsymID)
brk = chain_break_type::Gap;
}
else
brk = chain_break_type::NewChain;
++resNumber;
}
}
......@@ -1410,8 +1454,6 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
residue.mChainBreak = brk;
residue.mNumber = resNumber;
mResidues.emplace_back(std::move(residue));
brk = chain_break_type::None;
}
......@@ -1482,6 +1524,9 @@ DSSP_impl::DSSP_impl(const cif::datablock &db, int model_nr, int min_poly_prolin
void DSSP_impl::calculateSecondaryStructure()
{
if (cif::VERBOSE)
std::cerr << "calculating secondary structure" << std::endl;
using namespace cif::literals;
for (auto [asym1, seq1, asym2, seq2] : mDB["struct_conn"].find<std::string, int, std::string, int>("conn_type_id"_key == "disulf",
......@@ -1508,8 +1553,45 @@ void DSSP_impl::calculateSecondaryStructure()
mSSBonds.emplace_back(&*r1, &*r2);
}
CalculateHBondEnergies(mResidues);
CalculateBetaSheets(mResidues, mStats);
// Prefetch the c-alpha positions. No, really, that might be the trick
std::vector<point> cAlphas;
cAlphas.reserve(mResidues.size());
for (auto &r : mResidues)
cAlphas.emplace_back(r.mCAlpha);
std::unique_ptr<cif::progress_bar> progress;
if (cif::VERBOSE == 0 or cif::VERBOSE == 1)
progress.reset(new cif::progress_bar((mResidues.size() * (mResidues.size() - 1)) / 2, "calculate distances"));
// Calculate the HBond energies
std::vector<std::tuple<uint32_t,uint32_t>> near;
for (uint32_t i = 0; i + 1 < mResidues.size(); ++i)
{
auto cai = cAlphas[i];
for (uint32_t j = i + 1; j < mResidues.size(); ++j)
{
auto caj = cAlphas[j];
if (distance_sq(cai, caj) > (kMinimalCADistance * kMinimalCADistance))
continue;
near.emplace_back(i, j);
}
if (progress)
progress->consumed(mResidues.size() - i - 1);
}
if (cif::VERBOSE > 0)
std::cerr << "Considering " << near.size() << " pairs of residues" << std::endl;
progress.reset(nullptr);
CalculateHBondEnergies(mResidues, near);
CalculateBetaSheets(mResidues, mStats, near);
CalculateAlphaHelices(mResidues, mStats);
CalculatePPHelices(mResidues, mStats, m_min_poly_proline_stretch_length);
......
......@@ -74,6 +74,8 @@ int d_main(int argc, const char *argv[])
mcfp::make_option("write-other", "If set, write the type OTHER for loops, default is to leave this out"),
mcfp::make_option("no-dssp-categories", "If set, will suppress output of new DSSP output in mmCIF format"),
mcfp::make_option("calculate-accessibility", "Default is to not calculate the surface accessibility when the output format is mmCIF"),
mcfp::make_option<std::string>("mmcif-dictionary", "Path to the mmcif_pdbx.dic file to use instead of default"),
mcfp::make_option("help,h", "Display help message"),
......@@ -93,16 +95,10 @@ int d_main(int argc, const char *argv[])
exit(0);
}
if (config.has("help"))
if (config.has("help") or config.operands().empty())
{
std::cerr << config << std::endl;
exit(0);
}
if (config.operands().empty())
{
std::cerr << "Input file not specified" << std::endl;
exit(1);
exit(config.has("help") ? 0 : 1);
}
if (config.has("output-format") and config.get<std::string>("output-format") != "dssp" and config.get<std::string>("output-format") != "mmcif")
......@@ -130,8 +126,6 @@ int d_main(int argc, const char *argv[])
}
cif::file f = cif::pdb::read(in);
if (cif::VERBOSE > 0 and not f.is_valid())
std::cerr << "Warning, the input file is not valid. Run with --verbose to see why." << std::endl;
// --------------------------------------------------------------------
......@@ -164,7 +158,7 @@ int d_main(int argc, const char *argv[])
fmt = "cif";
}
dssp dssp(f.front(), 1, pp_stretch, true);
dssp dssp(f.front(), 1, pp_stretch, fmt == "dssp" or config.has("calculate-accessibility"));
if (not output.empty())
{
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <condition_variable>
#include <mutex>
#include <queue>
template <typename T, size_t N = 100>
class blocking_queue
{
public:
void push(T const &value)
{
std::unique_lock<std::mutex> lock(m_guard);
while (m_queue.size() >= N)
m_full_signal.wait(lock);
m_queue.push(value);
m_empty_signal.notify_one();
}
T pop()
{
std::unique_lock<std::mutex> lock(m_guard);
while (m_queue.empty())
m_empty_signal.wait(lock);
auto value = m_queue.front();
m_queue.pop();
m_full_signal.notify_one();
return value;
}
template<class Rep, class Period>
std::tuple<bool,T> pop(const std::chrono::duration<Rep, Period>& wait_for)
{
std::unique_lock<std::mutex> lock(m_guard);
while (m_queue.empty())
{
auto now = std::chrono::system_clock::now();
if (m_empty_signal.wait_until(lock, now + wait_for) == std::cv_status::timeout)
return { true , T{} };
}
auto value = m_queue.front();
m_queue.pop();
m_full_signal.notify_one();
return { false, value };
}
bool is_full() const
{
std::unique_lock<std::mutex> lock(m_guard);
return m_queue.size() >= N;
}
private:
std::queue<T> m_queue;
mutable std::mutex m_guard;
std::condition_variable m_empty_signal, m_full_signal;
};
template <typename T, size_t N = 10>
class non_blocking_queue
{
public:
bool push(T const &value)
{
bool result = false;
std::unique_lock<std::mutex> lock(m_guard);
if (m_queue.size() < N)
{
m_queue.push(value);
m_empty_signal.notify_one();
result = true;
}
return result;
}
T pop()
{
std::unique_lock<std::mutex> lock(m_guard);
while (m_queue.empty())
m_empty_signal.wait(lock);
auto value = m_queue.front();
m_queue.pop();
return value;
}
private:
std::queue<T> m_queue;
mutable std::mutex m_guard;
std::condition_variable m_empty_signal;
};
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment