Commit cf9ec46a by Maarten L. Hekkelman

Removed DSSP code, moved to dssp project

parent ecbef51b
......@@ -212,8 +212,6 @@ set(project_sources
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2Cif.cpp
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2CifRemark3.cpp
${PROJECT_SOURCE_DIR}/src/dssp/DSSP.cpp
${PROJECT_SOURCE_DIR}/src/structure/AtomType.cpp
# ${PROJECT_SOURCE_DIR}/src/structure/BondMap.cpp
${PROJECT_SOURCE_DIR}/src/structure/Compound.cpp
......@@ -243,7 +241,6 @@ set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/cif/condition.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/category.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/row.hpp
${PROJECT_SOURCE_DIR}/include/cif++/dssp/DSSP.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/AtomType.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/BondMap.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/TlsParser.hpp
......@@ -428,7 +425,6 @@ if(CIFPP_BUILD_TESTS)
# structure
# sugar
# unit
dssp
unit-v2)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
......
Version 5.0.0
- Total rewrite of cif part
- Removed DSSP code, moved into dssp project itself
Version 4.2.1
- Improved REMARK 3 parser (for TLS in large molecules)
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// \file DSSP.hpp
/// Calculate DSSP-like secondary structure information.
#pragma once
#include <cif++/cif.hpp>
namespace dssp
{
/// Per-residue record used internally. It is only forward-declared in this
/// header: residue_info stores a pointer to it and DSSP::iterator is
/// constructed from one.
struct residue;
/// Secondary structure class of a residue, as returned by
/// DSSP::residue_info::type().
///
/// The underlying type is char and every enumerator has an explicit
/// single-character value, so a value can be cast to char directly.
enum class structure_type : char
{
	// no structure: a blank
	Loop = ' ',

	// helices
	Helix_3 = 'G',
	Alphahelix = 'H',
	Helix_5 = 'I',
	Helix_PPII = 'P',

	// bridges and strands
	Betabridge = 'B',
	Strand = 'E',

	// turns and bends
	Turn = 'T',
	Bend = 'S',
};
/// Selects the kind of helix to query with DSSP::residue_info::helix().
///
/// The enumerators keep their implicit values (0, 1, 2, 3); do not reorder.
enum class helix_type
{
	_3_10, ///< presumably the counterpart of structure_type::Helix_3
	alpha, ///< presumably the counterpart of structure_type::Alphahelix
	pi,    ///< presumably the counterpart of structure_type::Helix_5
	pp,    ///< presumably the counterpart of structure_type::Helix_PPII
};
/// Result type of DSSP::residue_info::helix(): where a residue sits relative
/// to a helix of the requested helix_type.
///
/// The enumerators keep their implicit values (0 .. 4); do not reorder.
enum class helix_position_type
{
	None,        ///< presumably: residue is not part of such a helix
	Start,       ///< residue starts a helix
	End,         ///< residue ends a helix
	StartAndEnd, ///< residue is both a start and an end
	Middle,      ///< residue lies inside a helix
};
/// Number of bins in each of the histogram arrays of dssp::statistics.
///
/// Declared inline constexpr so that every translation unit including this
/// header refers to the same compile-time constant.
inline constexpr std::size_t kHistogramSize = 30;
/// Summary numbers for a DSSP calculation, as returned by
/// DSSP::get_statistics().
///
/// Plain aggregate without member initialisers: a default-initialised
/// instance has indeterminate values.
struct statistics
{
	/// Simple counters.
	struct
	{
		uint32_t residues;
		uint32_t chains;
		uint32_t SS_bridges;
		uint32_t intra_chain_SS_bridges;
		uint32_t H_bonds;
		uint32_t H_bonds_in_antiparallel_bridges;
		uint32_t H_bonds_in_parallel_bridges;
		uint32_t H_Bonds_per_distance[11];
	} count;

	double accessible_surface;

	/// Histograms, each having kHistogramSize bins.
	struct
	{
		uint32_t residues_per_alpha_helix[kHistogramSize];
		uint32_t parallel_bridges_per_ladder[kHistogramSize];
		uint32_t antiparallel_bridges_per_ladder[kHistogramSize];
		uint32_t ladders_per_sheet[kHistogramSize];
	} histogram;
};
/// Result type of DSSP::residue_info::chain_break().
///
/// The enumerators keep their implicit values (0, 1, 2); do not reorder.
enum class chain_break_type
{
	None,     ///< no break
	NewChain, ///< presumably: the residue is the first of a new chain
	Gap,      ///< presumably: residues are missing within the same chain
};
/// Secondary structure information for one model of a datablock, exposed as
/// a range of residue_info objects.
///
/// The object cannot be copied; its state lives behind the m_impl pointer.
class DSSP
{
  public:
	/// \param db                              datablock containing the structure
	/// \param model_nr                        number of the model to use
	/// \param min_poly_proline_stretch_length minimal stretch length (presumably used for the PPII helix assignment)
	/// \param calculateSurfaceAccessibility   whether surface accessibility should be calculated
	DSSP(const cif::datablock &db, int model_nr, int min_poly_proline_stretch_length, bool calculateSurfaceAccessibility);
	~DSSP();

	DSSP(const DSSP &) = delete;
	DSSP &operator=(const DSSP &) = delete;

	statistics get_statistics() const;

	class iterator;
	using res_iterator = std::vector<residue>::iterator;

	/// \brief Lightweight, copyable handle to the data of a single residue
	///
	/// A default constructed residue_info is empty; it refers to no residue.
	class residue_info
	{
	  public:
		friend class iterator;

		residue_info() = default;
		residue_info(const residue_info &rhs) = default;
		residue_info &operator=(const residue_info &rhs) = default;

		/// \brief true if this object refers to a residue
		explicit operator bool() const { return not empty(); }

		/// \brief true if this object does not refer to a residue
		bool empty() const { return m_impl == nullptr; }

		// Identification of the residue
		std::string asym_id() const;
		int seq_id() const;
		std::string alt_id() const;
		std::string compound_id() const;

		std::string auth_asym_id() const;
		int auth_seq_id() const;

		std::string pdb_strand_id() const;
		int pdb_seq_num() const;
		std::string pdb_ins_code() const;

		// Geometry values for the residue.
		// NOTE(review): exact definitions are in the implementation, not visible here.
		float alpha() const;
		float kappa() const;
		float phi() const;
		float psi() const;
		float tco() const;

		/// \brief presumably the x, y and z coordinates of the C-alpha atom
		std::tuple<float, float, float> ca_location() const;

		chain_break_type chain_break() const;

		/// \brief the internal number in DSSP
		int nr() const;

		structure_type type() const;

		int ssBridgeNr() const;

		helix_position_type helix(helix_type helixType) const;

		bool is_alpha_helix_end_before_start() const;

		bool bend() const;

		double accessibility() const;

		/// \brief returns resinfo, ladder and parallel
		std::tuple<residue_info, int, bool> bridge_partner(int i) const;

		int sheet() const;

		/// \brief return resinfo and the energy of the bond
		std::tuple<residue_info, double> acceptor(int i) const;
		std::tuple<residue_info, double> donor(int i) const;

		/// \brief Simple compare equals, true if both refer to the same residue
		bool operator==(const residue_info &rhs) const
		{
			return m_impl == rhs.m_impl;
		}

		/// \brief Simple compare not-equals, the negation of operator==
		bool operator!=(const residue_info &rhs) const
		{
			return m_impl != rhs.m_impl;
		}

		/// \brief Returns true if there is a bond between the two residues \a a and \a b
		friend bool test_bond(residue_info const &a, residue_info const &b);

	  private:
		residue_info(residue *res)
			: m_impl(res)
		{
		}

		residue *m_impl = nullptr;
	};

	/// \brief Bidirectional iterator yielding residue_info objects
	///
	/// Dereferencing returns a reference to the residue_info stored inside
	/// the iterator itself. Two iterators compare equal when their
	/// residue_info objects point to the same residue.
	class iterator
	{
	  public:
		using iterator_category = std::bidirectional_iterator_tag;
		using value_type = residue_info;
		using difference_type = std::ptrdiff_t;
		using pointer = value_type *;
		using reference = value_type &;

		iterator(const iterator &i) = default;
		iterator(residue *res);
		iterator &operator=(const iterator &i) = default;

		reference operator*() { return m_current; }
		pointer operator->() { return &m_current; }

		iterator &operator++();
		iterator operator++(int)
		{
			auto tmp(*this);
			this->operator++();
			return tmp;
		}

		iterator &operator--();
		iterator operator--(int)
		{
			auto tmp(*this);
			this->operator--();
			return tmp;
		}

		bool operator==(const iterator &rhs) const { return m_current.m_impl == rhs.m_current.m_impl; }
		bool operator!=(const iterator &rhs) const { return m_current.m_impl != rhs.m_current.m_impl; }

	  private:
		residue_info m_current;
	};

	using value_type = residue_info;

	// To access residue info by key, i.e. LabelAsymID and LabelSeqID
	using key_type = std::tuple<std::string, int>;

	iterator begin() const;
	iterator end() const;

	residue_info operator[](const key_type &key) const;

	/// \brief true if there are no residues to iterate over
	bool empty() const { return begin() == end(); }

	// convenience method, when creating old style DSSP files
	enum class pdb_record_type
	{
		HEADER,
		COMPND,
		SOURCE,
		AUTHOR
	};

	std::string get_pdb_header_line(pdb_record_type pdb_record) const;

  private:
	struct DSSP_impl *m_impl;
};
} // namespace dssp
1 A _
2 A _
3 A _
4 A _
5 A E
6 A E
7 A E
8 A E
9 A E
10 A E
11 A E
12 A E
13 A E
14 A S
15 A H
16 A H
17 A H
18 A H
19 A H
20 A H
21 A H
22 A T
23 A T
24 A _
25 A _
26 A H
27 A H
28 A H
29 A H
30 A H
31 A H
32 A H
33 A H
34 A H
35 A H
36 A H
37 A T
38 A S
39 A _
40 A E
41 A E
42 A E
43 A E
44 A E
45 A E
46 A E
47 A T
48 A T
49 A E
50 A E
51 A E
52 A E
53 A E
54 A E
55 A E
56 A _
57 A S
58 A S
59 A _
60 A E
61 A E
62 A E
63 A E
64 A E
65 A E
66 A E
67 A T
68 A T
69 A S
70 A _
71 A E
72 A E
73 A E
74 A E
75 A _
76 A T
77 A T
78 A S
79 A _
80 A E
81 A E
82 A E
83 A E
84 A E
85 A E
86 A E
87 A E
88 A E
89 A E
90 A T
91 A T
92 A E
93 A E
94 A E
95 A E
96 A E
97 A E
98 A E
99 A E
100 A S
101 A S
102 A S
103 A _
104 A _
105 A _
106 A _
107 A E
108 A E
109 A E
110 A E
111 A E
112 A E
113 A E
114 A _
115 A T
116 A T
117 A S
118 A _
119 A E
120 A E
121 A E
122 A E
123 A E
124 A E
125 A E
126 A T
127 A T
128 A E
129 A E
130 A E
131 A E
132 A E
133 A E
134 A E
135 A E
136 A E
137 A _
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <charconv>
#include <stdexcept>
#include <cif++/cif.hpp>
#include <cif++/dssp/DSSP.hpp>
namespace tt = boost::test_tools;
// Directory holding the test data. Starts out as the current working
// directory; init_unit_test() replaces it when exactly one command line
// argument is given.
std::filesystem::path gTestDir = std::filesystem::current_path();
// --------------------------------------------------------------------
// User-defined literal: builds a cif::file from an input stream that reads
// the characters of the string literal directly, without copying them.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer whose get area is the caller supplied character range
	struct literal_buffer : public std::streambuf
	{
		literal_buffer(char *first, size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg() takes non-const pointers, hence the const_cast. The buffer is
	// only handed to an input stream here.
	literal_buffer source(const_cast<char *>(text), length);
	std::istream input(&source);

	return cif::file(input);
}
// --------------------------------------------------------------------
// Initialisation function called by Boost.Test before the tests run.
// Sets the verbosity of cif, determines the test data directory and
// registers the dictionary and compound resources relative to it.
// Always returns true.
bool init_unit_test()
{
	cif::VERBOSE = 1;

	// not a test, just initialize test dir: a single command line argument
	// overrides the default location
	auto &suite = boost::unit_test::framework::master_test_suite();
	if (suite.argc == 2)
		gTestDir = suite.argv[1];

	const std::filesystem::path parentDir = gTestDir / "..";

	// do this now, avoids the need for installing
	cif::add_file_resource("mmcif_pdbx.dic", parentDir / "rsrc" / "mmcif_pdbx.dic");

	// initialize CCD location
	cif::add_file_resource("components.cif", parentDir / "data" / "ccd-subset.cif");

	return true;
}
// --------------------------------------------------------------------
// Runs DSSP on 1cbs.cif and compares, residue by residue, the result with
// the expected values in 1cbs-dssp-test.tsv. Each line of that file has
// three tab separated columns: sequence id, asym id and the secondary
// structure character, where '_' stands for a blank.
BOOST_AUTO_TEST_CASE(dssp_1)
{
	cif::file f(gTestDir / "1cbs.cif");

	// BOOST_ASSERT expands to nothing when NDEBUG is defined, use the test
	// framework instead so the precondition is always evaluated
	BOOST_REQUIRE(f.is_valid());

	std::ifstream t(gTestDir / "1cbs-dssp-test.tsv");

	// fail once and clearly instead of once per residue
	BOOST_REQUIRE(t.is_open());

	dssp::DSSP dssp(f.front(), 1, 3, true);

	for (const auto &residue : dssp)
	{
		std::string line;
		getline(t, line);

		std::cout << line << '\n';

		const auto fields = cif::split(line, "\t");

		BOOST_CHECK_EQUAL(fields.size(), 3u);
		if (fields.size() != 3)
			continue;

		// from_chars takes pointers, the iterators of a string_view are not
		// guaranteed to be pointers on every standard library
		int seqID = 0;
		const char *first = fields[0].data();
		const char *last = first + fields[0].size();
		const auto parsed = std::from_chars(first, last, seqID);

		const bool validSeqID = parsed.ec == std::errc{} and parsed.ptr == last;
		BOOST_CHECK_MESSAGE(validSeqID, "invalid sequence id in line '" << line << "'");
		if (not validSeqID)
			continue;

		std::string asymID{ fields[1] };
		std::string secstr{ fields[2] };

		// the file uses an underscore for the blank of structure_type::Loop
		if (secstr == "_")
			secstr = " ";

		// calling front() on an empty string is undefined behaviour
		BOOST_CHECK_MESSAGE(not secstr.empty(), "missing secondary structure in line '" << line << "'");
		if (secstr.empty())
			continue;

		BOOST_CHECK_EQUAL(residue.asym_id(), asymID);
		BOOST_CHECK_EQUAL(residue.seq_id(), seqID);
		BOOST_CHECK_EQUAL(static_cast<char>(residue.type()), secstr.front());
	}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment