Commit 26a5410b by Maarten L. Hekkelman

documenting model

parent f44e6d09
...@@ -30,13 +30,31 @@ ...@@ -30,13 +30,31 @@
#include "cif++/datablock.hpp" #include "cif++/datablock.hpp"
#include "cif++/point.hpp" #include "cif++/point.hpp"
#include <numeric>
#include <memory> #include <memory>
#include <numeric>
#if __cpp_lib_format #if __cpp_lib_format
#include <format> #include <format>
#endif #endif
/** @file model.hpp
*
* This file contains code to work with models of molecules.
*
* The classes available encapsulate the real world concepts of
* atoms, residues, monomers, polymers and everything is then
* bound together in a structure.
*
* This code is not finished yet, ideally it would be a high
* level interface to manipulate macro molecular structures
* and an attempt has been made to start work on this. But
* there's still a lot that needs to be implemented.
*
* However, the code that is here is still useful in
* manipulating the underlying mmCIF data model.
*
*/
namespace cif::mm namespace cif::mm
{ {
...@@ -48,9 +66,24 @@ class structure; ...@@ -48,9 +66,24 @@ class structure;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/**
* @brief The class atom encapsulates the data in _atom_site and
* _atom_site_anisotrop
*
* The class atom is a kind of flyweight class. It can be copied
* with low overhead. All data is stored in the underlying mmCIF
* categories but some very often used fields are cached in the
* impl.
*
* It is also possible to have symmetry copies of atoms. They
* share the same data in the cif::category but their location
* differs by using a symmetry operator.
*/
class atom class atom
{ {
private: private:
/** @cond */
struct atom_impl : public std::enable_shared_from_this<atom_impl> struct atom_impl : public std::enable_shared_from_this<atom_impl>
{ {
atom_impl(const datablock &db, std::string_view id) atom_impl(const datablock &db, std::string_view id)
...@@ -93,24 +126,24 @@ class atom ...@@ -93,24 +126,24 @@ class atom
row_handle row() row_handle row()
{ {
return m_cat[{{"id", m_id}}]; return m_cat[{ { "id", m_id } }];
} }
const row_handle row() const const row_handle row() const
{ {
return m_cat[{{"id", m_id}}]; return m_cat[{ { "id", m_id } }];
} }
row_handle row_aniso() row_handle row_aniso()
{ {
auto cat = m_db.get("atom_site_anisotrop"); auto cat = m_db.get("atom_site_anisotrop");
return cat ? cat->operator[]({ {"id", m_id} }) : row_handle{}; return cat ? cat->operator[]({ { "id", m_id } }) : row_handle{};
} }
const row_handle row_aniso() const const row_handle row_aniso() const
{ {
auto cat = m_db.get("atom_site_anisotrop"); auto cat = m_db.get("atom_site_anisotrop");
return cat ? cat->operator[]({ {"id", m_id} }) : row_handle{}; return cat ? cat->operator[]({ { "id", m_id } }) : row_handle{};
} }
const datablock &m_db; const datablock &m_db;
...@@ -119,46 +152,62 @@ class atom ...@@ -119,46 +152,62 @@ class atom
point m_location; point m_location;
std::string m_symop = "1_555"; std::string m_symop = "1_555";
}; };
/** @endcond */
public: public:
/**
* @brief Construct a new, empty atom object
*/
atom() {} atom() {}
/**
* @brief Construct a new atom object using @a impl as impl
*
* @param impl The implementation objectt
*/
atom(std::shared_ptr<atom_impl> impl) atom(std::shared_ptr<atom_impl> impl)
: m_impl(impl) : m_impl(impl)
{ {
} }
/**
* @brief Copy construct a new atom object
*/
atom(const atom &rhs) atom(const atom &rhs)
: m_impl(rhs.m_impl) : m_impl(rhs.m_impl)
{ {
} }
/**
* @brief Construct a new atom object based on a cif::row
*
* @param db The datablock where the _atom_site category resides
* @param row The row containing the data for this atom
*/
atom(const datablock &db, const row_handle &row) atom(const datablock &db, const row_handle &row)
: atom(std::make_shared<atom_impl>(db, row["id"].as<std::string>())) : atom(std::make_shared<atom_impl>(db, row["id"].as<std::string>()))
{ {
} }
// a special constructor to create symmetry copies /**
* @brief A special constructor to create symmetry copies
*
* @param rhs The original atom to copy
* @param symmmetry_location The symmetry location
* @param symmetry_operation The symmetry operator used
*/
atom(const atom &rhs, const point &symmmetry_location, const std::string &symmetry_operation) atom(const atom &rhs, const point &symmmetry_location, const std::string &symmetry_operation)
: atom(std::make_shared<atom_impl>(*rhs.m_impl, symmmetry_location, symmetry_operation)) : atom(std::make_shared<atom_impl>(*rhs.m_impl, symmmetry_location, symmetry_operation))
{ {
} }
/// \brief To quickly test if the atom has data
explicit operator bool() const { return (bool)m_impl; } explicit operator bool() const { return (bool)m_impl; }
// // return a copy of this atom, with data copied instead of referenced /// \brief Copy assignement operator
// atom clone() const
// {
// auto copy = std::make_shared<atom_impl>(*m_impl);
// copy->mClone = true;
// return atom(copy);
// }
atom &operator=(const atom &rhs) = default; atom &operator=(const atom &rhs) = default;
// template <typename T> /// \brief Return the field named @a name in the _atom_site category for this atom
// T get_property(const std::string_view name) const;
std::string get_property(std::string_view name) const std::string get_property(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -166,6 +215,7 @@ class atom ...@@ -166,6 +215,7 @@ class atom
return m_impl->get_property(name); return m_impl->get_property(name);
} }
/// \brief Return the field named @a name in the _atom_site category for this atom cast to an int
int get_property_int(std::string_view name) const int get_property_int(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -173,6 +223,7 @@ class atom ...@@ -173,6 +223,7 @@ class atom
return m_impl->get_property_int(name); return m_impl->get_property_int(name);
} }
/// \brief Return the field named @a name in the _atom_site category for this atom cast to a float
float get_property_float(std::string_view name) const float get_property_float(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -180,6 +231,7 @@ class atom ...@@ -180,6 +231,7 @@ class atom
return m_impl->get_property_float(name); return m_impl->get_property_float(name);
} }
/// \brief Set value for the field named @a name in the _atom_site category to @a value
void set_property(const std::string_view name, const std::string &value) void set_property(const std::string_view name, const std::string &value)
{ {
if (not m_impl) if (not m_impl)
...@@ -187,17 +239,27 @@ class atom ...@@ -187,17 +239,27 @@ class atom
m_impl->set_property(name, value); m_impl->set_property(name, value);
} }
/// \brief Set value for the field named @a name in the _atom_site category to @a value
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0> template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
void set_property(const std::string_view name, const T &value) void set_property(const std::string_view name, const T &value)
{ {
set_property(name, std::to_string(value)); set_property(name, std::to_string(value));
} }
/** Return the ID of the _atom_site record.
*
* @note Although I've never seen anything other than integers,
* the standard says this should be a string and so we use that.
*/
const std::string &id() const { return impl().m_id; } const std::string &id() const { return impl().m_id; }
/// \brief Return the type of the atom
cif::atom_type get_type() const { return atom_type_traits(get_property("type_symbol")).type(); } cif::atom_type get_type() const { return atom_type_traits(get_property("type_symbol")).type(); }
/// \brief Return the cached location of this atom
point get_location() const { return impl().m_location; } point get_location() const { return impl().m_location; }
/// \brief Set the location of this atom, will set both the cached data as well as the data in the underlying _atom_site category
void set_location(point p) void set_location(point p)
{ {
if (not m_impl) if (not m_impl)
...@@ -246,53 +308,57 @@ class atom ...@@ -246,53 +308,57 @@ class atom
set_location(loc); set_location(loc);
} }
// for direct access to underlying data, be careful! /// for direct access to underlying data, be careful!
const row_handle get_row() const { return impl().row(); } const row_handle get_row() const { return impl().row(); }
/// for direct access to underlying data, be careful!
const row_handle get_row_aniso() const { return impl().row_aniso(); } const row_handle get_row_aniso() const { return impl().row_aniso(); }
/// Return if the atom is actually a symmetry copy or the original one
bool is_symmetry_copy() const { return impl().m_symop != "1_555"; } bool is_symmetry_copy() const { return impl().m_symop != "1_555"; }
std::string symmetry() const { return impl().m_symop; }
// const compound &compound() const; /// Return the symmetry operator used
std::string symmetry() const { return impl().m_symop; }
/// Return true if this atom is part of a water molecule
bool is_water() const bool is_water() const
{ {
auto comp_id = get_label_comp_id(); auto comp_id = get_label_comp_id();
return comp_id == "HOH" or comp_id == "H2O" or comp_id == "WAT"; return comp_id == "HOH" or comp_id == "H2O" or comp_id == "WAT";
} }
/// Return the charge
int get_charge() const { return impl().get_charge(); } int get_charge() const { return impl().get_charge(); }
// float uIso() const; /// Return the occupancy
// bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
float get_occupancy() const { return get_property_float("occupancy"); } float get_occupancy() const { return get_property_float("occupancy"); }
// specifications // specifications
std::string get_label_asym_id() const { return get_property("label_asym_id"); } std::string get_label_asym_id() const { return get_property("label_asym_id"); } ///< Return the label_asym_id property
int get_label_seq_id() const { return get_property_int("label_seq_id"); } int get_label_seq_id() const { return get_property_int("label_seq_id"); } ///< Return the label_seq_id property
std::string get_label_atom_id() const { return get_property("label_atom_id"); } std::string get_label_atom_id() const { return get_property("label_atom_id"); } ///< Return the label_atom_id property
std::string get_label_alt_id() const { return get_property("label_alt_id"); } std::string get_label_alt_id() const { return get_property("label_alt_id"); } ///< Return the label_alt_id property
std::string get_label_comp_id() const { return get_property("label_comp_id"); } std::string get_label_comp_id() const { return get_property("label_comp_id"); } ///< Return the label_comp_id property
std::string get_label_entity_id() const { return get_property("label_entity_id"); } std::string get_label_entity_id() const { return get_property("label_entity_id"); } ///< Return the label_entity_id property
std::string get_auth_asym_id() const { return get_property("auth_asym_id"); } std::string get_auth_asym_id() const { return get_property("auth_asym_id"); } ///< Return the auth_asym_id property
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); } std::string get_auth_seq_id() const { return get_property("auth_seq_id"); } ///< Return the auth_seq_id property
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); } std::string get_auth_atom_id() const { return get_property("auth_atom_id"); } ///< Return the auth_atom_id property
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); } std::string get_auth_alt_id() const { return get_property("auth_alt_id"); } ///< Return the auth_alt_id property
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); } std::string get_auth_comp_id() const { return get_property("auth_comp_id"); } ///< Return the auth_comp_id property
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } ///< Return the pdb_ins_code property
/// Return true if this atom is an alternate
bool is_alternate() const { return not get_label_alt_id().empty(); } bool is_alternate() const { return not get_label_alt_id().empty(); }
// std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id /// Convenience method to return a string that might be ID in PDB space
std::string pdb_id() const std::string pdb_id() const
{ {
return get_label_comp_id() + '_' + get_auth_asym_id() + '_' + get_auth_seq_id() + get_pdb_ins_code(); return get_label_comp_id() + '_' + get_auth_asym_id() + '_' + get_auth_seq_id() + get_pdb_ins_code();
} }
/// Compare two atoms
bool operator==(const atom &rhs) const bool operator==(const atom &rhs) const
{ {
if (m_impl == rhs.m_impl) if (m_impl == rhs.m_impl)
...@@ -304,32 +370,35 @@ class atom ...@@ -304,32 +370,35 @@ class atom
return &m_impl->m_db == &rhs.m_impl->m_db and m_impl->m_id == rhs.m_impl->m_id; return &m_impl->m_db == &rhs.m_impl->m_db and m_impl->m_id == rhs.m_impl->m_id;
} }
/// Compare two atoms
bool operator!=(const atom &rhs) const bool operator!=(const atom &rhs) const
{ {
return not operator==(rhs); return not operator==(rhs);
} }
// // access data in compound for this atom /// Is this atom a backbone atom
// convenience routine
bool is_back_bone() const bool is_back_bone() const
{ {
auto atomID = get_label_atom_id(); auto atomID = get_label_atom_id();
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA"; return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
} }
/// swap
void swap(atom &b) void swap(atom &b)
{ {
std::swap(m_impl, b.m_impl); std::swap(m_impl, b.m_impl);
} }
/// Compare this atom with @a b
int compare(const atom &b) const { return impl().compare(*b.m_impl); } int compare(const atom &b) const { return impl().compare(*b.m_impl); }
/// Should this atom sort before @a rhs
bool operator<(const atom &rhs) const bool operator<(const atom &rhs) const
{ {
return compare(rhs) < 0; return compare(rhs) < 0;
} }
/// Write the atom to std::ostream @a os
friend std::ostream &operator<<(std::ostream &os, const atom &atom); friend std::ostream &operator<<(std::ostream &os, const atom &atom);
private: private:
...@@ -345,16 +414,23 @@ class atom ...@@ -345,16 +414,23 @@ class atom
std::shared_ptr<atom_impl> m_impl; std::shared_ptr<atom_impl> m_impl;
}; };
/** swap */
inline void swap(atom &a, atom &b) inline void swap(atom &a, atom &b)
{ {
a.swap(b); a.swap(b);
} }
/** Calculate the distance between atoms @a and @a b in ångström */
inline float distance(const atom &a, const atom &b) inline float distance(const atom &a, const atom &b)
{ {
return distance(a.get_location(), b.get_location()); return distance(a.get_location(), b.get_location());
} }
/** Calculate the square of the distance between atoms @a and @a b in ångström
*
* @note Use this whenever possible instead of simply using distance since
* this function does not have to calculate a square root which is expensive.
*/
inline float distance_squared(const atom &a, const atom &b) inline float distance_squared(const atom &a, const atom &b)
{ {
return distance_squared(a.get_location(), b.get_location()); return distance_squared(a.get_location(), b.get_location());
...@@ -362,23 +438,36 @@ inline float distance_squared(const atom &a, const atom &b) ...@@ -362,23 +438,36 @@ inline float distance_squared(const atom &a, const atom &b)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/**
* @brief The entity types that can be found in a mmCIF file
*
*/
enum class EntityType enum class EntityType
{ {
polymer, Polymer, ///< entity is a polymer
NonPolymer, NonPolymer, ///< entity is not a polymer
Macrolide, Macrolide, ///< entity is a macrolide
Water, Water, ///< water in the solvent model
Branched Branched ///< entity is branched
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/**
* @brief The class residue is a collection of atoms forming a molecule
*
* This class is used to store ligand e.g. Derived classes are monomer
* and sugar.
*/
class residue class residue
{ {
public: public:
friend class structure; friend class structure;
// constructor /**
* @brief Construct a new residue object based on key items
*/
residue(structure &structure, const std::string &compoundID, residue(structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID, const std::string &asymID, int seqID,
const std::string &authAsymID, const std::string &authSeqID, const std::string &authAsymID, const std::string &authSeqID,
...@@ -393,8 +482,10 @@ class residue ...@@ -393,8 +482,10 @@ class residue
{ {
} }
/** Construct a new residue in structure with the atoms in @a atoms */
residue(structure &structure, const std::vector<atom> &atoms); residue(structure &structure, const std::vector<atom> &atoms);
/** @cond */
residue(const residue &rhs) = delete; residue(const residue &rhs) = delete;
residue &operator=(const residue &rhs) = delete; residue &operator=(const residue &rhs) = delete;
...@@ -402,50 +493,61 @@ class residue ...@@ -402,50 +493,61 @@ class residue
residue &operator=(residue &&rhs) = default; residue &operator=(residue &&rhs) = default;
virtual ~residue() = default; virtual ~residue() = default;
/** @endcond */
/** Return the entity_id of this residue */
std::string get_entity_id() const; std::string get_entity_id() const;
/** Return the entity type of this residue */
EntityType entity_type() const; EntityType entity_type() const;
const std::string &get_asym_id() const { return m_asym_id; } const std::string &get_asym_id() const { return m_asym_id; } ///< Return the asym_id
int get_seq_id() const { return m_seq_id; } int get_seq_id() const { return m_seq_id; } ///< Return the seq_id
const std::string get_auth_asym_id() const { return m_auth_asym_id; } const std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the auth_asym_id
const std::string get_auth_seq_id() const { return m_auth_seq_id; } const std::string get_auth_seq_id() const { return m_auth_seq_id; } ///< Return the auth_seq_id
std::string get_pdb_ins_code() const { return m_pdb_ins_code; } std::string get_pdb_ins_code() const { return m_pdb_ins_code; } ///< Return the pdb_ins_code
const std::string &get_compound_id() const { return m_compound_id; } const std::string &get_compound_id() const { return m_compound_id; } ///< Return the compound_id
void set_compound_id(const std::string &id) { m_compound_id = id; } void set_compound_id(const std::string &id) { m_compound_id = id; } ///< Set the compound_id to @a id
/** Return the structure this residue belongs to */
structure *get_structure() const { return m_structure; } structure *get_structure() const { return m_structure; }
// const compound &compound() const; /** Return a list of the atoms for this residue */
std::vector<atom> &atoms() std::vector<atom> &atoms()
{ {
return m_atoms; return m_atoms;
} }
/** Return a const list of the atoms for this residue */
const std::vector<atom> &atoms() const const std::vector<atom> &atoms() const
{ {
return m_atoms; return m_atoms;
} }
/** Add atom @a atom to the atoms in this residue */
void add_atom(atom &atom); void add_atom(atom &atom);
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id. /// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
std::vector<atom> unique_atoms() const; std::vector<atom> unique_atoms() const;
/// \brief The alt ID used for the unique atoms /// \brief Return the atom with atom_id @a atomID
std::string unique_alt_id() const;
atom get_atom_by_atom_id(const std::string &atomID) const; atom get_atom_by_atom_id(const std::string &atomID) const;
// Is this residue a single entity? /// \brief Return the list of atoms having ID \a atomID
///
/// This includes all alternate atoms with this ID
/// whereas get_atom_by_atom_id only returns the first unique atom
std::vector<atom> get_atoms_by_id(const std::string &atomID) const;
/// \brief Is this residue a single entity?
bool is_entity() const; bool is_entity() const;
/// \brief Is this residue a water molecule?
bool is_water() const { return m_compound_id == "HOH"; } bool is_water() const { return m_compound_id == "HOH"; }
// bool empty() const { return m_structure == nullptr; }
/// \brief Return true if this residue has alternate atoms
bool has_alternate_atoms() const; bool has_alternate_atoms() const;
/// \brief Return the list of unique alt ID's present in this residue /// \brief Return the list of unique alt ID's present in this residue
...@@ -454,14 +556,13 @@ class residue ...@@ -454,14 +556,13 @@ class residue
/// \brief Return the list of unique atom ID's /// \brief Return the list of unique atom ID's
std::set<std::string> get_atom_ids() const; std::set<std::string> get_atom_ids() const;
/// \brief Return the list of atoms having ID \a atomID /// \brief Return a tuple containing the center location and the radius for the atoms of this residue
std::vector<atom> get_atoms_by_id(const std::string &atomID) const;
// some routines for 3d work
std::tuple<point, float> center_and_radius() const; std::tuple<point, float> center_and_radius() const;
/// \brief Write the residue @a res to the std::ostream @a os
friend std::ostream &operator<<(std::ostream &os, const residue &res); friend std::ostream &operator<<(std::ostream &os, const residue &res);
/// \brief Return true if this residue is equal to @a rhs
bool operator==(const residue &rhs) const bool operator==(const residue &rhs) const
{ {
return this == &rhs or (m_structure == rhs.m_structure and return this == &rhs or (m_structure == rhs.m_structure and
...@@ -472,6 +573,7 @@ class residue ...@@ -472,6 +573,7 @@ class residue
} }
protected: protected:
/** @cond */
residue() {} residue() {}
structure *m_structure = nullptr; structure *m_structure = nullptr;
...@@ -479,45 +581,53 @@ class residue ...@@ -479,45 +581,53 @@ class residue
int m_seq_id = 0; int m_seq_id = 0;
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code; std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
std::vector<atom> m_atoms; std::vector<atom> m_atoms;
/** @endcond */
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// a monomer models a single residue in a protein chain
/**
* @brief a monomer models a single residue in a protein chain
*
*/
class monomer : public residue class monomer : public residue
{ {
public: public:
// monomer();
monomer(const monomer &rhs) = delete; monomer(const monomer &rhs) = delete;
monomer &operator=(const monomer &rhs) = delete; monomer &operator=(const monomer &rhs) = delete;
/// \brief Move constructor
monomer(monomer &&rhs); monomer(monomer &&rhs);
/// \brief Move assignment operator
monomer &operator=(monomer &&rhs); monomer &operator=(monomer &&rhs);
/// \brief constructor with actual values
monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID, monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
const std::string &pdbInsCode, const std::string &compoundID); const std::string &pdbInsCode, const std::string &compoundID);
bool is_first_in_chain() const; bool is_first_in_chain() const; ///< Return if this residue is the first residue in the chain
bool is_last_in_chain() const; bool is_last_in_chain() const; ///< Return if this residue is the last residue in the chain
// convenience // convenience
bool has_alpha() const; bool has_alpha() const; ///< Return if a alpha value can be calculated (depends on location in chain)
bool has_kappa() const; bool has_kappa() const; ///< Return if a kappa value can be calculated (depends on location in chain)
// Assuming this is really an amino acid... // Assuming this is really an amino acid...
float phi() const; float phi() const; ///< Return the phi value for this residue
float psi() const; float psi() const; ///< Return the psi value for this residue
float alpha() const; float alpha() const; ///< Return the alpha value for this residue
float kappa() const; float kappa() const; ///< Return the kappa value for this residue
float tco() const; float tco() const; ///< Return the tco value for this residue
float omega() const; float omega() const; ///< Return the omega value for this residue
// torsion angles // torsion angles
size_t nr_of_chis() const; size_t nr_of_chis() const; ///< Return how many torsion angles can be calculated
float chi(size_t i) const; float chi(size_t i) const; ///< Return torsion angle @a i
bool is_cis() const; bool is_cis() const; ///< Return true if this residue is in a cis conformation
/// \brief Returns true if the four atoms C, CA, N and O are present /// \brief Returns true if the four atoms C, CA, N and O are present
bool is_complete() const; bool is_complete() const;
...@@ -525,24 +635,38 @@ class monomer : public residue ...@@ -525,24 +635,38 @@ class monomer : public residue
/// \brief Returns true if any of the backbone atoms has an alternate /// \brief Returns true if any of the backbone atoms has an alternate
bool has_alternate_backbone_atoms() const; bool has_alternate_backbone_atoms() const;
atom CAlpha() const { return get_atom_by_atom_id("CA"); } atom CAlpha() const { return get_atom_by_atom_id("CA"); } ///< Return the CAlpha atom
atom C() const { return get_atom_by_atom_id("C"); } atom C() const { return get_atom_by_atom_id("C"); } ///< Return the C atom
atom N() const { return get_atom_by_atom_id("N"); } atom N() const { return get_atom_by_atom_id("N"); } ///< Return the N atom
atom O() const { return get_atom_by_atom_id("O"); } atom O() const { return get_atom_by_atom_id("O"); } ///< Return the O atom
atom H() const { return get_atom_by_atom_id("H"); } atom H() const { return get_atom_by_atom_id("H"); } ///< Return the H atom
/// \brief Return true if this monomer is bonded to monomer @a rhs
bool is_bonded_to(const monomer &rhs) const bool is_bonded_to(const monomer &rhs) const
{ {
return this != &rhs and are_bonded(*this, rhs); return this != &rhs and are_bonded(*this, rhs);
} }
/**
* @brief Return true if the distance between the CA atoms of the
* two monomers @a a and @a b are within the expected range with
* an error margin of @a errorMargin.
*
* The expected distance is 3.0 ångström for a cis conformation
* and 3.8 ångström for trans.
*/
static bool are_bonded(const monomer &a, const monomer &b, float errorMargin = 0.5f); static bool are_bonded(const monomer &a, const monomer &b, float errorMargin = 0.5f);
/// \brief Return true if the bond between @a a and @a b is cis
static bool is_cis(const monomer &a, const monomer &b); static bool is_cis(const monomer &a, const monomer &b);
/// \brief Return the omega angle between @a a and @a b
static float omega(const monomer &a, const monomer &b); static float omega(const monomer &a, const monomer &b);
// for LEU and VAL /// \brief Return the chiral volume, only for LEU and VAL
float chiral_volume() const; float chiral_volume() const;
/// \brief Compare this monomer with \a rhs
bool operator==(const monomer &rhs) const bool operator==(const monomer &rhs) const
{ {
return m_polymer == rhs.m_polymer and m_index == rhs.m_index; return m_polymer == rhs.m_polymer and m_index == rhs.m_index;
...@@ -555,24 +679,24 @@ class monomer : public residue ...@@ -555,24 +679,24 @@ class monomer : public residue
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/**
* @brief A polymer is simply a list of monomers
*
*/
class polymer : public std::vector<monomer> class polymer : public std::vector<monomer>
{ {
public: public:
/// \brief Constructor
polymer(structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id); polymer(structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
polymer(const polymer &) = delete; polymer(const polymer &) = delete;
polymer &operator=(const polymer &) = delete; polymer &operator=(const polymer &) = delete;
// monomer &getBySeqID(int seqID); structure *get_structure() const { return m_structure; } ///< Return the structure
// const monomer &getBySeqID(int seqID) const;
structure *get_structure() const { return m_structure; }
std::string get_asym_id() const { return m_asym_id; } std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
std::string get_auth_asym_id() const { return m_auth_asym_id; } // The PDB chain ID, actually std::string get_auth_asym_id() const { return m_auth_asym_id; } ///< Return the PDB chain ID, actually
std::string get_entity_id() const { return m_entity_id; } std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
// int Distance(const monomer &a, const monomer &b) const;
private: private:
structure *m_structure; structure *m_structure;
...@@ -586,28 +710,52 @@ class polymer : public std::vector<monomer> ...@@ -586,28 +710,52 @@ class polymer : public std::vector<monomer>
class branch; class branch;
/**
* @brief A sugar is a residue that is part of a glycosylation site
*
*/
class sugar : public residue class sugar : public residue
{ {
public: public:
/// \brief constructor
sugar(branch &branch, const std::string &compoundID, sugar(branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID); const std::string &asymID, int authSeqID);
/** @cond */
sugar(sugar &&rhs); sugar(sugar &&rhs);
sugar &operator=(sugar &&rhs); sugar &operator=(sugar &&rhs);
/** @endcond */
int num() const {
/**
* @brief Return the sugar number in the glycosylation tree
*
* To store the sugar number, the auth_seq_id field has been overloaded
* in the specification. But since a sugar number should be, ehm, a number
* and auth_seq_id is specified to contain a string, we do a check here
* to see if it really is a number.
*
* @return The sugar number
*/
int num() const
{
int result; int result;
auto r = std::from_chars(m_auth_seq_id.data(), m_auth_seq_id.data() + m_auth_seq_id.length(), result); auto r = std::from_chars(m_auth_seq_id.data(), m_auth_seq_id.data() + m_auth_seq_id.length(), result);
if (r.ec != std::errc()) if (r.ec != std::errc())
throw std::runtime_error("The auth_seq_id should be a number for a sugar"); throw std::runtime_error("The auth_seq_id should be a number for a sugar");
return result; return result;
} }
/// \brief Return the name of this sugar
std::string name() const; std::string name() const;
/// \brief Return the atom the C1 is linked to /// \brief Return the atom the C1 is linked to
atom get_link() const { return m_link; } atom get_link() const { return m_link; }
/// \brief Set the link atom C1 is linked to to @a link
void set_link(atom link) { m_link = link; } void set_link(atom link) { m_link = link; }
/// \brief Return the sugar number of the sugar linked to C1
size_t get_link_nr() const size_t get_link_nr() const
{ {
size_t result = 0; size_t result = 0;
...@@ -616,42 +764,69 @@ class sugar : public residue ...@@ -616,42 +764,69 @@ class sugar : public residue
return result; return result;
} }
cif::mm::atom add_atom(row_initializer atom_info); /// \brief Construct an atom based on the info in @a atom_info and add it to this sugar
atom add_atom(row_initializer atom_info);
private: private:
branch *m_branch; branch *m_branch;
atom m_link; atom m_link;
}; };
/**
* @brief A branch is a list of sugars
*
* A list is how it is stored, but a branch is like a branch in a tree,
* with potentially lots of sub branches. Each sugar is linked to a sugar
* up in the branch with its (almost always) C1 atom.
*
*/
class branch : public std::vector<sugar> class branch : public std::vector<sugar>
{ {
public: public:
/// \brief constructor
branch(structure &structure, const std::string &asym_id, const std::string &entity_id); branch(structure &structure, const std::string &asym_id, const std::string &entity_id);
branch(const branch &) = delete; branch(const branch &) = delete;
branch &operator=(const branch &) = delete; branch &operator=(const branch &) = delete;
/** @cond */
branch(branch &&) = default; branch(branch &&) = default;
branch &operator=(branch &&) = default; branch &operator=(branch &&) = default;
/** @endcond */
/// \brief Update the link atoms in all sugars in this branch
void link_atoms(); void link_atoms();
/// \brief Return the name of the branch
std::string name() const; std::string name() const;
/// \brief Return the weight of the branch based on the formulae of the sugars
float weight() const; float weight() const;
std::string get_asym_id() const { return m_asym_id; }
std::string get_entity_id() const { return m_entity_id; }
structure &get_structure() { return *m_structure; } std::string get_asym_id() const { return m_asym_id; } ///< Return the asym_id
structure &get_structure() const { return *m_structure; } std::string get_entity_id() const { return m_entity_id; } ///< Return the entity_id
structure &get_structure() { return *m_structure; } ///< Return the structure
structure &get_structure() const { return *m_structure; } ///< Return the structure
/// \brief Return a reference to the sugar with number @a num
sugar &get_sugar_by_num(int nr); sugar &get_sugar_by_num(int nr);
/// \brief Return a const reference to the sugar with number @a num
const sugar &get_sugar_by_num(int nr) const const sugar &get_sugar_by_num(int nr) const
{ {
return const_cast<branch *>(this)->get_sugar_by_num(nr); return const_cast<branch *>(this)->get_sugar_by_num(nr);
} }
/// \brief Construct a new sugar with compound ID @a compound_id in this branch
/// and return a reference to the newly created sugar. Use this to create a first
/// sugar in a branch.
sugar &construct_sugar(const std::string &compound_id); sugar &construct_sugar(const std::string &compound_id);
/// \brief Construct a new sugar with compound ID @a compound_id in this branch
/// and return a reference to the newly created sugar. The newly created sugar
/// will be connected to an already created sugar in the branch using the
/// information in @a atom_id, @a linked_sugar_nr and @a linked_atom_id
sugar &construct_sugar(const std::string &compound_id, const std::string &atom_id, sugar &construct_sugar(const std::string &compound_id, const std::string &atom_id,
int linked_sugar_nr, const std::string &linked_atom_id); int linked_sugar_nr, const std::string &linked_atom_id);
...@@ -666,11 +841,13 @@ class branch : public std::vector<sugar> ...@@ -666,11 +841,13 @@ class branch : public std::vector<sugar>
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/// \brief A still very limited set of options for reading structures
enum class StructureOpenOptions enum class StructureOpenOptions
{ {
SkipHydrogen = 1 << 0 SkipHydrogen = 1 << 0 ///< Do not include hydrogen atoms in the structure object
}; };
/// \brief A way to combine two options. Not very useful as there is only one...
constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b) constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{ {
return static_cast<int>(a) bitand static_cast<int>(b); return static_cast<int>(a) bitand static_cast<int>(b);
...@@ -678,60 +855,65 @@ constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b) ...@@ -678,60 +855,65 @@ constexpr inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/**
* @brief A structure is the combination of polymers, ligand and sugar branches found
* in the mmCIF file. This will always contain one model, the first model is taken
* if not otherwise specified.
*
*/
class structure class structure
{ {
public: public:
/// \brief Read the structure from cif::file @a p
structure(file &p, size_t modelNr = 1, StructureOpenOptions options = {}); structure(file &p, size_t modelNr = 1, StructureOpenOptions options = {});
/// \brief Load the structure from already parsed mmCIF data in @a db
structure(datablock &db, size_t modelNr = 1, StructureOpenOptions options = {}); structure(datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
/** @cond */
structure(structure &&s) = default; structure(structure &&s) = default;
/** @endcond */
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms) // structures cannot be copied.
// NOTE: removed, simply create a new structure for each thread
structure(const structure &) = delete;
structure(const structure &) = delete;
structure &operator=(const structure &) = delete; structure &operator=(const structure &) = delete;
// Structure &operator=(Structure &&s) = default;
~structure() = default; ~structure() = default;
/// \brief Return the model number
size_t get_model_nr() const { return m_model_nr; } size_t get_model_nr() const { return m_model_nr; }
/// \brief Return a list of all the atoms in this structure
const std::vector<atom> &atoms() const { return m_atoms; } const std::vector<atom> &atoms() const { return m_atoms; }
// std::vector<atom> &atoms() { return m_atoms; }
EntityType get_entity_type_for_entity_id(const std::string entityID) const; EntityType get_entity_type_for_entity_id(const std::string entityID) const; ///< Return the entity type for the entity with id @a entity_id
EntityType get_entity_type_for_asym_id(const std::string asymID) const; EntityType get_entity_type_for_asym_id(const std::string asymID) const; ///< Return the entity type for the asym with id @a asym_id
// std::vector<atom> waters() const; const std::list<polymer> &polymers() const { return m_polymers; } ///< Return the list of polymers
std::list<polymer> &polymers() { return m_polymers; } ///< Return the list of polymers
const std::list<polymer> &polymers() const { return m_polymers; } polymer &get_polymer_by_asym_id(const std::string &asymID); ///< Return the polymer having asym ID @a asymID
std::list<polymer> &polymers() { return m_polymers; } const polymer &get_polymer_by_asym_id(const std::string &asymID) const ///< Return the polymer having asym ID @a asymID
polymer &get_polymer_by_asym_id(const std::string &asymID);
const polymer &get_polymer_by_asym_id(const std::string &asymID) const
{ {
return const_cast<structure *>(this)->get_polymer_by_asym_id(asymID); return const_cast<structure *>(this)->get_polymer_by_asym_id(asymID);
} }
const std::list<branch> &branches() const { return m_branches; } const std::list<branch> &branches() const { return m_branches; } ///< Return the list of all branches
std::list<branch> &branches() { return m_branches; } std::list<branch> &branches() { return m_branches; } ///< Return the list of all branches
branch &get_branch_by_asym_id(const std::string &asymID); branch &get_branch_by_asym_id(const std::string &asymID); ///< Return the branch having asym ID @a asymID
const branch &get_branch_by_asym_id(const std::string &asymID) const; const branch &get_branch_by_asym_id(const std::string &asymID) const; ///< Return the branch having asym ID @a asymID
const std::vector<residue> &non_polymers() const { return m_non_polymers; } const std::vector<residue> &non_polymers() const { return m_non_polymers; } ///< Return the list of non-polymers, actually the list of ligands
bool has_atom_id(const std::string &id) const; bool has_atom_id(const std::string &id) const; ///< Return true if an atom with ID @a id exists in this structure
atom get_atom_by_id(const std::string &id) const; atom get_atom_by_id(const std::string &id) const; ///< Return the atom with ID @a id
// atom getAtomByLocation(point pt, float maxDistance) const;
/// \brief Return the atom identified by the label_ values specified
atom get_atom_by_label(const std::string &atomID, const std::string &asymID, atom get_atom_by_label(const std::string &atomID, const std::string &asymID,
const std::string &compID, int seqID, const std::string &altID = ""); const std::string &compID, int seqID, const std::string &altID = "");
// /// \brief Return the atom closest to point \a p /// \brief Return the atom closest to point \a p
atom get_atom_by_position(point p) const; atom get_atom_by_position(point p) const;
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type /// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
...@@ -783,13 +965,32 @@ class structure ...@@ -783,13 +965,32 @@ class structure
} }
// Actions // Actions
/// \brief Remove atom @a a
void remove_atom(atom &a) void remove_atom(atom &a)
{ {
remove_atom(a, true); remove_atom(a, true);
} }
void swap_atoms(atom a1, atom a2); // swap the labels for these atoms void swap_atoms(atom a1, atom a2); ///< swap the labels for these atoms
void move_atom(atom a, point p); // move atom to a new location void move_atom(atom a, point p); ///< move atom to a new location
/**
* @brief Change residue @a res to a new compound ID optionally
* remapping atoms.
*
* A new chem_comp entry as well as an entity is created if needed and
* if the list of @a remappedAtoms is not empty it is used to remap.
*
* The array in @a remappedAtoms contains tuples of strings, both
* strings contain an atom_id. The first is the one in the current
* residue and the second is the atom_id that should be used instead.
* If the second string is empty, the atom is removed from the residue.
*
* @param res
* @param newcompound
* @param remappedAtoms
*/
void change_residue(residue &res, const std::string &newcompound, void change_residue(residue &res, const std::string &newcompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms); const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
...@@ -862,6 +1063,7 @@ class structure ...@@ -862,6 +1063,7 @@ class structure
/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2 /// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
void translate_rotate_and_translate(point t1, quaternion q, point t2); void translate_rotate_and_translate(point t1, quaternion q, point t2);
/// \brief Remove all categories that have no rows left
void cleanup_empty_categories(); void cleanup_empty_categories();
/// \brief Direct access to underlying data /// \brief Direct access to underlying data
...@@ -870,29 +1072,31 @@ class structure ...@@ -870,29 +1072,31 @@ class structure
return m_db[name]; return m_db[name];
} }
/// \brief Direct access to underlying data
datablock &get_datablock() const datablock &get_datablock() const
{ {
return m_db; return m_db;
} }
/// \brief Check if all atoms are part of either a polymer, a branch or one of the non-polymer residues
void validate_atoms() const; void validate_atoms() const;
// TODO: make this protected? /// \brief emplace a newly created atom using @a args
void load_atoms_for_model(StructureOpenOptions options);
template <typename... Args> template <typename... Args>
atom &emplace_atom(Args&... args) atom &emplace_atom(Args &...args)
{ {
return emplace_atom(atom{ std::forward<Args>(args)... }); return emplace_atom(atom{ std::forward<Args>(args)... });
} }
/// \brief emplace the moved atom @a atom
atom &emplace_atom(atom &&atom); atom &emplace_atom(atom &&atom);
private: private:
friend polymer; friend polymer;
friend residue; friend residue;
void load_atoms_for_model(StructureOpenOptions options);
std::string insert_compound(const std::string &compoundID, bool is_entity); std::string insert_compound(const std::string &compoundID, bool is_entity);
std::string create_entity_for_branch(branch &branch); std::string create_entity_for_branch(branch &branch);
......
...@@ -327,37 +327,6 @@ residue::residue(structure &structure, const std::vector<atom> &atoms) ...@@ -327,37 +327,6 @@ residue::residue(structure &structure, const std::vector<atom> &atoms)
m_atoms.push_back(atom); m_atoms.push_back(atom);
} }
// residue::residue(residue &&rhs)
// : m_structure(rhs.m_structure)
// , m_compound_id(std::move(rhs.m_compound_id))
// , m_asym_id(std::move(rhs.m_asym_id))
// , m_seq_id(rhs.m_seq_id)
// , m_auth_seq_id(rhs.m_auth_seq_id)
// , m_atoms(std::move(rhs.m_atoms))
// {
// // std::cerr << "move constructor residue" << std::endl;
// rhs.m_structure = nullptr;
// }
// residue &residue::operator=(residue &&rhs)
// {
// // std::cerr << "move assignment residue" << std::endl;
// m_structure = rhs.m_structure;
// rhs.m_structure = nullptr;
// m_compound_id = std::move(rhs.m_compound_id);
// m_asym_id = std::move(rhs.m_asym_id);
// m_seq_id = rhs.m_seq_id;
// m_auth_seq_id = rhs.m_auth_seq_id;
// m_atoms = std::move(rhs.m_atoms);
// return *this;
// }
// residue::~residue()
// {
// // std::cerr << "~residue" << std::endl;
// }
std::string residue::get_entity_id() const std::string residue::get_entity_id() const
{ {
std::string result; std::string result;
...@@ -381,68 +350,13 @@ EntityType residue::entity_type() const ...@@ -381,68 +350,13 @@ EntityType residue::entity_type() const
return m_structure->get_entity_type_for_entity_id(get_entity_id()); return m_structure->get_entity_type_for_entity_id(get_entity_id());
} }
// std::string residue::authInsCode() const
// {
// assert(m_structure);
// std::string result;
// if (not m_atoms.empty())
// result = m_atoms.front().get_property("pdbx_PDB_ins_code");
// return result;
// }
// std::string residue::get_auth_asym_id() const
// {
// assert(m_structure);
// std::string result;
// if (not m_atoms.empty())
// result = m_atoms.front().get_property("auth_asym_id");
// return result;
// }
// std::string residue::authSeqID() const
// {
// return m_auth_seq_id;
// }
// const Compound &residue::compound() const
// {
// auto result = compound_factory::instance().create(m_compound_id);
// if (result == nullptr)
// throw std::runtime_error("Failed to create compound " + m_compound_id);
// return *result;
// }
// std::string residue::unique_alt_id() const
// {
// if (m_structure == nullptr)
// throw std::runtime_error("Invalid residue object");
// auto firstAlt = std::find_if(m_atoms.begin(), m_atoms.end(), [](auto &a)
// { return not a.get_label_alt_id().empty(); });
// return firstAlt != m_atoms.end() ? firstAlt->get_label_alt_id() : "";
// }
void residue::add_atom(atom &atom) void residue::add_atom(atom &atom)
{ {
// atom.set_property("label_comp_id", m_compound_id);
// atom.set_property("label_asym_id", m_asym_id);
// if (m_seq_id != 0)
// atom.set_property("label_seq_id", std::to_string(m_seq_id));
// atom.set_property("auth_seq_id", m_auth_seq_id);
m_atoms.push_back(atom); m_atoms.push_back(atom);
} }
std::vector<atom> residue::unique_atoms() const std::vector<atom> residue::unique_atoms() const
{ {
// if (m_structure == nullptr)
// throw std::runtime_error("Invalid residue object");
std::vector<atom> result; std::vector<atom> result;
std::string firstAlt; std::string firstAlt;
...@@ -1494,7 +1408,7 @@ EntityType structure::get_entity_type_for_entity_id(const std::string entityID) ...@@ -1494,7 +1408,7 @@ EntityType structure::get_entity_type_for_entity_id(const std::string entityID)
EntityType result; EntityType result;
if (iequals(entity_type, "polymer")) if (iequals(entity_type, "polymer"))
result = EntityType::polymer; result = EntityType::Polymer;
else if (iequals(entity_type, "non-polymer")) else if (iequals(entity_type, "non-polymer"))
result = EntityType::NonPolymer; result = EntityType::NonPolymer;
else if (iequals(entity_type, "macrolide")) else if (iequals(entity_type, "macrolide"))
...@@ -2162,7 +2076,7 @@ void structure::remove_residue(residue &res) ...@@ -2162,7 +2076,7 @@ void structure::remove_residue(residue &res)
switch (res.entity_type()) switch (res.entity_type())
{ {
case EntityType::polymer: case EntityType::Polymer:
{ {
auto &m = dynamic_cast<monomer &>(res); auto &m = dynamic_cast<monomer &>(res);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment