Commit 5476eef0 by Maarten L. Hekkelman

some extensions for sugar tree building

parent 33c1eea9
......@@ -36,6 +36,7 @@
#include <cif++.hpp>
#include <cif++/atom_type.hpp>
#include <cif++/point.hpp>
namespace cif
{
......@@ -75,6 +76,11 @@ struct compound_atom
bool leaving_atom = false;
bool stereo_config = false;
float x, y, z;
point get_location() const
{
return { x, y, z };
}
};
/// --------------------------------------------------------------------
......@@ -114,6 +120,7 @@ class compound
compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
float bond_length(const std::string &atomId_1, const std::string &atomId_2) const;
bool is_water() const
{
......
......@@ -394,7 +394,7 @@ class residue
friend class structure;
// constructor
residue(const structure &structure, const std::string &compoundID,
residue(structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID,
const std::string &authAsymID, const std::string &authSeqID,
const std::string &pdbInsCode)
......@@ -408,7 +408,7 @@ class residue
{
}
residue(const structure &structure, const std::vector<atom> &atoms);
residue(structure &structure, const std::vector<atom> &atoms);
residue(const residue &rhs) = delete;
residue &operator=(const residue &rhs) = delete;
......@@ -432,7 +432,7 @@ class residue
const std::string &get_compound_id() const { return m_compound_id; }
void set_compound_id(const std::string &id) { m_compound_id = id; }
const structure *get_structure() const { return m_structure; }
structure *get_structure() const { return m_structure; }
// const compound &compound() const;
......@@ -489,7 +489,7 @@ class residue
protected:
residue() {}
const structure *m_structure = nullptr;
structure *m_structure = nullptr;
std::string m_compound_id, m_asym_id;
int m_seq_id = 0;
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
......@@ -573,7 +573,7 @@ class monomer : public residue
class polymer : public std::vector<monomer>
{
public:
polymer(const structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
polymer(structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
polymer(const polymer &) = delete;
polymer &operator=(const polymer &) = delete;
......@@ -581,7 +581,7 @@ class polymer : public std::vector<monomer>
// monomer &getBySeqID(int seqID);
// const monomer &getBySeqID(int seqID) const;
const structure *get_structure() const { return m_structure; }
structure *get_structure() const { return m_structure; }
std::string get_asym_id() const { return m_asym_id; }
std::string get_auth_asym_id() const { return m_auth_asym_id; } // The PDB chain ID, actually
......@@ -590,7 +590,7 @@ class polymer : public std::vector<monomer>
// int Distance(const monomer &a, const monomer &b) const;
private:
const structure *m_structure;
structure *m_structure;
std::string m_entity_id;
std::string m_asym_id;
std::string m_auth_asym_id;
......@@ -604,7 +604,7 @@ class branch;
class sugar : public residue
{
public:
sugar(const branch &branch, const std::string &compoundID,
sugar(branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID);
sugar(sugar &&rhs);
......@@ -631,27 +631,42 @@ class sugar : public residue
return result;
}
cif::mm::atom add_atom(row_initializer atom_info);
private:
const branch *m_branch;
branch *m_branch;
atom m_link;
};
class branch : public std::vector<sugar>
{
public:
branch(structure &structure, const std::string &asymID);
branch(structure &structure, const std::string &asym_id, const std::string &entity_id);
branch(const branch &) = delete;
branch &operator=(const branch &) = delete;
branch(branch &&) = default;
branch &operator=(branch &&) = default;
void link_atoms();
std::string name() const;
float weight() const;
std::string get_asym_id() const { return m_asym_id; }
std::string get_entity_id() const { return m_entity_id; }
structure &get_structure() { return *m_structure; }
const structure &get_structure() const { return *m_structure; }
structure &get_structure() const { return *m_structure; }
sugar &getSugarByNum(int nr);
const sugar &getSugarByNum(int nr) const;
sugar &get_sugar_by_num(int nr);
const sugar &get_sugar_by_num(int nr) const
{
return const_cast<branch *>(this)->get_sugar_by_num(nr);
}
sugar &construct_sugar(const std::string &compound_id);
private:
friend sugar;
......@@ -659,7 +674,7 @@ class branch : public std::vector<sugar>
std::string name(const sugar &s) const;
structure *m_structure;
std::string m_asym_id;
std::string m_asym_id, m_entity_id;
};
// // --------------------------------------------------------------------
......@@ -851,6 +866,9 @@ class structure
/// \return The newly create asym ID
std::string create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms);
/// \brief Create a new and empty (sugar) branch
branch &create_branch();
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a atoms
branch &create_branch(std::vector<row_initializer> atoms);
......
......@@ -223,6 +223,28 @@ bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atom
return i != m_bonds.end();
}
float compound::bond_length(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(m_bonds.begin(), m_bonds.end(),
[&](const compound_bond &b)
{
return (b.atom_id[0] == atomId_1 and b.atom_id[1] == atomId_2) or (b.atom_id[0] == atomId_2 and b.atom_id[1] == atomId_1);
});
float result = std::numeric_limits<float>::max();
if (i != m_bonds.end())
{
auto a = get_atom_by_atom_id(atomId_1);
auto b = get_atom_by_atom_id(atomId_2);
result = distance(point{a.x, a.y, a.z}, point{b.x, b.y, b.z});
}
return result;
}
// --------------------------------------------------------------------
// known amino acids and bases
......
......@@ -305,7 +305,7 @@ std::ostream &operator<<(std::ostream &os, const atom &atom)
// --------------------------------------------------------------------
// residue
residue::residue(const structure &structure, const std::vector<atom> &atoms)
residue::residue(structure &structure, const std::vector<atom> &atoms)
: m_structure(&structure)
{
if (atoms.empty())
......@@ -960,7 +960,7 @@ bool monomer::is_cis(const monomer &a, const monomer &b)
// --------------------------------------------------------------------
// polymer
polymer::polymer(const structure &s, const std::string &entityID, const std::string &asym_id, const std::string &auth_asym_id)
polymer::polymer(structure &s, const std::string &entityID, const std::string &asym_id, const std::string &auth_asym_id)
: m_structure(const_cast<structure *>(&s))
, m_entity_id(entityID)
, m_asym_id(asym_id)
......@@ -1046,7 +1046,7 @@ polymer::polymer(const structure &s, const std::string &entityID, const std::str
// --------------------------------------------------------------------
sugar::sugar(const branch &branch, const std::string &compoundID,
sugar::sugar(branch &branch, const std::string &compoundID,
const std::string &asym_id, int authSeqID)
: residue(branch.get_structure(), compoundID, asym_id, 0, asym_id, std::to_string(authSeqID), "")
, m_branch(&branch)
......@@ -1114,9 +1114,39 @@ std::string sugar::name() const
return result;
}
branch::branch(structure &structure, const std::string &asym_id)
cif::mm::atom sugar::add_atom(row_initializer atom_info)
{
auto &db = m_structure->get_datablock();
auto &atom_site = db["atom_site"];
auto atom_id = atom_site.get_unique_id("");
atom_info.set_value({"group_PDB", "HETATM"});
atom_info.set_value({"id", atom_id});
atom_info.set_value({"label_entity_id", m_branch->get_entity_id()});
atom_info.set_value({"label_asym_id", m_branch->get_asym_id()});
atom_info.set_value({"label_comp_id", m_compound_id});
atom_info.set_value({"label_seq_id", "."});
atom_info.set_value({"label_alt_id", "."});
atom_info.set_value({"auth_asym_id", m_branch->get_asym_id()});
atom_info.set_value({"auth_comp_id", m_compound_id});
atom_info.set_value({"auth_seq_id", m_auth_seq_id});
atom_info.set_value({"occupancy", 1.0, 2});
atom_info.set_value({"B_iso_or_equiv", 30.0, 2});
atom_info.set_value({"pdbx_PDB_model_num", 1});
auto row = atom_site.emplace(std::move(atom_info));
auto result = m_structure->emplace_atom(db, row);
residue::add_atom(result);
return result;
}
branch::branch(structure &structure, const std::string &asym_id, const std::string &entity_id)
: m_structure(&structure)
, m_asym_id(asym_id)
, m_entity_id(entity_id)
{
using namespace literals;
......@@ -1174,11 +1204,46 @@ void branch::link_atoms()
}
}
sugar &branch::get_sugar_by_num(int nr)
{
auto i = find_if(begin(), end(), [nr](const sugar &s) { return s.num() == nr; });
if (i == end())
throw std::out_of_range("Sugar with num " + std::to_string(nr) + " not found in branch " + m_asym_id);
return *i;
}
std::string branch::name() const
{
return empty() ? "" : name(front());
}
sugar &branch::construct_sugar(const std::string &compound_id)
{
auto &db = m_structure->get_datablock();
sugar &result = emplace_back(*this, compound_id, m_asym_id, size() + 1);
db["pdbx_branch_scheme"].emplace({
{"asym_id", result.get_asym_id()},
{"entity_id", result.get_entity_id()},
{"num", result.num()},
{"mon_id", result.get_compound_id()},
{"pdb_asym_id", result.get_asym_id()},
{"pdb_seq_num", result.num()},
{"pdb_mon_id", result.get_compound_id()},
{"auth_asym_id", result.get_auth_asym_id()},
{"auth_mon_id", result.get_compound_id()},
{"auth_seq_num", result.get_auth_seq_id()},
{"hetero", "n"}
});
return result;
}
std::string branch::name(const sugar &s) const
{
using namespace literals;
......@@ -1298,10 +1363,10 @@ void structure::load_data()
auto &branchScheme = m_db["pdbx_branch_scheme"];
for (const auto &asym_id : branchScheme.rows<std::string>("asym_id"))
for (const auto &[asym_id, entity_id] : branchScheme.rows<std::string,std::string>("asym_id", "entity_id"))
{
if (m_branches.empty() or m_branches.back().get_asym_id() != asym_id)
m_branches.emplace_back(*this, asym_id);
m_branches.emplace_back(*this, asym_id, entity_id);
}
auto &nonPolyScheme = m_db["pdbx_nonpoly_scheme"];
......@@ -2208,188 +2273,197 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
return asym_id;
}
branch &structure::create_branch(std::vector<row_initializer> atoms)
branch &structure::create_branch()
{
// sanity check
for (auto &nag_atom : atoms)
{
for (const auto &[name, value] : nag_atom)
{
if (name == "label_comp_id" and value != "NAG")
throw std::logic_error("The first sugar in a branch should be a NAG");
}
}
using namespace literals;
auto &struct_asym = m_db["struct_asym"];
std::string asym_id = struct_asym.get_unique_id();
auto entity_id = m_db["entity"].get_unique_id("");
auto &branch = m_branches.emplace_back(*this, asym_id);
auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
auto tmp_entity_id = m_db["entity"].get_unique_id("");
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}
});
auto &atom_site = m_db["atom_site"];
return m_branches.emplace_back(*this, asym_id, entity_id);
}
for (auto &atom : atoms)
{
auto atom_id = atom_site.get_unique_id("");
branch &structure::create_branch(std::vector<row_initializer> atoms)
{
// // sanity check
// for (auto &nag_atom : atoms)
// {
// for (const auto &[name, value] : nag_atom)
// {
// if (name == "label_comp_id" and value != "NAG")
// throw std::logic_error("The first sugar in a branch should be a NAG");
// }
// }
atom.set_value("id", atom_id);
atom.set_value("label_asym_id", asym_id);
atom.set_value("auth_asym_id", asym_id);
atom.set_value("label_entity_id", tmp_entity_id);
atom.set_value({ "auth_seq_id", 1 });
// using namespace literals;
atom.set_value_if_empty({"group_PDB", "HETATM"});
atom.set_value_if_empty({"label_comp_id", "NAG"});
atom.set_value_if_empty({"label_seq_id", "."});
atom.set_value_if_empty({"auth_comp_id", "NAG"});
atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
atom.set_value_if_empty({"label_alt_id", ""});
// auto &branch = create_branch();
// auto asym_id = branch.get_asym_id();
// auto entity_id = branch.get_entity_id();
auto row = atom_site.emplace(atom.begin(), atom.end());
// auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
sugar.add_atom(newAtom);
}
// auto &atom_site = m_db["atom_site"];
// now we can create the entity and get the real ID
auto entity_id = create_entity_for_branch(branch);
assert(not entity_id.empty());
// for (auto &atom : atoms)
// {
// auto atom_id = atom_site.get_unique_id("");
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}
});
// atom.set_value("id", atom_id);
// atom.set_value("label_asym_id", asym_id);
// atom.set_value("auth_asym_id", asym_id);
// atom.set_value("label_entity_id", entity_id);
// atom.set_value({ "auth_seq_id", 1 });
for (auto &a : sugar.atoms())
a.set_property("label_entity_id", entity_id);
// atom.set_value_if_empty({"group_PDB", "HETATM"});
// atom.set_value_if_empty({"label_comp_id", "NAG"});
// atom.set_value_if_empty({"label_seq_id", "."});
// atom.set_value_if_empty({"auth_comp_id", "NAG"});
// atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
// atom.set_value_if_empty({"label_alt_id", ""});
m_db["pdbx_branch_scheme"].emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", 1},
{"mon_id", "NAG"},
// auto row = atom_site.emplace(atom.begin(), atom.end());
{"pdb_asym_id", asym_id},
{"pdb_seq_num", 1},
{"pdb_mon_id", "NAG"},
// auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
// sugar.add_atom(newAtom);
// }
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", "NAG"},
{"auth_seq_num", 1},
// // // now we can create the entity and get the real ID
// // auto entity_id = create_entity_for_branch(branch);
// // assert(not entity_id.empty());
{"hetero", "n"}
});
// for (auto &a : sugar.atoms())
// a.set_property("label_entity_id", entity_id);
return branch;
// m_db["pdbx_branch_scheme"].emplace({
// {"asym_id", asym_id},
// {"entity_id", entity_id},
// {"num", 1},
// {"mon_id", "NAG"},
// {"pdb_asym_id", asym_id},
// {"pdb_seq_num", 1},
// {"pdb_mon_id", "NAG"},
// // TODO: need fix, collect from nag_atoms?
// {"auth_asym_id", asym_id},
// {"auth_mon_id", "NAG"},
// {"auth_seq_num", 1},
// {"hetero", "n"}
// });
// return branch;
}
branch &structure::extend_branch(const std::string &asym_id, std::vector<row_initializer> atom_info,
int link_sugar, const std::string &link_atom)
{
// sanity check
std::string compoundID;
for (auto &atom : atom_info)
{
for (const auto &[name, value] : atom)
{
if (name != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = value;
else if (value != compoundID)
throw std::logic_error("All atoms should be of the same type");
}
}
// // sanity check
// std::string compoundID;
using namespace literals;
// auto &branch = m_branches.emplace_back(*this, asym_id);
auto tmp_entity_id = m_db["entity"].get_unique_id("");
// for (auto &atom : atom_info)
// {
// for (const auto &[name, value] : atom)
// {
// if (name != "label_comp_id")
// continue;
auto &atom_site = m_db["atom_site"];
// if (compoundID.empty())
// compoundID = value;
// else if (value != compoundID)
// throw std::logic_error("All atoms should be of the same type");
// }
// }
auto bi = std::find_if(m_branches.begin(), m_branches.end(), [asym_id](branch &b)
{ return b.get_asym_id() == asym_id; });
if (bi == m_branches.end())
throw std::logic_error("Create a branch first!");
// using namespace literals;
branch &branch = *bi;
// // auto &branch = m_branches.emplace_back(*this, asym_id);
// auto tmp_entity_id = m_db["entity"].get_unique_id("");
int sugarNum = static_cast<int>(branch.size() + 1);
// auto &atom_site = m_db["atom_site"];
auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
// auto bi = std::find_if(m_branches.begin(), m_branches.end(), [asym_id](branch &b)
// { return b.get_asym_id() == asym_id; });
// if (bi == m_branches.end())
// throw std::logic_error("Create a branch first!");
for (auto &atom : atom_info)
{
auto atom_id = atom_site.get_unique_id("");
// branch &branch = *bi;
atom.set_value("id", atom_id);
atom.set_value("label_asym_id", asym_id);
atom.set_value("auth_asym_id", asym_id);
atom.set_value("label_entity_id", tmp_entity_id);
atom.set_value({"auth_seq_id", sugarNum });
// int sugarNum = static_cast<int>(branch.size() + 1);
atom.set_value_if_empty({"group_PDB", "HETATM"});
atom.set_value_if_empty({"label_comp_id", compoundID});
atom.set_value_if_empty({"auth_comp_id", compoundID});
atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
atom.set_value_if_empty({"label_alt_id", ""});
// auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
auto row = atom_site.emplace(atom.begin(), atom.end());
// for (auto &atom : atom_info)
// {
// auto atom_id = atom_site.get_unique_id("");
auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
sugar.add_atom(newAtom);
}
// atom.set_value("id", atom_id);
// atom.set_value("label_asym_id", asym_id);
// atom.set_value("auth_asym_id", asym_id);
// atom.set_value("label_entity_id", tmp_entity_id);
// atom.set_value({"auth_seq_id", sugarNum });
sugar.set_link(branch.at(link_sugar - 1).get_atom_by_atom_id(link_atom));
// atom.set_value_if_empty({"group_PDB", "HETATM"});
// atom.set_value_if_empty({"label_comp_id", compoundID});
// atom.set_value_if_empty({"auth_comp_id", compoundID});
// atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
// atom.set_value_if_empty({"label_alt_id", ""});
auto entity_id = create_entity_for_branch(branch);
// auto row = atom_site.emplace(atom.begin(), atom.end());
// Update the entity id of the asym
auto &struct_asym = m_db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
// auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
// sugar.add_atom(newAtom);
// }
for (auto &s2 : branch)
{
for (auto atom : s2.atoms())
atom.set_property("label_entity_id", entity_id);
}
// sugar.set_link(branch.at(link_sugar - 1).get_atom_by_atom_id(link_atom));
auto &pdbx_branch_scheme = m_db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
// auto entity_id = create_entity_for_branch(branch);
for (auto &s2 : branch)
{
pdbx_branch_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", s2.num()},
{"mon_id", s2.get_compound_id()},
// // Update the entity id of the asym
// auto &struct_asym = m_db["struct_asym"];
// auto r = struct_asym.find1("id"_key == asym_id);
// r["entity_id"] = entity_id;
{"pdb_asym_id", asym_id},
{"pdb_seq_num", s2.num()},
{"pdb_mon_id", s2.get_compound_id()},
// for (auto &s2 : branch)
// {
// for (auto atom : s2.atoms())
// atom.set_property("label_entity_id", entity_id);
// }
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", s2.get_compound_id()},
{"auth_seq_num", s2.get_auth_seq_id()},
// auto &pdbx_branch_scheme = m_db["pdbx_branch_scheme"];
// pdbx_branch_scheme.erase("asym_id"_key == asym_id);
{"hetero", "n"}
});
}
// for (auto &s2 : branch)
// {
// pdbx_branch_scheme.emplace({
// {"asym_id", asym_id},
// {"entity_id", entity_id},
// {"num", s2.num()},
// {"mon_id", s2.get_compound_id()},
// {"pdb_asym_id", asym_id},
// {"pdb_seq_num", s2.num()},
// {"pdb_mon_id", s2.get_compound_id()},
// // TODO: need fix, collect from nag_atoms?
// {"auth_asym_id", asym_id},
// {"auth_mon_id", s2.get_compound_id()},
// {"auth_seq_num", s2.get_auth_seq_id()},
// {"hetero", "n"}
// });
// }
return branch;
// return branch;
}
std::string structure::create_entity_for_branch(branch &branch)
......@@ -2409,11 +2483,13 @@ std::string structure::create_entity_for_branch(branch &branch)
if (VERBOSE)
std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;
entity.emplace({{"id", entityID},
entity.emplace({
{"id", entityID},
{"type", "branched"},
{"src_method", "man"},
{"pdbx_description", entityName},
{"formula_weight", branch.weight()}});
auto &pdbx_entity_branch_list = m_db["pdbx_entity_branch_list"];
for (auto &sugar : branch)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment