Commit 58f1b626 by Maarten L. Hekkelman

change getResidue

parent c104a08e
......@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 3.0.5 LANGUAGES CXX)
project(cifpp VERSION 4.0.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
......
Version 4.0.0
- getResidue in mmcif::Structure now requires both a
sequence ID and an auth sequence ID. Very unfortunate.
Version 3.0.5
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
......
......@@ -296,7 +296,7 @@ class Residue
public:
// constructor
Residue(const Structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID = 0, const std::string &authSeqID = {})
const std::string &asymID, int seqID, const std::string &authSeqID)
: mStructure(&structure)
, mCompoundID(compoundID)
, mAsymID(asymID)
......@@ -383,9 +383,6 @@ class Residue
const Structure *mStructure = nullptr;
std::string mCompoundID, mAsymID;
int mSeqID = 0;
// Watch out, this is used only to label waters... The rest of the code relies on
// MapLabelToAuth to get this info. Perhaps we should rename this member field.
std::string mAuthSeqID;
AtomView mAtoms;
};
......@@ -625,26 +622,41 @@ class Structure
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
Atom getAtomByPositionAndType(Point p, std::string_view type, std::string_view res_type) const;
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0) const;
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID);
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0);
/// \brief Get a the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
const Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID) const
{
return const_cast<Structure*>(this)->getResidue(asymID, seqID, authSeqID);
}
/// \brief Get a the single residue for an asym with id \a asymID
const Residue &getResidue(const std::string &asymID) const;
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
/// \brief Get a the single residue for an asym with id \a asymID and seq id \a seqID
const Residue &getResidue(const std::string &asymID, int seqID) const;
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
{
return const_cast<Structure*>(this)->getResidue(asymID, compID, seqID, authSeqID);
}
/// \brief Get a the single residue for an asym with id \a asymID
Residue &getResidue(const std::string &asymID);
/// \brief Get a the single residue for an asym with id \a asymID
const Residue &getResidue(const std::string &asymID) const
{
return const_cast<Structure*>(this)->getResidue(asymID);
}
/// \brief Get a the residue for atom \a atom
Residue &getResidue(const mmcif::Atom &atom);
/// \brief Get a the residue for atom \a atom
const Residue &getResidue(const mmcif::Atom &atom) const;
const Residue &getResidue(const mmcif::Atom &atom) const
{
return const_cast<Structure*>(this)->getResidue(atom);
}
// map between auth and label locations
......@@ -697,11 +709,14 @@ class Structure
/// \brief Create a new (sugar) branch with one first NAG containing atoms \a nag_atoms
Branch& createBranch(std::vector<std::vector<cif::Item>> &nag_atoms);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms \a atoms
Branch& extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atoms);
/// \brief Remove a residue, can be monomer or nonpoly
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
void removeResidue(const std::string &sym_id, int seq_id);
void removeResidue(const std::string &asym_id, int seq_id);
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
/// Will asssign new atom_id's to all atoms. Be carefull
......
......@@ -255,10 +255,10 @@ BondMap::BondMap(const Structure &p)
cif::Progress progress(compounds.size(), "Creating bond map");
// some helper indices to speed things up a bit
std::map<std::tuple<std::string, int, std::string>, std::string> atomMapByAsymSeqAndAtom;
std::map<std::tuple<std::string, int, std::string, std::string>, std::string> atomMapByAsymSeqAndAtom;
for (auto &a : p.atoms())
{
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID(), a.authSeqID());
atomMapByAsymSeqAndAtom[key] = a.id();
}
......@@ -280,8 +280,8 @@ BondMap::BondMap(const Structure &p)
continue;
}
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C")];
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N")];
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C", "")];
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N", "")];
if (not(c.empty() or n.empty()))
bindAtoms(c, n);
......@@ -293,14 +293,16 @@ BondMap::BondMap(const Structure &p)
{
std::string asym1, asym2, atomId1, atomId2;
int seqId1 = 0, seqId2 = 0;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
std::string authSeqId1, authSeqId2;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2, authSeqId1, authSeqId2) =
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
"ptnr1_label_seq_id", "ptnr2_label_seq_id");
std::string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1)];
std::string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2)];
"ptnr1_label_seq_id", "ptnr2_label_seq_id",
"ptnr1_auth_seq_id", "ptnr2_auth_seq_id");
std::string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1, authSeqId1)];
std::string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2, authSeqId2)];
if (not(a.empty() or b.empty()))
linkAtoms(a, b);
}
......@@ -373,15 +375,12 @@ BondMap::BondMap(const Structure &p)
}
// loop over pdbx_branch_scheme
for (auto r : db["pdbx_branch_scheme"].find(cif::Key("mon_id") == c))
for (const auto &[asym_id, pdb_seq_num] : db["pdbx_branch_scheme"].find<std::string,std::string>(cif::Key("mon_id") == c, "asym_id", "pdb_seq_num"))
{
std::string asymID;
cif::tie(asymID) = r.get("asym_id");
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto &a)
{ return a.labelAsymID() == asymID; });
[&](const Atom &a)
{ return a.labelAsymID() == asym_id and a.authSeqID() == pdb_seq_num; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
......
......@@ -28,13 +28,19 @@
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <numeric>
#include <boost/algorithm/string.hpp>
#include <boost/format.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#if __cpp_lib_format
#include <format>
#else
#include <boost/format.hpp>
#endif
#include "cif++/Cif2PDB.hpp"
#include "cif++/CifParser.hpp"
#include "cif++/PDB2Cif.hpp"
......@@ -144,14 +150,18 @@ int Atom::AtomImpl::charge() const
{
auto &compound = comp();
for (auto cAtom : compound.atoms())
{
if (cAtom.id != mAtomID)
continue;
formalCharge = cAtom.charge;
break;
}
if (compound.atoms().size() == 1)
formalCharge = compound.atoms().front().charge;
// {
// for (auto cAtom : compound.atoms())
// {
// if (cAtom.id != mAtomID)
// continue;
// formalCharge = cAtom.charge;
// break;
// }
// }
}
return formalCharge.value_or(0);
......@@ -165,9 +175,15 @@ void Atom::AtomImpl::moveTo(const Point &p)
if (not mClone)
{
mRow.assign("Cartn_x", std::to_string(p.getX()), true, false);
mRow.assign("Cartn_y", std::to_string(p.getY()), true, false);
mRow.assign("Cartn_z", std::to_string(p.getZ()), true, false);
#if __cpp_lib_format
mRow.assign("Cartn_x", std::format("{:.3f}", p.getX()), true, false);
mRow.assign("Cartn_y", std::format("{:.3f}", p.getY()), true, false);
mRow.assign("Cartn_z", std::format("{:.3f}", p.getZ()), true, false);
#else
mRow.assign("Cartn_x", (boost::format("%.3f") % p.getX()).str(), true, false);
mRow.assign("Cartn_y", (boost::format("%.3f") % p.getY()).str(), true, false);
mRow.assign("Cartn_z", (boost::format("%.3f") % p.getZ()).str(), true, false);
#endif
}
mLocation = p;
......@@ -200,7 +216,7 @@ const std::string Atom::AtomImpl::get_property(const std::string_view name) cons
return ref.as<std::string>();
}
mCachedRefs.emplace_back(name, mRow[name]);
mCachedRefs.emplace_back(name, const_cast<cif::Row&>(mRow)[name]);
return std::get<1>(mCachedRefs.back()).as<std::string>();
}
......@@ -440,21 +456,7 @@ std::string Residue::authAsymID() const
std::string Residue::authSeqID() const
{
assert(mStructure);
std::string result;
try
{
int seqID;
tie(std::ignore, seqID, std::ignore) = mStructure->MapLabelToAuth(mAsymID, mSeqID);
result = std::to_string(seqID);
}
catch (...)
{
}
return result;
return mAuthSeqID;
}
const Compound &Residue::compound() const
......@@ -1453,10 +1455,7 @@ void Structure::loadData()
cif::tie(asymID, monID, pdbSeqNum) =
r.get("asym_id", "mon_id", "pdb_seq_num");
if (monID == "HOH")
mNonPolymers.emplace_back(*this, monID, asymID, 0, pdbSeqNum);
else if (mNonPolymers.empty() or mNonPolymers.back().asymID() != asymID)
mNonPolymers.emplace_back(*this, monID, asymID);
mNonPolymers.emplace_back(*this, monID, asymID, 0, pdbSeqNum);
}
// place atoms in residues
......@@ -1659,8 +1658,30 @@ Atom Structure::getAtomByPositionAndType(Point p, std::string_view type, std::st
return {};
}
const Residue &Structure::getResidue(const std::string &asymID, const std::string &compID, int seqID) const
Residue &Structure::getResidue(const std::string &asymID)
{
for (auto &res : mNonPolymers)
{
if (res.asymID() != asymID)
continue;
return res;
}
throw std::out_of_range("Could not find residue " + asymID);
}
Residue &Structure::getResidue(const std::string &asymID, int seqID, const std::string &authSeqID)
{
if (seqID == 0)
{
for (auto &res : mNonPolymers)
{
if (res.asymID() == asymID and res.authSeqID() == authSeqID)
return res;
}
}
for (auto &poly : mPolymers)
{
if (poly.asymID() != asymID)
......@@ -1668,7 +1689,7 @@ const Residue &Structure::getResidue(const std::string &asymID, const std::strin
for (auto &res : poly)
{
if (res.seqID() == seqID and res.compoundID() == compID)
if (res.seqID() == seqID)
return res;
}
}
......@@ -1680,53 +1701,22 @@ const Residue &Structure::getResidue(const std::string &asymID, const std::strin
for (auto &sugar : branch)
{
if (sugar.asymID() == asymID and sugar.compoundID() == compID and sugar.num() == seqID)
if (sugar.asymID() == asymID and sugar.authSeqID() == authSeqID)
return sugar;
}
}
if (seqID == 0)
{
for (auto &res : mNonPolymers)
{
if (res.asymID() != asymID or res.compoundID() != compID)
continue;
return res;
}
}
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID));
}
Residue &Structure::getResidue(const std::string &asymID, const std::string &compID, int seqID)
{
return const_cast<Residue &>(const_cast<Structure const &>(*this).getResidue(asymID, compID, seqID));
}
const Residue &Structure::getResidue(const std::string &asymID) const
{
for (auto &res : mNonPolymers)
{
if (res.asymID() != asymID)
continue;
return res;
}
throw std::out_of_range("Could not find residue " + asymID);
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
}
const Residue &Structure::getResidue(const std::string &asymID, int seqID) const
Residue &Structure::getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID)
{
if (seqID == 0)
{
for (auto &res : mNonPolymers)
{
if (res.asymID() != asymID)
continue;
return res;
if (res.asymID() == asymID and res.authSeqID() == authSeqID and res.compoundID() == compID)
return res;
}
}
......@@ -1737,7 +1727,7 @@ const Residue &Structure::getResidue(const std::string &asymID, int seqID) const
for (auto &res : poly)
{
if (res.seqID() == seqID)
if (res.seqID() == seqID and res.compoundID() == compID)
return res;
}
}
......@@ -1749,17 +1739,12 @@ const Residue &Structure::getResidue(const std::string &asymID, int seqID) const
for (auto &sugar : branch)
{
if (sugar.asymID() == asymID and sugar.num() == seqID)
if (sugar.asymID() == asymID and sugar.authSeqID() == authSeqID and sugar.compoundID() == compID)
return sugar;
}
}
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID));
}
Residue &Structure::getResidue(const std::string &asymID)
{
return const_cast<Residue &>(const_cast<Structure const &>(*this).getResidue(asymID));
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
}
Residue &Structure::getResidue(const mmcif::Atom &atom)
......@@ -1832,11 +1817,6 @@ Residue &Structure::getResidue(const mmcif::Atom &atom)
throw std::out_of_range("Could not find residue " + asymID);
}
const Residue &Structure::getResidue(const mmcif::Atom &atom) const
{
return const_cast<Structure*>(this)->getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID());
}
std::tuple<char, int, char> Structure::MapLabelToAuth(
const std::string &asymID, int seqID) const
{
......@@ -2249,7 +2229,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
auto &atom_site = db["atom_site"];
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id);
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id, 0, "1");
for (auto &atom : atoms)
{
......@@ -2320,7 +2300,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &atom_site = db["atom_site"];
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id);
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id, 0, "1");
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
......@@ -2414,7 +2394,7 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", "NAG"} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", ""} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
appendUnlessSet(atom, { "auth_comp_id", "NAG"} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
......@@ -2437,40 +2417,115 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", 1 },
{ "pdb_mon_id", "NAG" },
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", "NAG" },
{ "auth_seq_num", 1 },
{ "hetero", "n" }
});
return branch;
}
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atoms)
{
// sanity check
std::string compoundID;
for (auto &atom : atoms)
{
for (auto info : atom)
{
if (info.name() != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = info.value();
else if (info.value() != compoundID)
throw std::logic_error("All atoms should be of the same type");
}
}
using namespace cif::literals;
cif::Datablock &db = datablock();
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b) { return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
Branch &branch = *bi;
// auto &branch = mBranches.emplace_back(*this, asym_id);
int sugarNum = branch.size() + 1;
auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
auto entity_id = createEntityForBranch(branch);
// db["pdbx_entity_branch"].emplace({
// { "entity_id", entityID },
// { "type", "oligosaccharide" }
// });
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
auto &atom_site = db["atom_site"];
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
for (auto &atom : atoms)
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, { "group_PDB", "HETATM"} );
appendUnlessSet(atom, { "id", atom_id} );
appendUnlessSet(atom, { "label_comp_id", compoundID} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
appendUnlessSet(atom, { "auth_comp_id", compoundID} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", sugarNum} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
// for (std::size_t i = 0; i < st.size(); ++i)
// {
// db["pdbx_entity_branch_list"].emplace({
// { "entity_id", entityID },
// { "comp_id", st[i].compID },
// { "num", i + 1 },
// { "hetero", "n" }
// });
// }
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom);
}
// for (std::size_t i = 0; i + 1 < st.size(); ++i)
// {
// auto linkID = db["pdbx_entity_branch_link"].getUniqueID("");
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
// db["pdbx_entity_branch_link"].emplace({
// { "link_id", linkID },
// { "atom_id_1", "C1" }
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
{ "num", sugar.num() },
{ "mon_id", sugar.compoundID() },
// });
// }
// }
{ "pdb_asym_id", asym_id },
{ "pdb_seq_num", sugar.num() },
{ "pdb_mon_id", sugar.compoundID() },
// TODO: need fix, collect from nag_atoms?
{ "auth_asym_id", asym_id },
{ "auth_mon_id", sugar.compoundID() },
{ "auth_seq_num", sugar.num() },
{ "hetero", "n" }
});
}
// return asym_id;
return branch;
}
std::string Structure::createEntityForBranch(Branch &branch)
......@@ -2501,6 +2556,19 @@ std::string Structure::createEntityForBranch(Branch &branch)
});
}
auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
pdbx_entity_branch_list.erase("entity_id"_key == entityID);
for (auto &sugar : branch)
{
pdbx_entity_branch_list.emplace({
{ "entity_id", entityID },
{ "comp_id", sugar.compoundID() },
{ "num", sugar.num() },
{ "hetero", "n" }
});
}
return entityID;
}
......
......@@ -32,7 +32,7 @@ int main(int argc, char* argv[])
mmcif::File f(testdir / ".."/"examples"/"1cbs.cif.gz");
mmcif::Structure structure(f);
auto &res = structure.getResidue("B", "REA");
auto &res = structure.getResidue("B");
structure.changeResidue(res, "RXA", {});
structure.cleanupEmptyCategories();
......
......@@ -123,8 +123,8 @@ _pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 0
_pdbx_nonpoly_scheme.auth_seq_num 0
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
......
......@@ -1634,7 +1634,7 @@ PRO OXT HXT SING N N 17
for (const auto &[compound, seqnr] : std::initializer_list<std::tuple<std::string, int>>{{"PRO", 1}, {"ASN", 2}, {"PHE", 3}})
{
auto &res = structure.getResidue("A", compound, seqnr);
auto &res = structure.getResidue("A", compound, seqnr, "");
auto atoms = res.atoms();
auto dc = components.get(compound);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment