Commit af125bdd by Maarten L. Hekkelman

backup

parent 79089bbb
......@@ -394,6 +394,7 @@ if(CIFPP_BUILD_TESTS)
# pdb2cif
rename-compound
structure
sugar
unit)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
......
......@@ -2394,3 +2394,189 @@ VAL "Create component" 1999-07-08 RCSB
VAL "Modify descriptor" 2011-06-04 RCSB
#
data_NAG
#
_chem_comp.id NAG
_chem_comp.name 2-acetamido-2-deoxy-beta-D-glucopyranose
_chem_comp.type "D-saccharide, beta linking"
_chem_comp.pdbx_type ATOMS
_chem_comp.formula "C8 H15 N O6"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms
;N-acetyl-beta-D-glucosamine; 2-acetamido-2-deoxy-beta-D-glucose; 2-acetamido-2-deoxy-D-glucose;
2-acetamido-2-deoxy-glucose; N-ACETYL-D-GLUCOSAMINE
;
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2020-07-17
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces ?
_chem_comp.formula_weight 221.208
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code NAG
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 8PCH
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_pdbx_chem_comp_synonyms.ordinal
_pdbx_chem_comp_synonyms.comp_id
_pdbx_chem_comp_synonyms.name
_pdbx_chem_comp_synonyms.provenance
_pdbx_chem_comp_synonyms.type
1 NAG N-acetyl-beta-D-glucosamine PDB ?
2 NAG 2-acetamido-2-deoxy-beta-D-glucose PDB ?
3 NAG 2-acetamido-2-deoxy-D-glucose PDB ?
4 NAG 2-acetamido-2-deoxy-glucose PDB ?
5 NAG N-ACETYL-D-GLUCOSAMINE PDB ?
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
NAG C1 C1 C 0 1 N N R 7.396 28.163 26.662 0.185 1.082 -0.421 C1 NAG 1
NAG C2 C2 C 0 1 N N R 6.973 29.233 27.644 0.790 -0.220 0.112 C2 NAG 2
NAG C3 C3 C 0 1 N N R 7.667 29.055 29.000 -0.124 -1.390 -0.265 C3 NAG 3
NAG C4 C4 C 0 1 N N S 7.573 27.588 29.490 -1.526 -1.129 0.294 C4 NAG 4
NAG C5 C5 C 0 1 N N R 7.902 26.592 28.373 -2.042 0.207 -0.246 C5 NAG 5
NAG C6 C6 C 0 1 N N N 7.599 25.173 28.797 -3.417 0.504 0.355 C6 NAG 6
NAG C7 C7 C 0 1 N N N 6.291 31.299 26.595 3.197 0.157 0.076 C7 NAG 7
NAG C8 C8 C 0 1 N N N 6.684 32.649 26.036 4.559 -0.052 -0.533 C8 NAG 8
NAG N2 N2 N 0 1 N N N 7.268 30.545 27.089 2.114 -0.422 -0.480 N2 NAG 9
NAG O1 O1 O 0 1 N Y N 6.676 28.363 25.419 1.003 2.185 -0.024 O1 NAG 10
NAG O3 O3 O 0 1 N N N 7.038 29.909 29.947 0.395 -2.600 0.291 O3 NAG 11
NAG O4 O4 O 0 1 N N N 8.494 27.358 30.574 -2.405 -2.180 -0.114 O4 NAG 12
NAG O5 O5 O 0 1 N N N 7.104 26.875 27.206 -1.130 1.248 0.113 O5 NAG 13
NAG O6 O6 O 0 1 N N N 6.232 25.040 29.165 -3.949 1.691 -0.236 O6 NAG 14
NAG O7 O7 O 0 1 N N N 5.114 30.936 26.562 3.074 0.845 1.067 O7 NAG 15
NAG H1 H1 H 0 1 N N N 8.477 28.257 26.481 0.133 1.040 -1.509 H1 NAG 16
NAG H2 H2 H 0 1 N N N 5.888 29.146 27.803 0.879 -0.163 1.197 H2 NAG 17
NAG H3 H3 H 0 1 N N N 8.729 29.321 28.892 -0.174 -1.478 -1.350 H3 NAG 18
NAG H4 H4 H 0 1 N N N 6.544 27.403 29.831 -1.483 -1.091 1.382 H4 NAG 19
NAG H5 H5 H 0 1 N N N 8.971 26.674 28.128 -2.123 0.154 -1.332 H5 NAG 20
NAG H61 H61 H 0 1 N N N 7.816 24.492 27.961 -4.088 -0.333 0.157 H61 NAG 21
NAG H62 H62 H 0 1 N N N 8.232 24.910 29.657 -3.320 0.645 1.431 H62 NAG 22
NAG H81 H81 H 0 1 N N N 5.791 33.159 25.646 4.560 0.320 -1.558 H81 NAG 23
NAG H82 H82 H 0 1 N N N 7.136 33.258 26.833 5.305 0.490 0.050 H82 NAG 24
NAG H83 H83 H 0 1 N N N 7.411 32.511 25.222 4.799 -1.115 -0.532 H83 NAG 25
NAG HN2 HN2 H 0 1 N N N 8.210 30.881 27.079 2.212 -0.973 -1.273 HN2 NAG 26
NAG HO1 HO1 H 0 1 N Y N 6.933 27.696 24.793 0.679 3.044 -0.328 HO1 NAG 27
NAG HO3 HO3 H 0 1 N Y N 7.459 29.809 30.793 -0.135 -3.384 0.091 HO3 NAG 28
NAG HO4 HO4 H 0 1 N Y N 8.425 26.456 30.863 -3.312 -2.079 0.206 HO4 NAG 29
NAG HO6 HO6 H 0 1 N Y N 6.060 24.143 29.428 -4.822 1.940 0.099 HO6 NAG 30
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
NAG C1 C2 SING N N 1
NAG C1 O1 SING N N 2
NAG C1 O5 SING N N 3
NAG C1 H1 SING N N 4
NAG C2 C3 SING N N 5
NAG C2 N2 SING N N 6
NAG C2 H2 SING N N 7
NAG C3 C4 SING N N 8
NAG C3 O3 SING N N 9
NAG C3 H3 SING N N 10
NAG C4 C5 SING N N 11
NAG C4 O4 SING N N 12
NAG C4 H4 SING N N 13
NAG C5 C6 SING N N 14
NAG C5 O5 SING N N 15
NAG C5 H5 SING N N 16
NAG C6 O6 SING N N 17
NAG C6 H61 SING N N 18
NAG C6 H62 SING N N 19
NAG C7 C8 SING N N 20
NAG C7 N2 SING N N 21
NAG C7 O7 DOUB N N 22
NAG C8 H81 SING N N 23
NAG C8 H82 SING N N 24
NAG C8 H83 SING N N 25
NAG N2 HN2 SING N N 26
NAG O1 HO1 SING N N 27
NAG O3 HO3 SING N N 28
NAG O4 HO4 SING N N 29
NAG O6 HO6 SING N N 30
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
NAG SMILES ACDLabs 12.01 "O=C(NC1C(O)C(O)C(OC1O)CO)C"
NAG InChI InChI 1.03 "InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-10)15-8(5)14/h4-8,10,12-14H,2H2,1H3,(H,9,11)/t4-,5-,6-,7-,8-/m1/s1"
NAG InChIKey InChI 1.03 OVRNDRQMDRJTHS-FMDGEEDCSA-N
NAG SMILES_CANONICAL CACTVS 3.370 "CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O"
NAG SMILES CACTVS 3.370 "CC(=O)N[CH]1[CH](O)O[CH](CO)[CH](O)[CH]1O"
NAG SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"
NAG SMILES "OpenEye OEToolkits" 1.7.6 "CC(=O)NC1C(C(C(OC1O)CO)O)O"
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
NAG "SYSTEMATIC NAME" ACDLabs 12.01 "2-(acetylamino)-2-deoxy-beta-D-glucopyranose"
NAG "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "N-[(2R,3R,4R,5S,6R)-6-(hydroxymethyl)-2,4,5-tris(oxidanyl)oxan-3-yl]ethanamide"
NAG "CONDENSED IUPAC CARBOHYDRATE SYMBOL" GMML 1.0 DGlcpNAcb
NAG "COMMON NAME" GMML 1.0 N-acetyl-b-D-glucopyranosamine
NAG "IUPAC CARBOHYDRATE SYMBOL" PDB-CARE 1.0 b-D-GlcpNAc
NAG "SNFG CARBOHYDRATE SYMBOL" GMML 1.0 GlcNAc
# #
loop_
_pdbx_chem_comp_feature.comp_id
_pdbx_chem_comp_feature.type
_pdbx_chem_comp_feature.value
_pdbx_chem_comp_feature.source
_pdbx_chem_comp_feature.support
NAG "CARBOHYDRATE ISOMER" D PDB ?
NAG "CARBOHYDRATE RING" pyranose PDB ?
NAG "CARBOHYDRATE ANOMER" beta PDB ?
NAG "CARBOHYDRATE PRIMARY CARBONYL GROUP" aldose PDB ?
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
NAG "Create component" 1999-07-08 RCSB
NAG "Modify descriptor" 2011-06-04 RCSB
NAG "Modify leaving atom flag" 2011-07-01 RCSB
NAG "Modify leaving atom flag" 2012-11-26 RCSB
NAG "Other modification" 2019-08-12 RCSB
NAG "Other modification" 2019-12-19 RCSB
NAG "Other modification" 2020-07-03 RCSB
NAG "Modify name" 2020-07-17 RCSB
NAG "Modify synonyms" 2020-07-17 RCSB
##
\ No newline at end of file
......@@ -214,6 +214,10 @@ class Atom
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
bool operator==(const Atom &rhs) const;
bool operator!=(const Atom &rhs) const
{
return not operator==(rhs);
}
// access data in compound for this atom
......@@ -238,6 +242,13 @@ class Atom
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
/// \brief Synchronize data with underlying cif data
void sync()
{
if (mImpl)
mImpl->prefetch();
}
private:
friend class Structure;
......@@ -289,6 +300,13 @@ typedef std::vector<Atom> AtomView;
// --------------------------------------------------------------------
enum class EntityType
{
Polymer, NonPolymer, Macrolide, Water, Branched
};
// --------------------------------------------------------------------
class Residue
{
public:
......@@ -316,10 +334,7 @@ class Residue
AtomView &atoms();
const AtomView &atoms() const;
void addAtom(const Atom &atom)
{
mAtoms.push_back(atom);
}
void addAtom(Atom &atom);
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
AtomView unique_atoms() const;
......@@ -336,6 +351,8 @@ class Residue
int seqID() const { return mSeqID; }
std::string entityID() const;
EntityType entityType() const;
std::string authAsymID() const;
std::string authSeqID() const;
std::string authInsCode() const;
......@@ -373,6 +390,11 @@ class Residue
friend Structure;
bool operator==(const Residue &rhs) const
{
return mStructure == rhs.mStructure and mCompoundID == rhs.mCompoundID and mAsymID == rhs.mAsymID and mSeqID == rhs.mSeqID and mAuthSeqID == rhs.mAuthSeqID;
}
protected:
Residue() {}
......@@ -447,6 +469,11 @@ class Monomer : public Residue
// for LEU and VAL
float chiralVolume() const;
bool operator==(const Monomer &rhs) const
{
return mPolymer == rhs.mPolymer and mIndex == rhs.mIndex;
}
private:
const Polymer *mPolymer;
size_t mIndex;
......@@ -498,10 +525,14 @@ class Sugar : public Residue
int num() const { return std::stoi(mAuthSeqID); }
std::string name() const;
// Sugar &
/// \brief Return the atom the C1 is linked to
Atom getLink() const { return mLink; }
void setLink(Atom link) { mLink = link; }
private:
const Branch &mBranch;
Atom mLink;
};
class Branch : public std::vector<Sugar>
......@@ -509,6 +540,8 @@ class Branch : public std::vector<Sugar>
public:
Branch(Structure &structure, const std::string &asymID);
void linkAtoms();
std::string name() const;
float weight() const;
std::string asymID() const { return mAsymID; }
......@@ -522,6 +555,8 @@ class Branch : public std::vector<Sugar>
private:
friend Sugar;
std::string name(const Sugar &s) const;
Structure *mStructure;
std::string mAsymID;
};
......@@ -592,6 +627,9 @@ class Structure
const AtomView &atoms() const { return mAtoms; }
// AtomView &atoms() { return mAtoms; }
EntityType getEntityTypeForEntityID(const std::string entityID) const;
EntityType getEntityTypeForAsymID(const std::string asymID) const;
AtomView waters() const;
const std::list<Polymer> &polymers() const { return mPolymers; }
......@@ -673,6 +711,18 @@ class Structure
void changeResidue(Residue &res, const std::string &newCompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
/// \brief Remove a residue, can be monomer or nonpoly
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
void removeResidue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
{
removeResidue(getResidue(asym_id, seq_id, auth_seq_id));
}
/// \brief Remove residue \a res, can be monomer or nonpoly
void removeResidue(Residue &res);
/// \brief Create a new non-polymer entity, returns new ID
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
/// \return The ID of the created entity
......@@ -694,17 +744,20 @@ class Structure
/// \return The newly create asym ID
std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::Item>> &atom_info);
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info
Branch &createBranch(std::vector<std::vector<cif::Item>> &nag_atom_info);
/// \brief Create a new (sugar) branch with one first NAG containing atoms \a nag_atoms
Branch &createBranch(std::vector<std::vector<cif::Item>> &nag_atoms);
Branch &createBranch(std::vector<Atom> &nag_atoms);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms \a atoms
Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atoms);
Branch &extendBranch(const std::string &asym_id, std::vector<Atom> &atoms);
/// \brief Remove a residue, can be monomer or nonpoly
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
void removeResidue(const std::string &asym_id, int seq_id);
/// \brief Remove \a branch
void removeBranch(Branch &branch);
/// \brief Translate the coordinates of all atoms in the structure by \a t
void translate(Point t);
......
......@@ -276,11 +276,17 @@ BondMap::BondMap(const Structure &p)
continue;
}
auto c = atomMapByAsymSeqAndAtom.at(make_tuple(asymID, lastSeqID, "C", lastAuthSeqID));
auto n = atomMapByAsymSeqAndAtom.at(make_tuple(asymID, seqID, "N", authSeqID));
auto kc = make_tuple(asymID, lastSeqID, "C", lastAuthSeqID);
auto kn = make_tuple(asymID, seqID, "N", authSeqID);
if (atomMapByAsymSeqAndAtom.count(kc) and atomMapByAsymSeqAndAtom.count(kn))
{
auto c = atomMapByAsymSeqAndAtom.at(kc);
auto n = atomMapByAsymSeqAndAtom.at(kn);
if (not(c.empty() or n.empty()))
bindAtoms(c, n);
}
// if (not(c.empty() or n.empty()))
lastSeqID = seqID;
lastAuthSeqID = authSeqID;
......@@ -298,10 +304,21 @@ BondMap::BondMap(const Structure &p)
"ptnr1_label_seq_id", "ptnr2_label_seq_id",
"ptnr1_auth_seq_id", "ptnr2_auth_seq_id");
std::string a = atomMapByAsymSeqAndAtom.at(make_tuple(asym1, seqId1, atomId1, authSeqId1));
std::string b = atomMapByAsymSeqAndAtom.at(make_tuple(asym2, seqId2, atomId2, authSeqId2));
if (not(a.empty() or b.empty()))
auto ka = make_tuple(asym1, seqId1, atomId1, authSeqId1);
auto kb = make_tuple(asym2, seqId2, atomId2, authSeqId2);
if (atomMapByAsymSeqAndAtom.count(ka) and atomMapByAsymSeqAndAtom.count(kb))
{
auto a = atomMapByAsymSeqAndAtom.at(ka);
auto b = atomMapByAsymSeqAndAtom.at(kb);
linkAtoms(a, b);
}
// std::string a = atomMapByAsymSeqAndAtom.at(make_tuple(asym1, seqId1, atomId1, authSeqId1));
// std::string b = atomMapByAsymSeqAndAtom.at(make_tuple(asym2, seqId2, atomId2, authSeqId2));
// if (not(a.empty() or b.empty()))
// linkAtoms(a, b);
}
// then link all atoms in the compounds
......
......@@ -355,8 +355,13 @@ void Atom::translateRotateAndTranslate(Point t1, Quaternion q, Point t2)
bool Atom::operator==(const Atom &rhs) const
{
return mImpl == rhs.mImpl or
(&mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID);
if (mImpl == rhs.mImpl)
return true;
if (not (mImpl and rhs.mImpl))
return false;
return &mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID;
}
std::ostream &operator<<(std::ostream &os, const Atom &atom)
......@@ -410,6 +415,12 @@ std::string Residue::entityID() const
return mAtoms.empty() ? "" : mAtoms.front().labelEntityID();
}
EntityType Residue::entityType() const
{
assert(mStructure);
return mStructure->getEntityTypeForEntityID(entityID());
}
std::string Residue::authInsCode() const
{
assert(mStructure);
......@@ -472,6 +483,16 @@ std::string Residue::unique_alt_id() const
return firstAlt != mAtoms.end() ? firstAlt->labelAltID() : "";
}
void Residue::addAtom(Atom &atom)
{
atom.set_property("label_comp_id", mCompoundID);
atom.set_property("label_asym_id", mAsymID);
atom.set_property("label_seq_id", std::to_string(mSeqID));
atom.set_property("auth_seq_id", mAuthSeqID);
mAtoms.push_back(atom);
}
AtomView Residue::unique_atoms() const
{
if (mStructure == nullptr)
......@@ -1150,57 +1171,96 @@ Branch::Branch(Structure &structure, const std::string &asymID)
using namespace cif::literals;
auto &db = structure.datablock();
auto &struct_asym = db["struct_asym"];
auto &branch_list = db["pdbx_entity_branch_list"];
auto &branch_link = db["pdbx_entity_branch_link"];
for (const auto &[entity_id] : struct_asym.find<std::string>("id"_key == asymID, "entity_id"))
{
auto &pdbx_entity_branch_list = db["pdbx_entity_branch_list"];
for (const auto&[comp_id, num] : pdbx_entity_branch_list.find<std::string,int>(
for (const auto&[comp_id, num] : branch_list.find<std::string,int>(
"entity_id"_key == entity_id, "comp_id", "num"
))
{
emplace_back(*this, comp_id, asymID, num);
}
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
s1.setLink(s2.atomByID(atom2));
}
break;
}
}
std::string Branch::name() const
void Branch::linkAtoms()
{
// TODO: calculate
using namespace cif::literals;
return front().name();
auto &db = mStructure->datablock();
auto &branch_link = db["pdbx_entity_branch_link"];
auto entity_id = front().entityID();
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"
))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
// std::string result;
s1.setLink(s2.atomByID(atom2));
}
}
// for (auto i = begin(); i != end(); ++i)
// {
// if (i->next != sugar->c1)
// continue;
std::string Branch::name() const
{
return empty() ? "" : name(front());
}
// auto n = entityName(i) + "-(1-" + std::to_string(i->leaving_o) + ")";
// if (result.empty())
// result = n;
// else
// result += "-[" + n + ']';
// }
std::string Branch::name(const Sugar &s) const
{
using namespace cif::literals;
// if (not result.empty() and result.back() != ']')
// result += '-';
std::string result;
for (auto &sn : *this)
{
if (not sn.getLink() or sn.getLink().authSeqID() != s.authSeqID())
continue;
// return result;
auto n = name(sn) + "-(1-" + sn.getLink().labelAtomID().substr(1) + ')';
result = result.empty() ? n : result + "-[" + n + ']';
}
if (not result.empty() and result.back() != ']')
result += '-';
return result + s.name();
}
float Branch::weight() const
{
// TODO: calculate
return 0;
return std::accumulate(begin(), end(), 0.f, [](float sum, const Sugar &s)
{
auto compound = mmcif::CompoundFactory::instance().create(s.compoundID());
if (compound)
sum += compound->formulaWeight();
return sum;
});
}
// --------------------------------------------------------------------
......@@ -1465,6 +1525,48 @@ void Structure::loadData()
ri->second->addAtom(atom);
}
for (auto &branch : mBranches)
branch.linkAtoms();
}
EntityType Structure::getEntityTypeForEntityID(const std::string entityID) const
{
using namespace cif::literals;
auto &db = datablock();
auto &entity = db["entity"];
auto entityType = entity.find1<std::string>("id"_key == entityID, "type");
EntityType result;
if (cif::iequals(entityType, "polymer"))
result = EntityType::Polymer;
else if (cif::iequals(entityType, "non-polymer"))
result = EntityType::NonPolymer;
else if (cif::iequals(entityType, "macrolide"))
result = EntityType::Macrolide;
else if (cif::iequals(entityType, "water"))
result = EntityType::Water;
else if (cif::iequals(entityType, "branched"))
result = EntityType::Branched;
else
throw std::runtime_error("Unknown entity type " + entityType);
return result;
}
EntityType Structure::getEntityTypeForAsymID(const std::string asymID) const
{
using namespace cif::literals;
auto &db = datablock();
auto &struct_asym = db["struct_asym"];
auto entityID = struct_asym.find1<std::string>("id"_key == asymID, "entity_id");
return getEntityTypeForEntityID(entityID);
}
AtomView Structure::waters() const
......@@ -1498,21 +1600,6 @@ AtomView Structure::waters() const
Atom Structure::getAtomByID(const std::string &id) const
{
// int L = 0, R = mAtoms.size() - 1;
// while (L <= R)
// {
// int i = (L + R) / 2;
// int d = mAtoms[i].id().compare(id);
// if (d == 0)
// return mAtoms[i];
// if (d < 0)
// L = i + 1;
// else
// R = i - 1;
// }
assert(mAtoms.size() == mAtomIndex.size());
int L = 0, R = mAtoms.size() - 1;
......@@ -1533,13 +1620,6 @@ Atom Structure::getAtomByID(const std::string &id) const
R = i - 1;
}
// for (auto &atom : mAtoms)
// {
// if (atom.id() == id)
// return atom;
// }
throw std::out_of_range("Could not find atom with id " + id);
}
......@@ -1701,6 +1781,17 @@ Residue &Structure::getResidue(const std::string &asymID, const std::string &com
throw std::out_of_range("Could not find residue " + asymID + '/' + std::to_string(seqID) + '-' + authSeqID);
}
Branch& Structure::getBranchByAsymID(const std::string &asymID)
{
for (auto &branch : mBranches)
{
if (branch.asymID() == asymID)
return branch;
}
throw std::runtime_error("Branch not found for asym id " + asymID);
}
std::string Structure::insertCompound(const std::string &compoundID, bool isEntity)
{
using namespace cif::literals;
......@@ -1779,21 +1870,12 @@ Atom& Structure::emplace_atom(Atom &&atom)
void Structure::removeAtom(Atom &a)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &atomSites = db["atom_site"];
for (auto i = atomSites.begin(); i != atomSites.end(); ++i)
{
std::string id;
cif::tie(id) = i->get("id");
if (id == a.id())
{
atomSites.erase(i);
break;
}
}
atomSites.erase("id"_key == a.id());
assert(mAtomIndex.size() == mAtoms.size());
......@@ -1825,27 +1907,25 @@ void Structure::swapAtoms(Atom a1, Atom a2)
cif::Datablock &db = datablock();
auto &atomSites = db["atom_site"];
auto rs1 = atomSites.find(cif::Key("id") == a1.id());
auto rs2 = atomSites.find(cif::Key("id") == a2.id());
if (rs1.size() != 1)
throw std::runtime_error("Cannot swap atoms since the number of atoms with id " + a1.id() + " is " + std::to_string(rs1.size()));
if (rs2.size() != 1)
throw std::runtime_error("Cannot swap atoms since the number of atoms with id " + a2.id() + " is " + std::to_string(rs2.size()));
auto r1 = rs1.front();
auto r2 = rs2.front();
try
{
auto r1 = atomSites.find1(cif::Key("id") == a1.id());
auto r2 = atomSites.find1(cif::Key("id") == a2.id());
auto l1 = r1["label_atom_id"];
auto l2 = r2["label_atom_id"];
l1.swap(l2);
auto l1 = r1["label_atom_id"];
auto l2 = r2["label_atom_id"];
l1.swap(l2);
std::swap(a1.mImpl->mAtomID, a2.mImpl->mAtomID);
std::swap(a1.mImpl->mAtomID, a2.mImpl->mAtomID);
auto l3 = r1["auth_atom_id"];
auto l4 = r2["auth_atom_id"];
l3.swap(l4);
auto l3 = r1["auth_atom_id"];
auto l4 = r2["auth_atom_id"];
l3.swap(l4);
}
catch (const std::exception& ex)
{
std::throw_with_nested(std::runtime_error("Failed to swap atoms"));
}
}
void Structure::moveAtom(Atom a, Point p)
......@@ -1922,13 +2002,10 @@ void Structure::changeResidue(Residue &res, const std::string &newCompound,
auto &atomSites = db["atom_site"];
auto atoms = res.atoms();
for (auto &a : remappedAtoms)
for (const auto &[a1, a2] : remappedAtoms)
{
std::string a1, a2;
tie(a1, a2) = a;
auto i = find_if(atoms.begin(), atoms.end(), [&](const Atom &a)
{ return a.labelAtomID() == a1; });
auto i = find_if(atoms.begin(), atoms.end(), [id = a1](const Atom &a)
{ return a.labelAtomID() == id; });
if (i == atoms.end())
{
if (cif::VERBOSE >= 0)
......@@ -1959,6 +2036,69 @@ void Structure::changeResidue(Residue &res, const std::string &newCompound,
}
}
void Structure::removeResidue(Residue &res)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
switch (res.entityType())
{
case EntityType::Polymer:
{
Monomer &monomer = dynamic_cast<Monomer&>(res);
db["pdbx_poly_seq_scheme"].erase(
"asym_id"_key == res.asymID() and
"seq_id"_key == res.seqID()
);
for (auto &poly : mPolymers)
poly.erase(std::remove(poly.begin(), poly.end(), monomer), poly.end());
break;
}
case EntityType::NonPolymer:
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
db["struct_asym"].erase("id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Water:
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Branched:
throw std::runtime_error("Don't remove a sugar using removeResidue...");
case EntityType::Macrolide:
// TODO: Fix this?
throw std::runtime_error("no support for macrolides yet");
}
for (auto atom : res.atoms())
removeAtom(atom);
}
void Structure::removeBranch(Branch &branch)
{
using namespace cif::literals;
auto &db = datablock();
db["pdbx_branch_scheme"].erase("asym_id"_key == branch.asymID());
db["struct_asym"].erase("id"_key == branch.asymID());
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
removeAtom(atom);
}
mBranches.erase(remove(mBranches.begin(), mBranches.end(), branch), mBranches.end());
}
std::string Structure::createNonPolyEntity(const std::string &comp_id)
{
return insertCompound(comp_id, true);
......@@ -2119,18 +2259,6 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
auto &branch = mBranches.emplace_back(*this, asym_id);
auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
auto entity_id = createEntityForBranch(branch);
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
auto &atom_site = db["atom_site"];
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
......@@ -2139,6 +2267,8 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
ai.emplace_back(std::move(i));
};
std::vector<Atom> atoms;
for (auto &atom : nag_atoms)
{
auto atom_id = atom_site.getUniqueID("");
......@@ -2148,7 +2278,7 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
appendUnlessSet(atom, { "label_comp_id", "NAG"} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
// appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", "NAG"} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", 1} );
......@@ -2158,9 +2288,39 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom);
atoms.push_back(newAtom);
}
return createBranch(atoms);
}
Branch& Structure::createBranch(std::vector<Atom> &nag_atoms)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
auto &branch = mBranches.emplace_back(*this, asym_id);
auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
for (auto &atom : nag_atoms)
sugar.addAtom(atom);
// now we can create the entity and get the real ID
auto entity_id = createEntityForBranch(branch);
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}});
for (auto &a : sugar.atoms())
a.set_property("label_entity_id", entity_id);
db["pdbx_branch_scheme"].emplace({
{ "asym_id", asym_id },
{ "entity_id", entity_id },
......@@ -2182,12 +2342,12 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
return branch;
}
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atoms)
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info)
{
// sanity check
std::string compoundID;
for (auto &atom : atoms)
for (auto &atom : atom_info)
{
for (auto info : atom)
{
......@@ -2205,24 +2365,8 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
cif::Datablock &db = datablock();
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b) { return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
Branch &branch = *bi;
// auto &branch = mBranches.emplace_back(*this, asym_id);
int sugarNum = branch.size() + 1;
auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
auto entity_id = createEntityForBranch(branch);
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
auto tmp_entity_id = db["entity"].getUniqueID("");
auto &atom_site = db["atom_site"];
......@@ -2232,7 +2376,9 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
ai.emplace_back(std::move(i));
};
for (auto &atom : atoms)
std::vector<Atom> atoms;
for (auto &atom : atom_info)
{
auto atom_id = atom_site.getUniqueID("");
......@@ -2241,19 +2387,51 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
appendUnlessSet(atom, { "label_comp_id", compoundID} );
appendUnlessSet(atom, { "label_asym_id", asym_id} );
appendUnlessSet(atom, { "label_seq_id", "."} );
appendUnlessSet(atom, { "label_entity_id", entity_id} );
appendUnlessSet(atom, { "label_entity_id", tmp_entity_id} );
appendUnlessSet(atom, { "auth_comp_id", compoundID} );
appendUnlessSet(atom, { "auth_asym_id", asym_id} );
appendUnlessSet(atom, { "auth_seq_id", sugarNum} );
// appendUnlessSet(atom, { "auth_seq_id", sugarNum} );
appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
appendUnlessSet(atom, { "label_alt_id", ""} );
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom);
atoms.push_back(newAtom);
}
return extendBranch(asym_id, atoms);
}
Branch& Structure::extendBranch(const std::string &asym_id, std::vector<Atom> &atoms)
{
using namespace cif::literals;
std::string compoundID = atoms.front().labelCompID();
cif::Datablock &db = datablock();
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b) { return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
Branch &branch = *bi;
int sugarNum = branch.size() + 1;
auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
auto entity_id = createEntityForBranch(branch);
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
for (auto &atom : atoms)
sugar.addAtom(atom);
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include "cif++/Cif++.hpp"
#include "cif++/Structure.hpp"
// --------------------------------------------------------------------
cif::File operator""_cf(const char* text, size_t length)
{
struct membuf : public std::streambuf
{
membuf(char* text, size_t length)
{
this->setg(text, text, text + length);
}
} buffer(const_cast<char*>(text), length);
std::istream is(&buffer);
return cif::File(is);
}
// --------------------------------------------------------------------
std::filesystem::path gTestDir = std::filesystem::current_path();
bool init_unit_test()
{
cif::VERBOSE = 1;
// not a test, just initialize test dir
if (boost::unit_test::framework::master_test_suite().argc == 2)
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
// do this now, avoids the need for installing
cif::addFileResource("mmcif_pdbx_v50.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx_v50.dic");
// initialize CCD location
cif::addFileResource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
mmcif::CompoundFactory::instance().pushDictionary(gTestDir / "HEM.cif");
return true;
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(sugar_name_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
auto &db = s.datablock();
auto &entity = db["entity"];
auto &branches = s.branches();
BOOST_CHECK_EQUAL(branches.size(), 4);
for (auto &branch : branches)
{
auto entityID = branch.front().entityID();
auto name = entity.find1<std::string>("id"_key == entityID, "pdbx_description");
BOOST_CHECK_EQUAL(branch.name(), name);
}
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(create_sugar_1)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// collect atoms from asym L first
auto &NAG = s.getResidue("L");
auto nagAtoms = NAG.atoms();
s.removeResidue(NAG);
auto &branch = s.createBranch(nagAtoms);
BOOST_CHECK_EQUAL(branch.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(branch.size(), 1);
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(create_sugar_2)
{
using namespace cif::literals;
const std::filesystem::path example(gTestDir / "1juh.cif.gz");
mmcif::File file(example.string());
mmcif::Structure s(file);
// Get branch for H
auto &bH = s.getBranchByAsymID("H");
BOOST_CHECK_EQUAL(bH.size(), 2);
auto a_sH1 = bH[0].atoms();
auto a_sH2 = bH[1].atoms();
s.removeBranch(bH);
auto &bN = s.createBranch(a_sH1);
s.extendBranch(bN.asymID(), a_sH2);
BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose-(1-4)-2-acetamido-2-deoxy-beta-D-glucopyranose");
BOOST_CHECK_EQUAL(bN.size(), 2);
file.save(gTestDir / "test-create_sugar_2.cif");
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment