Commit 49ba714a by Maarten L. Hekkelman

- structure id stuff

- added cif::null
- more tests
parent 2d2b26f7
...@@ -11,4 +11,5 @@ data/components.cif* ...@@ -11,4 +11,5 @@ data/components.cif*
CMakeSettings.json CMakeSettings.json
msvc/ msvc/
Testing/ Testing/
rsrc/feature-request.txt
test/1cbs.cif
...@@ -1183,6 +1183,9 @@ struct Empty ...@@ -1183,6 +1183,9 @@ struct Empty
{ {
}; };
/// \brief A helper to make it possible to have conditions like ("id"_key == cif::null)
inline constexpr Empty null = Empty();
struct Key struct Key
{ {
explicit Key(const std::string &itemTag) explicit Key(const std::string &itemTag)
......
...@@ -241,8 +241,6 @@ class Atom ...@@ -241,8 +241,6 @@ class Atom
private: private:
friend class Structure; friend class Structure;
void setID(int id);
const AtomImpl &impl() const const AtomImpl &impl() const
{ {
if (not mImpl) if (not mImpl)
...@@ -578,8 +576,8 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b) ...@@ -578,8 +576,8 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
class Structure class Structure
{ {
public: public:
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {}) Structure(cif::File &p, size_t modelNr = 1, StructureOpenOptions options = {})
: Structure(p.firstDatablock(), modelNr, options) : Structure(p.front(), modelNr, options)
{ {
} }
...@@ -592,7 +590,7 @@ class Structure ...@@ -592,7 +590,7 @@ class Structure
~Structure(); ~Structure();
const AtomView &atoms() const { return mAtoms; } const AtomView &atoms() const { return mAtoms; }
AtomView &atoms() { return mAtoms; } // AtomView &atoms() { return mAtoms; }
AtomView waters() const; AtomView waters() const;
...@@ -614,7 +612,7 @@ class Structure ...@@ -614,7 +612,7 @@ class Structure
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; } const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
Atom getAtomByID(std::string id) const; Atom getAtomByID(const std::string &id) const;
// Atom getAtomByLocation(Point pt, float maxDistance) const; // Atom getAtomByLocation(Point pt, float maxDistance) const;
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID, Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
...@@ -705,10 +703,6 @@ class Structure ...@@ -705,10 +703,6 @@ class Structure
/// \param seq_id The sequence ID /// \param seq_id The sequence ID
void removeResidue(const std::string &asym_id, int seq_id); void removeResidue(const std::string &asym_id, int seq_id);
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
/// Will asssign new atom_id's to all atoms. Be carefull
void sortAtoms();
/// \brief Translate the coordinates of all atoms in the structure by \a t /// \brief Translate the coordinates of all atoms in the structure by \a t
void translate(Point t); void translate(Point t);
...@@ -748,9 +742,18 @@ class Structure ...@@ -748,9 +742,18 @@ class Structure
void loadAtomsForModel(StructureOpenOptions options); void loadAtomsForModel(StructureOpenOptions options);
template<typename... Args>
Atom& emplace_atom(Args ...args)
{
return emplace_atom(Atom{std::forward<Args>(args)...});
}
Atom &emplace_atom(Atom &&atom);
cif::Datablock &mDb; cif::Datablock &mDb;
size_t mModelNr; size_t mModelNr;
AtomView mAtoms; AtomView mAtoms;
std::vector<size_t> mAtomIndex;
std::list<Polymer> mPolymers; std::list<Polymer> mPolymers;
std::list<Branch> mBranches; std::list<Branch> mBranches;
std::vector<Residue> mNonPolymers; std::vector<Residue> mNonPolymers;
......
...@@ -113,9 +113,9 @@ int Atom::AtomImpl::compare(const AtomImpl &b) const ...@@ -113,9 +113,9 @@ int Atom::AtomImpl::compare(const AtomImpl &b) const
if (d == 0) if (d == 0)
d = mSeqID - b.mSeqID; d = mSeqID - b.mSeqID;
if (d == 0) if (d == 0)
d = mAtomID.compare(b.mAtomID);
if (d == 0)
d = mAuthSeqID.compare(b.mAuthSeqID); d = mAuthSeqID.compare(b.mAuthSeqID);
if (d == 0)
d = mAtomID.compare(b.mAtomID);
return d; return d;
} }
...@@ -359,11 +359,6 @@ bool Atom::operator==(const Atom &rhs) const ...@@ -359,11 +359,6 @@ bool Atom::operator==(const Atom &rhs) const
(&mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID); (&mImpl->mDb == &rhs.mImpl->mDb and mImpl->mID == rhs.mImpl->mID);
} }
void Atom::setID(int id)
{
mImpl->mID = std::to_string(id);
}
std::ostream &operator<<(std::ostream &os, const Atom &atom) std::ostream &operator<<(std::ostream &os, const Atom &atom)
{ {
os << atom.labelCompID() << ' ' << atom.labelAsymID() << ':' << atom.labelSeqID() << ' ' << atom.labelAtomID(); os << atom.labelCompID() << ' ' << atom.labelAsymID() << ':' << atom.labelSeqID() << ' ' << atom.labelAtomID();
...@@ -1369,7 +1364,7 @@ void Structure::loadAtomsForModel(StructureOpenOptions options) ...@@ -1369,7 +1364,7 @@ void Structure::loadAtomsForModel(StructureOpenOptions options)
if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H") if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H")
continue; continue;
mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, id, a)); emplace_atom(std::make_shared<Atom::AtomImpl>(db, id, a));
} }
} }
...@@ -1379,7 +1374,7 @@ Structure::Structure(const Structure &s) ...@@ -1379,7 +1374,7 @@ Structure::Structure(const Structure &s)
{ {
mAtoms.reserve(s.mAtoms.size()); mAtoms.reserve(s.mAtoms.size());
for (auto &atom : s.mAtoms) for (auto &atom : s.mAtoms)
mAtoms.emplace_back(atom.clone()); emplace_atom(atom.clone());
loadData(); loadData();
} }
...@@ -1472,19 +1467,6 @@ void Structure::loadData() ...@@ -1472,19 +1467,6 @@ void Structure::loadData()
} }
} }
void Structure::sortAtoms()
{
sort(mAtoms.begin(), mAtoms.end(), [](auto &a, auto &b)
{ return a.compare(b) < 0; });
int id = 1;
for (auto &atom : mAtoms)
{
atom.setID(id);
++id;
}
}
AtomView Structure::waters() const AtomView Structure::waters() const
{ {
AtomView result; AtomView result;
...@@ -1514,14 +1496,50 @@ AtomView Structure::waters() const ...@@ -1514,14 +1496,50 @@ AtomView Structure::waters() const
return result; return result;
} }
Atom Structure::getAtomByID(std::string id) const Atom Structure::getAtomByID(const std::string &id) const
{ {
for (auto &atom : mAtoms) // int L = 0, R = mAtoms.size() - 1;
// while (L <= R)
// {
// int i = (L + R) / 2;
// int d = mAtoms[i].id().compare(id);
// if (d == 0)
// return mAtoms[i];
// if (d < 0)
// L = i + 1;
// else
// R = i - 1;
// }
assert(mAtoms.size() == mAtomIndex.size());
int L = 0, R = mAtoms.size() - 1;
while (L <= R)
{ {
if (atom.id() == id) int i = (L + R) / 2;
const Atom &atom = mAtoms[mAtomIndex[i]];
int d = atom.id().compare(id);
if (d == 0)
return atom; return atom;
if (d < 0)
L = i + 1;
else
R = i - 1;
} }
// for (auto &atom : mAtoms)
// {
// if (atom.id() == id)
// return atom;
// }
throw std::out_of_range("Could not find atom with id " + id); throw std::out_of_range("Could not find atom with id " + id);
} }
...@@ -1747,6 +1765,31 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti ...@@ -1747,6 +1765,31 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti
// -------------------------------------------------------------------- // --------------------------------------------------------------------
Atom& Structure::emplace_atom(Atom &&atom)
{
int L = 0, R = mAtomIndex.size() - 1;
while (L <= R)
{
int i = (L + R) / 2;
const Atom &ai = mAtoms[mAtomIndex[i]];
int d = ai.id().compare(atom.id());
if (d == 0)
throw std::runtime_error("Duplicate atom ID " + atom.id());
if (d < 0)
L = i + 1;
else
R = i - 1;
}
mAtomIndex.insert(mAtomIndex.begin() + R + 1, mAtoms.size());
return mAtoms.emplace_back(std::move(atom));
}
void Structure::removeAtom(Atom &a) void Structure::removeAtom(Atom &a)
{ {
cif::Datablock &db = datablock(); cif::Datablock &db = datablock();
...@@ -1765,13 +1808,37 @@ void Structure::removeAtom(Atom &a) ...@@ -1765,13 +1808,37 @@ void Structure::removeAtom(Atom &a)
} }
} }
mAtoms.erase(remove(mAtoms.begin(), mAtoms.end(), a), mAtoms.end()); assert(mAtomIndex.size() == mAtoms.size());
}
void Structure::removeResidue(const std::string &asym_id, int seq_id) int L = 0, R = mAtomIndex.size() - 1;
{ while (L <= R)
{
int i = (L + R) / 2;
const Atom &atom = mAtoms[mAtomIndex[i]];
int d = atom.id().compare(a.id());
if (d == 0)
{
mAtoms.erase(mAtoms.begin() + mAtomIndex[i]);
mAtomIndex.erase(mAtomIndex.begin() + i);
break;
}
if (d < 0)
L = i + 1;
else
R = i - 1;
}
assert(L <= R);
} }
// void Structure::removeResidue(const std::string &asym_id, int seq_id)
// {
// }
void Structure::swapAtoms(Atom &a1, Atom &a2) void Structure::swapAtoms(Atom &a1, Atom &a2)
{ {
cif::Datablock &db = datablock(); cif::Datablock &db = datablock();
...@@ -1964,7 +2031,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve ...@@ -1964,7 +2031,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
{"pdbx_PDB_model_num", 1} {"pdbx_PDB_model_num", 1}
}); });
auto &newAtom = mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, atom_id, row)); auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
res.addAtom(newAtom); res.addAtom(newAtom);
} }
...@@ -2031,7 +2098,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s ...@@ -2031,7 +2098,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end()); auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, atom_id, row)); auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
res.addAtom(newAtom); res.addAtom(newAtom);
} }
...@@ -2109,7 +2176,7 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms) ...@@ -2109,7 +2176,7 @@ Branch& Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end()); auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, atom_id, row)); auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom); sugar.addAtom(newAtom);
} }
...@@ -2202,7 +2269,7 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec ...@@ -2202,7 +2269,7 @@ Branch& Structure::extendBranch(const std::string &asym_id, std::vector<std::vec
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end()); auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, atom_id, row)); auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom); sugar.addAtom(newAtom);
} }
......
...@@ -210,3 +210,115 @@ _struct_asym.details ? ...@@ -210,3 +210,115 @@ _struct_asym.details ?
// } // }
// } // }
// } // }
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(test_atom_id)
{
auto data = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
3 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
2 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;
data.loadDictionary("mmcif_pdbx_v50.dic");
mmcif::Structure s(data);
BOOST_CHECK_EQUAL(s.getAtomByID("1").authAtomID(), "CHA");
BOOST_CHECK_EQUAL(s.getAtomByID("2").authAtomID(), "CHC");
BOOST_CHECK_EQUAL(s.getAtomByID("3").authAtomID(), "CHB");
BOOST_CHECK_EQUAL(s.getAtomByID("4").authAtomID(), "CHD");
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(atom_numbers_1)
{
const std::filesystem::path test1(gTestDir / ".." / "examples" / "1cbs.cif.gz");
mmcif::File file(test1.string());
mmcif::Structure structure(file);
auto &db = file.data();
auto &atoms = structure.atoms();
auto ai = atoms.begin();
for (const auto &[id, label_asym_id, label_seq_id, label_atom_id, auth_seq_id, label_comp_id] :
db["atom_site"].rows<std::string,std::string,int,std::string,std::string,std::string>("id", "label_asym_id", "label_seq_id", "label_atom_id", "auth_seq_id", "label_comp_id"))
{
auto atom = structure.getAtomByID(id);
BOOST_CHECK_EQUAL(atom.labelAsymID(), label_asym_id);
BOOST_CHECK_EQUAL(atom.labelSeqID(), label_seq_id);
BOOST_CHECK_EQUAL(atom.labelAtomID(), label_atom_id);
BOOST_CHECK_EQUAL(atom.authSeqID(), auth_seq_id);
BOOST_CHECK_EQUAL(atom.labelCompID(), label_comp_id);
BOOST_ASSERT(ai != atoms.end());
BOOST_CHECK_EQUAL(ai->id(), id);
++ai;
}
BOOST_ASSERT(ai == atoms.end());
}
\ No newline at end of file
...@@ -156,6 +156,35 @@ _test.value ...@@ -156,6 +156,35 @@ _test.value
BOOST_CHECK(t2.front()["name"].as<std::string>() == "mies"); BOOST_CHECK(t2.front()["name"].as<std::string>() == "mies");
} }
BOOST_AUTO_TEST_CASE(ut3)
{
using namespace cif::literals;
auto f = R"(data_TEST
#
loop_
_test.id
_test.name
_test.value
1 aap 1.0
2 noot 1.1
3 mies 1.2
4 boom .
5 roos ?
)"_cf;
auto &db = f.firstDatablock();
BOOST_CHECK(db.getName() == "TEST");
auto &test = db["test"];
BOOST_CHECK(test.size() == 5);
BOOST_CHECK(test.exists("value"_key == cif::null));
BOOST_CHECK(test.find("value"_key == cif::null).size() == 2);
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(d1) BOOST_AUTO_TEST_CASE(d1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment