Commit 1242ce1d by maarten

backup voor het weekend

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@224 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent 7dedb46d
...@@ -122,6 +122,8 @@ class Compound ...@@ -122,6 +122,8 @@ class Compound
bool isIsomerOf(const Compound& c) const; bool isIsomerOf(const Compound& c) const;
std::vector<std::string> isomers() const;
private: private:
// Entity& mEntity; // Entity& mEntity;
std::string mId; std::string mId;
......
...@@ -9,12 +9,16 @@ ...@@ -9,12 +9,16 @@
#include <boost/filesystem/fstream.hpp> #include <boost/filesystem/fstream.hpp>
#include <boost/thread.hpp> #include <boost/thread.hpp>
#include <zeep/xml/document.hpp>
#include "cif++/Cif++.h" #include "cif++/Cif++.h"
#include "cif++/Compound.h" #include "cif++/Compound.h"
#include "cif++/mrsrc.h"
using namespace std; using namespace std;
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
namespace fs = boost::filesystem; namespace fs = boost::filesystem;
namespace zx = zeep::xml;
namespace libcif namespace libcif
{ {
...@@ -67,6 +71,88 @@ struct CompoundBondLess ...@@ -67,6 +71,88 @@ struct CompoundBondLess
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Isomers in the ccp4 dictionary
struct IsomerSet
{
vector<string> compounds;
template<typename Archive>
void serialize(Archive& ar, unsigned long version)
{
ar & zx::make_element_nvp("compound", compounds);
}
};
struct IsomerSets
{
vector<IsomerSet> isomers;
template<typename Archive>
void serialize(Archive& ar, unsigned long version)
{
ar & zx::make_element_nvp("isomer-set", isomers);
}
};
class IsomerDB
{
public:
static IsomerDB& instance();
size_t count(const string& compound) const;
const vector<string>& operator[](const string& compound) const;
private:
IsomerDB();
IsomerSets mData;
// map<string,size_t> mIndex;
};
IsomerDB::IsomerDB()
{
mrsrc::rsrc isomers("isomers.xml");
if (not isomers)
throw runtime_error("Missing isomers.xml resource");
zx::document doc(string(isomers.data(), isomers.size()));
zx::deserializer d(doc.root());
d & zx::make_element_nvp("isomers", mData);
}
IsomerDB& IsomerDB::instance()
{
static IsomerDB sInstance;
return sInstance;
}
size_t IsomerDB::count(const string& compound) const
{
size_t n = 0;
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
++n;
}
return n;
}
const vector<string>& IsomerDB::operator[](const string& compound) const
{
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
return d.compounds;
}
throw runtime_error("No isomer set found containing " + compound);
}
// --------------------------------------------------------------------
// Brute force comparison of two structures, when they are isomers the // Brute force comparison of two structures, when they are isomers the
// mapping between the atoms of both is returned // mapping between the atoms of both is returned
// This is not an optimal solution, but it works good enough for now // This is not an optimal solution, but it works good enough for now
...@@ -76,6 +162,7 @@ struct Node ...@@ -76,6 +162,7 @@ struct Node
string id; string id;
AtomType symbol; AtomType symbol;
vector<tuple<size_t,CompoundBondType>> links; vector<tuple<size_t,CompoundBondType>> links;
size_t hydrogens = 0;
}; };
// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure // Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
...@@ -115,9 +202,22 @@ bool SubStructuresAreIsomeric( ...@@ -115,9 +202,22 @@ bool SubStructuresAreIsomeric(
tie(lA, typeA) = na.links[i]; assert(lA < a.size()); tie(lA, typeA) = na.links[i]; assert(lA < a.size());
tie(lB, typeB) = nb.links[ilb[i]]; assert(lB < b.size()); tie(lB, typeB) = nb.links[ilb[i]]; assert(lB < b.size());
if (typeA != typeB or visitedA[lA] != visitedB[lB] or a[lA].symbol != b[lB].symbol or a[lA].links.size() != b[lB].links.size()) if (typeA != typeB or visitedA[lA] != visitedB[lB])
{
result = false; result = false;
else if (not visitedA[lA]) break;
}
auto& la = a[lA];
auto& lb = b[lB];
if (la.symbol != lb.symbol or la.hydrogens != lb.hydrogens or la.links.size() != lb.links.size())
{
result = false;
break;
}
if (not visitedA[lA])
result = SubStructuresAreIsomeric(a, b, lA, lB, visitedA, visitedB, m); result = SubStructuresAreIsomeric(a, b, lA, lB, visitedA, visitedB, m);
} }
...@@ -158,7 +258,14 @@ bool StructuresAreIsomeric(vector<CompoundAtom> atomsA, const vector<CompoundBon ...@@ -158,7 +258,14 @@ bool StructuresAreIsomeric(vector<CompoundAtom> atomsA, const vector<CompoundBon
size_t atom1 = ma.at(bondA.atomID[0]); size_t atom1 = ma.at(bondA.atomID[0]);
size_t atom2 = ma.at(bondA.atomID[1]); size_t atom2 = ma.at(bondA.atomID[1]);
if (a[atom2].symbol == H)
a[atom1].hydrogens += 1;
else
a[atom1].links.emplace_back(atom2, bondA.type); a[atom1].links.emplace_back(atom2, bondA.type);
if (a[atom1].symbol == H)
a[atom2].hydrogens += 1;
else
a[atom2].links.emplace_back(atom1, bondA.type); a[atom2].links.emplace_back(atom1, bondA.type);
} }
...@@ -173,21 +280,26 @@ bool StructuresAreIsomeric(vector<CompoundAtom> atomsA, const vector<CompoundBon ...@@ -173,21 +280,26 @@ bool StructuresAreIsomeric(vector<CompoundAtom> atomsA, const vector<CompoundBon
size_t atom1 = mb.at(bondB.atomID[0]); size_t atom1 = mb.at(bondB.atomID[0]);
size_t atom2 = mb.at(bondB.atomID[1]); size_t atom2 = mb.at(bondB.atomID[1]);
if (b[atom2].symbol == H)
b[atom1].hydrogens += 1;
else
b[atom1].links.emplace_back(atom2, bondB.type); b[atom1].links.emplace_back(atom2, bondB.type);
if (b[atom1].symbol == H)
b[atom2].hydrogens += 1;
else
b[atom2].links.emplace_back(atom1, bondB.type); b[atom2].links.emplace_back(atom1, bondB.type);
} }
size_t N = atomsA.size(); size_t N = atomsA.size();
size_t ia = find_if(a.begin(), a.end(), [](auto& a) { return a.symbol != libcif::H; }) - a.begin(); size_t ia = 0;
if (ia == N)
throw runtime_error("This compound contains no atoms other than Hydrogen?");
bool result = false; bool result = false;
// try each atom in B to see if it can be traced to be similar to A starting at zero // try each atom in B to see if it can be traced to be similar to A starting at zero
for (size_t ib = 0; ib < N; ++ib) for (size_t ib = 0; ib < N; ++ib)
{ {
if (b[ib].symbol != a[ia].symbol or a[ib].links.size() != b[ia].links.size()) if (b[ib].symbol != a[ia].symbol or a[ia].hydrogens != b[ib].hydrogens or a[ia].links.size() != b[ib].links.size())
continue; continue;
vector<bool> va(N, false), vb(N, false); vector<bool> va(N, false), vb(N, false);
...@@ -613,4 +725,22 @@ bool Compound::isIsomerOf(const Compound& c) const ...@@ -613,4 +725,22 @@ bool Compound::isIsomerOf(const Compound& c) const
return result; return result;
} }
vector<string> Compound::isomers() const
{
vector<string> result;
auto& db = IsomerDB::instance();
if (db.count(mId))
{
result = db[mId];
auto i = find(result.begin(), result.end(), mId);
assert(i != result.end());
result.erase(i);
}
return result;
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment