Commit 915d6504 by Maarten L. Hekkelman

First steps, remove CCP4 info from compound

parent 5e63ca7a
...@@ -289,9 +289,10 @@ namespace detail ...@@ -289,9 +289,10 @@ namespace detail
void swap(ItemReference& b); void swap(ItemReference& b);
template<typename T = std::string> template<typename T = std::string>
T as() const auto as() const
{ {
return item_value_as<T>::convert(*this); using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
return item_value_as<value_type>::convert(*this);
} }
template<typename T> template<typename T>
...@@ -337,11 +338,13 @@ namespace detail ...@@ -337,11 +338,13 @@ namespace detail
template<typename T> template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_floating_point_v<T>>> struct ItemReference::item_value_as<T, std::enable_if_t<std::is_floating_point_v<T>>>
{ {
static T convert(const ItemReference& ref) using value_type = std::remove_reference_t<std::remove_cv_t<T>>;
static value_type convert(const ItemReference& ref)
{ {
T result = {}; value_type result = {};
if (not ref.empty()) if (not ref.empty())
result = static_cast<T>(std::stod(ref.c_str())); result = static_cast<value_type>(std::stod(ref.c_str()));
return result; return result;
} }
...@@ -376,7 +379,7 @@ namespace detail ...@@ -376,7 +379,7 @@ namespace detail
}; };
template<typename T> template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T>>> struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T> and not std::is_same_v<T, bool>>>
{ {
static T convert(const ItemReference& ref) static T convert(const ItemReference& ref)
{ {
...@@ -417,7 +420,7 @@ namespace detail ...@@ -417,7 +420,7 @@ namespace detail
}; };
template<typename T> template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T>>> struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T> and not std::is_same_v<T, bool>>>
{ {
static T convert(const ItemReference& ref) static T convert(const ItemReference& ref)
{ {
...@@ -482,6 +485,25 @@ namespace detail ...@@ -482,6 +485,25 @@ namespace detail
} }
}; };
template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
{
static bool convert(const ItemReference& ref)
{
bool result = false;
if (not ref.empty())
result = iequals(ref.c_str(), "y");
return result;
}
static int compare(const ItemReference& ref, bool value, bool icase)
{
bool rv = convert(ref);
return value && rv ? 0
: (rv < value ? -1 : 1);
}
};
template<size_t N> template<size_t N>
struct ItemReference::item_value_as<char[N]> struct ItemReference::item_value_as<char[N]>
{ {
......
...@@ -26,10 +26,13 @@ ...@@ -26,10 +26,13 @@
#pragma once #pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set> #include <set>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include <map>
#include "cif++/AtomType.hpp" #include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp" #include "cif++/Cif++.hpp"
...@@ -38,310 +41,156 @@ namespace mmcif ...@@ -38,310 +41,156 @@ namespace mmcif
{ {
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class composition. A compositon consists of
// entities. Each Entity can be either a polymer, a non-polymer
// a macrolide or a water molecule.
// Entities themselves are made up of compounds. And compounds
// contain CompoundAtom records for each atom.
class Compound; class Compound;
class Link;
struct CompoundAtom; struct CompoundAtom;
enum BondType { singleBond, doubleBond, tripleBond, delocalizedBond }; /// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
enum class BondType
// -------------------------------------------------------------------- {
// struct containing information about an atom in a chemical compound sing, // 'single bond'
// This information comes from the CCP4 monomer library. doub, // 'double bond'
trip, // 'triple bond'
struct CompoundAtom quad, // 'quadruple bond'
{ arom, // 'aromatic bond'
std::string id; poly, // 'polymeric bond'
AtomType typeSymbol; delo, // 'delocalized double bond'
std::string typeEnergy; pi, // 'pi bond'
float partialCharge;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct CompoundBond
{
std::string atomID[2];
BondType type;
float distance;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct CompoundAngle
{
std::string atomID[3];
float angle;
float esd;
}; };
// -------------------------------------------------------------------- std::string to_string(BondType bondType);
// struct containing information about the bond-angles BondType from_string(const std::string& bondType);
// This information comes from the CCP4 monomer library.
struct CompoundTorsion /// --------------------------------------------------------------------
{ /// \brief struct containing information about an atom in a chemical compound.
std::string atomID[4]; /// This is a subset of the available information. Contact the author if you need more fields.
float angle;
float esd;
int period;
};
// -------------------------------------------------------------------- struct CompoundAtom
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct CompoundPlane
{ {
std::string id; std::string id;
std::vector<std::string> atomID; AtomType typeSymbol;
float esd; int charge;
bool aromatic;
bool leavingAtom;
bool stereoConfig;
float x, y, z;
}; };
// -------------------------------------------------------------------- /// --------------------------------------------------------------------
// struct containing information about a chiral centre /// \brief struct containing information about the bonds
// This information comes from the CCP4 monomer library.
enum ChiralVolumeSign { negativ, positiv, both };
struct CompoundChiralCentre struct CompoundBond
{ {
std::string id; std::string atomID[2];
std::string atomIDCentre; BondType type;
std::string atomID[3]; bool aromatic, stereoConfig;
ChiralVolumeSign volumeSign;
}; };
// -------------------------------------------------------------------- /// --------------------------------------------------------------------
// a class that contains information about a chemical compound. /// \brief a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default. /// This information is derived from the CDD by default.
// To create compounds, you'd best use the factory method. ///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
class Compound class Compound
{ {
public: public:
Compound(cif::Datablock &db);
Compound(const std::string& file, const std::string& id, const std::string& name,
const std::string& group);
// factory method, create a Compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
static const Compound* create(const std::string& id);
// this second factory method can create a Compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static const Compound* create(const std::string& id, const std::string& name,
const std::string& type, const std::string& formula);
// add an additional path to the monomer library.
static void addMonomerLibraryPath(const std::string& dir);
// accessors
std::string id() const { return mID; }
std::string name() const { return mName; }
std::string type() const;
std::string group() const { return mGroup; }
std::vector<CompoundAtom> atoms() const { return mAtoms; }
std::vector<CompoundBond> bonds() const { return mBonds; }
std::vector<CompoundAngle> angles() const { return mAngles; }
std::vector<CompoundChiralCentre> chiralCentres() const
{ return mChiralCentres; }
std::vector<CompoundPlane> planes() const { return mPlanes; }
std::vector<CompoundTorsion> torsions() const { return mTorsions; }
CompoundAtom getAtomByID(const std::string& atomID) const;
bool atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const;
float atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const;
float bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const;
float chiralVolume(const std::string& centreID) const;
std::string formula() const;
float formulaWeight() const;
int charge() const;
bool isWater() const;
bool isSugar() const;
std::vector<std::string> isomers() const;
bool isIsomerOf(const Compound& c) const;
std::vector<std::tuple<std::string,std::string>> mapToIsomer(const Compound& c) const;
private:
~Compound(); ~Compound();
cif::File mCF; /// \brief factory method, create a Compound based on the three letter code
/// (for amino acids) or the one-letter code (for bases) or the
std::string mID; /// code as it is known in the CCD.
std::string mName;
std::string mGroup;
std::vector<CompoundAtom> mAtoms;
std::vector<CompoundBond> mBonds;
std::vector<CompoundAngle> mAngles;
std::vector<CompoundTorsion>mTorsions;
std::vector<CompoundChiralCentre>
mChiralCentres;
std::vector<CompoundPlane> mPlanes;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct LinkAtom
{
int compID;
std::string atomID;
bool operator==(const LinkAtom& rhs) const { return compID == rhs.compID and atomID == rhs.atomID; }
};
struct LinkBond
{
LinkAtom atom[2];
BondType type;
float distance;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct LinkAngle
{
LinkAtom atom[3];
float angle;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-torsions
// This information comes from the CCP4 monomer library.
struct LinkTorsion static const Compound *create(const std::string &id);
{
LinkAtom atom[4];
float angle;
float esd;
int period;
};
// -------------------------------------------------------------------- // /// this second factory method can create a Compound even if it is not
// struct containing information about the bond-angles // /// recorded in the library. It will take the values from the CCP4 lib
// This information comes from the CCP4 monomer library. // /// unless the value passed to this function is not empty.
// static const Compound* create(const std::string& id, const std::string& name,
// const std::string& type, const std::string& formula);
struct LinkPlane /// \brief Create compounds based on the data in the file \a components
{ ///
std::string id; /// It is often required to add information about unknown components.
std::vector<LinkAtom> atoms; /// This file parses either a CCP4 or a CCD formatted components file
float esd; ///
}; /// \param components The mmCIF file containing the components
/// \result An array containing the ID's of the added components
static std::vector<std::string> addExtraComponents(const std::filesystem::path &components);
// -------------------------------------------------------------------- // accessors
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
struct LinkChiralCentre std::string id() const { return mID; }
{ std::string name() const { return mName; }
std::string id; std::string type() const { return mType; }
LinkAtom atomCentre; std::string formula() const { return mFormula; }
LinkAtom atom[3]; float formulaWeight() const { return mFormulaWeight; }
ChiralVolumeSign volumeSign; int formalCharge() const { return mFormalCharge; }
};
// -------------------------------------------------------------------- const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
// a class that contains information about a chemical link between compounds. const std::vector<CompoundBond> &bonds() const { return mBonds; }
// This information is derived from the ccp4 monomer library by default.
class Link CompoundAtom getAtomByID(const std::string &atomID) const;
{
public:
Link(cif::Datablock& db); bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
// float chiralVolume(const std::string &centreID) const;
// Factory method. bool isWater() const
static const Link& create(const std::string& id); {
return mID == "HOH" or mID == "H2O" or mID == "WAT";
}
// accessors
std::string id() const { return mID; }
std::vector<LinkBond> bonds() const { return mBonds; }
std::vector<LinkAngle> angles() const { return mAngles; }
std::vector<LinkChiralCentre> chiralCentres() const { return mChiralCentres; }
std::vector<LinkPlane> planes() const { return mPlanes; }
std::vector<LinkTorsion> torsions() const { return mTorsions; }
float atomBondValue(const LinkAtom& atomId_1, const LinkAtom& atomId_2) const;
float bondAngle(const LinkAtom& atomId_1, const LinkAtom& atomId_2, const LinkAtom& atomId_3) const;
float chiralVolume(const std::string& id) const;
private: private:
~Link(); std::string mID;
std::string mName;
std::string mID; std::string mType;
std::vector<LinkBond> mBonds; std::string mFormula;
std::vector<LinkAngle> mAngles; float mFormulaWeight;
std::vector<LinkTorsion> mTorsions; int mFormalCharge;
std::vector<LinkChiralCentre> mChiralCentres; std::vector<CompoundAtom> mAtoms;
std::vector<LinkPlane> mPlanes; std::vector<CompoundBond> mBonds;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Factory class for Compound and Link objects // Factory class for Compound and Link objects
extern const std::map<std::string,char> kAAMap, kBaseMap; extern const std::map<std::string, char> kAAMap, kBaseMap;
class CompoundFactory class CompoundFactory
{ {
public: public:
static void init(bool useThreadLocalInstanceOnly); static void init(bool useThreadLocalInstanceOnly);
static CompoundFactory& instance(); static CompoundFactory &instance();
static void clear(); static void clear();
void pushDictionary(const std::string& inDictFile); void pushDictionary(const std::string &inDictFile);
void popDictionary(); void popDictionary();
bool isKnownPeptide(const std::string& res_name) const; bool isKnownPeptide(const std::string &res_name) const;
bool isKnownBase(const std::string& res_name) const; bool isKnownBase(const std::string &res_name) const;
std::string unalias(const std::string& res_name) const; const Compound *get(std::string id);
const Compound *create(std::string id);
const Compound* get(std::string id);
const Compound* create(std::string id);
const Link* getLink(std::string id);
const Link* createLink(std::string id);
~CompoundFactory(); ~CompoundFactory();
private: private:
CompoundFactory(); CompoundFactory();
CompoundFactory(const CompoundFactory&) = delete; CompoundFactory(const CompoundFactory &) = delete;
CompoundFactory& operator=(const CompoundFactory&) = delete; CompoundFactory &operator=(const CompoundFactory &) = delete;
static CompoundFactory* sInstance; static CompoundFactory *sInstance;
static thread_local std::unique_ptr<CompoundFactory> tlInstance; static thread_local std::unique_ptr<CompoundFactory> tlInstance;
static bool sUseThreadLocalInstance; static bool sUseThreadLocalInstance;
class CompoundFactoryImpl* mImpl; class CompoundFactoryImpl *mImpl;
}; };
} } // namespace mmcif
...@@ -141,9 +141,6 @@ class Atom ...@@ -141,9 +141,6 @@ class Atom
// access data in compound for this atom // access data in compound for this atom
// the energy-type field
std::string energyType() const;
// convenience routine // convenience routine
bool isBackBone() const bool isBackBone() const
{ {
...@@ -243,10 +240,6 @@ class Residue ...@@ -243,10 +240,6 @@ class Residue
bool isWater() const { return mCompoundID == "HOH"; } bool isWater() const { return mCompoundID == "HOH"; }
bool isSugar() const;
bool isPyranose() const;
bool isFuranose() const;
const Structure& structure() const { return *mStructure; } const Structure& structure() const { return *mStructure; }
bool empty() const { return mStructure == nullptr; } bool empty() const { return mStructure == nullptr; }
......
...@@ -29,8 +29,8 @@ ...@@ -29,8 +29,8 @@
#endif #endif
#include <map> #include <map>
#include <numeric>
#include <mutex> #include <mutex>
#include <numeric>
#include <shared_mutex> #include <shared_mutex>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
...@@ -39,9 +39,9 @@ ...@@ -39,9 +39,9 @@
#include <fstream> #include <fstream>
#include "cif++/Cif++.hpp" #include "cif++/Cif++.hpp"
#include "cif++/Point.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp" #include "cif++/CifUtils.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
namespace fs = std::filesystem; namespace fs = std::filesystem;
...@@ -50,11 +50,41 @@ namespace mmcif ...@@ -50,11 +50,41 @@ namespace mmcif
{ {
// -------------------------------------------------------------------- // --------------------------------------------------------------------
std::string to_string(BondType bondType)
{
switch (bondType)
{
case BondType::sing: return "sing";
case BondType::doub: return "doub";
case BondType::trip: return "trip";
case BondType::quad: return "quad";
case BondType::arom: return "arom";
case BondType::poly: return "poly";
case BondType::delo: return "delo";
case BondType::pi: return "pi";
}
}
BondType from_string(const std::string& bondType)
{
if (cif::iequals(bondType, "sing")) return BondType::sing;
if (cif::iequals(bondType, "doub")) return BondType::doub;
if (cif::iequals(bondType, "trip")) return BondType::trip;
if (cif::iequals(bondType, "quad")) return BondType::quad;
if (cif::iequals(bondType, "arom")) return BondType::arom;
if (cif::iequals(bondType, "poly")) return BondType::poly;
if (cif::iequals(bondType, "delo")) return BondType::delo;
if (cif::iequals(bondType, "pi")) return BondType::pi;
throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// Compound helper classes // Compound helper classes
struct CompoundAtomLess struct CompoundAtomLess
{ {
bool operator()(const CompoundAtom& a, const CompoundAtom& b) const bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
{ {
int d = a.id.compare(b.id); int d = a.id.compare(b.id);
if (d == 0) if (d == 0)
...@@ -65,512 +95,59 @@ struct CompoundAtomLess ...@@ -65,512 +95,59 @@ struct CompoundAtomLess
struct CompoundBondLess struct CompoundBondLess
{ {
bool operator()(const CompoundBond& a, const CompoundBond& b) const bool operator()(const CompoundBond &a, const CompoundBond &b) const
{ {
int d = a.atomID[0].compare(b.atomID[0]); int d = a.atomID[0].compare(b.atomID[0]);
if (d == 0) if (d == 0)
d = a.atomID[1].compare(b.atomID[1]); d = a.atomID[1].compare(b.atomID[1]);
if (d == 0) if (d == 0)
d = a.type - b.type; d = static_cast<int>(a.type) - static_cast<int>(b.type);
return d < 0; return d < 0;
} }
}; };
// --------------------------------------------------------------------
// Isomers in the ccp4 dictionary
struct IsomerSet
{
std::vector<std::string> compounds;
};
struct IsomerSets
{
std::vector<IsomerSet> isomers;
};
class IsomerDB
{
public:
static IsomerDB& instance();
size_t count(const std::string& compound) const;
const std::vector<std::string>& operator[](const std::string& compound) const;
private:
IsomerDB();
IsomerSets mData;
};
IsomerDB::IsomerDB()
{
#if defined(CACHE_DIR)
fs::path isomersFile = fs::path(CACHE_DIR) / "isomers.txt";
if (not fs::exists(isomersFile))
std::cerr << "Could not locate isomers.txt in " CACHE_DIR << std::endl;
else
{
std::ifstream is(isomersFile);
std::string line;
while (std::getline(is, line))
{
IsomerSet compounds;
ba::split(compounds.compounds, line, ba::is_any_of(":"));
if (not compounds.compounds.empty())
mData.isomers.emplace_back(std::move(compounds));
}
}
#endif
}
IsomerDB& IsomerDB::instance()
{
static IsomerDB sInstance;
return sInstance;
}
size_t IsomerDB::count(const std::string& compound) const
{
size_t n = 0;
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
++n;
}
return n;
}
const std::vector<std::string>& IsomerDB::operator[](const std::string& compound) const
{
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
return d.compounds;
}
throw std::runtime_error("No isomer set found containing " + compound);
}
// --------------------------------------------------------------------
// Brute force comparison of two structures, when they are isomers the
// mapping between the atoms of both is returned
// This is not an optimal solution, but it works good enough for now
struct Node
{
std::string id;
AtomType symbol;
std::vector<std::tuple<size_t,BondType>> links;
size_t hydrogens = 0;
};
// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
bool SubStructuresAreIsomeric(
const std::vector<Node>& a, const std::vector<Node>& b, size_t iA, size_t iB,
std::vector<bool> visitedA, std::vector<bool> visitedB, std::vector<std::tuple<std::string,std::string>>& outMapping)
{
auto& na = a[iA];
auto& nb = b[iB];
size_t N = na.links.size();
assert(na.symbol == nb.symbol);
assert(nb.links.size() == N);
// we're optimistic today
bool result = true;
visitedA[iA] = true;
visitedB[iB] = true;
// we now have two sets of links to compare.
// Compare each permutation of the second set of indices with the first
std::vector<size_t> ilb(N);
iota(ilb.begin(), ilb.end(), 0);
for (;;)
{
result = true;
std::vector<std::tuple<std::string,std::string>> m;
for (size_t i = 0; result and i < N; ++i)
{
size_t lA, lB;
BondType typeA, typeB;
std::tie(lA, typeA) = na.links[i]; assert(lA < a.size());
std::tie(lB, typeB) = nb.links[ilb[i]]; assert(lB < b.size());
if (typeA != typeB or visitedA[lA] != visitedB[lB])
{
result = false;
break;
}
auto& la = a[lA];
auto& lb = b[lB];
if (la.symbol != lb.symbol or la.hydrogens != lb.hydrogens or la.links.size() != lb.links.size())
{
result = false;
break;
}
if (not visitedA[lA])
result = SubStructuresAreIsomeric(a, b, lA, lB, visitedA, visitedB, m);
}
if (result)
{
outMapping.insert(outMapping.end(), m.begin(), m.end());
break;
}
if (not next_permutation(ilb.begin(), ilb.end()))
break;
}
if (result and na.id != nb.id)
outMapping.emplace_back(na.id, nb.id);
return result;
}
bool StructuresAreIsomeric(std::vector<CompoundAtom> atomsA, const std::vector<CompoundBond>& bondsA,
std::vector<CompoundAtom> atomsB, const std::vector<CompoundBond>& bondsB,
std::vector<std::tuple<std::string,std::string>>& outMapping)
{
assert(atomsA.size() == atomsB.size());
assert(bondsA.size() == bondsB.size());
std::vector<Node> a, b;
std::map<std::string,size_t> ma, mb;
for (auto& atomA: atomsA)
{
ma[atomA.id] = a.size();
a.push_back({atomA.id, atomA.typeSymbol});
}
for (auto& bondA: bondsA)
{
size_t atom1 = ma.at(bondA.atomID[0]);
size_t atom2 = ma.at(bondA.atomID[1]);
if (a[atom2].symbol == H)
a[atom1].hydrogens += 1;
else
a[atom1].links.emplace_back(atom2, bondA.type);
if (a[atom1].symbol == H)
a[atom2].hydrogens += 1;
else
a[atom2].links.emplace_back(atom1, bondA.type);
}
for (auto& atomB: atomsB)
{
mb[atomB.id] = b.size();
b.push_back({atomB.id, atomB.typeSymbol});
}
for (auto& bondB: bondsB)
{
size_t atom1 = mb.at(bondB.atomID[0]);
size_t atom2 = mb.at(bondB.atomID[1]);
if (b[atom2].symbol == H)
b[atom1].hydrogens += 1;
else
b[atom1].links.emplace_back(atom2, bondB.type);
if (b[atom1].symbol == H)
b[atom2].hydrogens += 1;
else
b[atom2].links.emplace_back(atom1, bondB.type);
}
size_t N = atomsA.size();
size_t ia = 0;
bool result = false;
// try each atom in B to see if it can be traced to be similar to A starting at zero
for (size_t ib = 0; ib < N; ++ib)
{
if (b[ib].symbol != a[ia].symbol or a[ia].hydrogens != b[ib].hydrogens or a[ia].links.size() != b[ib].links.size())
continue;
std::vector<bool> va(N, false), vb(N, false);
if (SubStructuresAreIsomeric(a, b, ia, ib, va, vb, outMapping))
{
result = true;
break;
}
}
return result;
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Compound // Compound
Compound::Compound(const std::string& file, const std::string& id, Compound::Compound(cif::Datablock &db)
const std::string& name, const std::string& group)
: mID(id), mName(name), mGroup(group)
{ {
try auto &chemComp = db["chem_comp"];
{
mCF.load(file);
// locate the datablock
auto& db = mCF["comp_" + id];
auto& compoundAtoms = db["chem_comp_atom"];
for (auto row: compoundAtoms)
{
std::string id, symbol, energy;
float charge;
cif::tie(id, symbol, energy, charge) = row.get("atom_id", "type_symbol", "type_energy", "partial_charge");
mAtoms.push_back({
id, AtomTypeTraits(symbol).type(), energy, charge
});
}
sort(mAtoms.begin(), mAtoms.end(), CompoundAtomLess());
auto& compBonds = db["chem_comp_bond"];
for (auto row: compBonds)
{
CompoundBond b;
std::string type, aromatic;
cif::tie(b.atomID[0], b.atomID[1], type, b.distance, b.esd) =
row.get("atom_id_1", "atom_id_2", "type", "value_dist", "value_dist_esd");
using cif::iequals;
if (iequals(type, "single") or iequals(type, "sing")) b.type = singleBond;
else if (iequals(type, "double") or iequals(type, "doub")) b.type = doubleBond;
else if (iequals(type, "triple") or iequals(type, "trip")) b.type = tripleBond;
else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
b.type = delocalizedBond;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_comp_bond.type " << type << " in " << id << std::endl;
b.type = singleBond;
}
if (b.atomID[0] > b.atomID[1])
swap(b.atomID[0], b.atomID[1]);
mBonds.push_back(b);
}
sort(mBonds.begin(), mBonds.end(), CompoundBondLess());
for (auto row: db["chem_comp_angle"]) if (chemComp.size() != 1)
{ throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
CompoundAngle a;
cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.angle, a.esd) =
row.get("atom_id_1", "atom_id_2", "atom_id_3", "value_angle", "value_angle_esd");
mAngles.push_back(a);
}
for (auto row: db["chem_comp_tor"]) cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
{ chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
CompoundTorsion a;
cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.atomID[3], a.angle, a.esd, a.period) =
row.get("atom_id_1", "atom_id_2", "atom_id_3", "atom_id_4", "value_angle", "value_angle_esd", "period");
mTorsions.push_back(a);
}
for (auto row: db["chem_comp_chir"]) auto &chemCompAtom = db["chem_comp_atom"];
{ for (auto row: chemCompAtom)
CompoundChiralCentre cc;
std::string volumeSign;
cif::tie(cc.id, cc.atomIDCentre, cc.atomID[0],
cc.atomID[1], cc.atomID[2], volumeSign) =
row.get("id", "atom_id_centre", "atom_id_1",
"atom_id_2", "atom_id_3", "volume_sign");
if (volumeSign == "negativ" or volumeSign == "negative")
cc.volumeSign = negativ;
else if (volumeSign == "positiv" or volumeSign == "positive")
cc.volumeSign = positiv;
else if (volumeSign == "both")
cc.volumeSign = both;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_comp_chir.volume_sign " << volumeSign << " in " << id << std::endl;
continue;
}
mChiralCentres.push_back(cc);
}
auto& compPlanes = db["chem_comp_plane_atom"];
for (auto row: compPlanes)
{
std::string atom_id, plane_id;
float esd;
cif::tie(atom_id, plane_id, esd) = row.get("atom_id", "plane_id", "dist_esd");
auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == plane_id;});
if (i == mPlanes.end())
mPlanes.emplace_back(CompoundPlane{ plane_id, { atom_id }, esd });
else
i->atomID.push_back(atom_id);
}
}
catch (const std::exception& ex)
{ {
std::cerr << "Error loading ccp4 file for " << id << " from file " << file << std::endl; CompoundAtom atom;
throw; std::string typeSymbol;
cif::tie(atom.id, typeSymbol, atom.charge, atom.aromatic, atom.leavingAtom, atom.stereoConfig, atom.x, atom.y, atom.z) =
row.get("id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
atom.typeSymbol = AtomTypeTraits(typeSymbol).type();
mAtoms.push_back(std::move(atom));
} }
}
std::string Compound::formula() const auto &chemCompBond = db["chem_comp_bond"];
{ for (auto row: chemCompBond)
std::string result;
std::map<std::string,uint32_t> atoms;
float chargeSum = 0;
for (auto r: mAtoms)
{
atoms[AtomTypeTraits(r.typeSymbol).symbol()] += 1;
chargeSum += r.partialCharge;
}
auto c = atoms.find("C");
if (c != atoms.end())
{
result = "C";
if (c->second > 1)
result += std::to_string(c->second);
atoms.erase(c);
auto h = atoms.find("H");
if (h != atoms.end())
{
result += " H";
if (h->second > 1)
result += std::to_string(h->second);
atoms.erase(h);
}
}
for (auto a: atoms)
{ {
if (not result.empty()) CompoundBond bond;
result += ' '; std::string valueOrder;
cif::tie(bond.atomID[0], bond.atomID[1], valueOrder, bond.aromatic, bond.stereoConfig)
result += a.first; = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
if (a.second > 1) bond.type = from_string(valueOrder);
result += std::to_string(a.second); mBonds.push_back(std::move(bond));
} }
int charge = lrint(chargeSum);
if (charge != 0)
result += ' ' + std::to_string(charge);
return result;
}
float Compound::formulaWeight() const
{
float result = 0;
for (auto r: mAtoms)
result += AtomTypeTraits(r.typeSymbol).weight();
return result;
}
int Compound::charge() const
{
float result = 0;
for (auto r: mAtoms)
result += r.partialCharge;
return lrint(result);
}
std::string Compound::type() const
{
std::string result;
// known groups are (counted from ccp4 monomer dictionary)
// D-pyranose
// DNA
// L-PEPTIDE LINKING
// L-SACCHARIDE
// L-peptide
// L-pyranose
// M-peptide
// NON-POLYMER
// P-peptide
// RNA
// furanose
// non-polymer
// non_polymer
// peptide
// pyranose
// saccharide
if (cif::iequals(mID, "gly"))
result = "peptide linking";
else if (cif::iequals(mGroup, "l-peptide") or cif::iequals(mGroup, "L-peptide linking") or cif::iequals(mGroup, "peptide"))
result = "L-peptide linking";
else if (cif::iequals(mGroup, "DNA"))
result = "DNA linking";
else if (cif::iequals(mGroup, "RNA"))
result = "RNA linking";
// else
// result = mGroup;
return result;
}
bool Compound::isWater() const
{
return mID == "HOH" or mID == "H2O";
}
bool Compound::isSugar() const
{
return cif::iequals(mGroup, "furanose") or cif::iequals(mGroup, "pyranose");
} }
CompoundAtom Compound::getAtomByID(const std::string& atomID) const CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{ {
CompoundAtom result = {}; CompoundAtom result = {};
for (auto& a: mAtoms) for (auto &a : mAtoms)
{ {
if (a.id == atomID) if (a.id == atomID)
{ {
...@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const ...@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
} }
} }
if (result.id != atomID) if (result.id != atomID)
throw std::out_of_range("No atom " + atomID + " in Compound " + mID); throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
return result; return result;
} }
const Compound* Compound::create(const std::string& id) const Compound *Compound::create(const std::string &id)
{ {
auto result = CompoundFactory::instance().get(id); auto result = CompoundFactory::instance().get(id);
if (result == nullptr) if (result == nullptr)
...@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id) ...@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
return result; return result;
} }
bool Compound::atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
{ {
auto i = find_if(mBonds.begin(), mBonds.end(), auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond& b) [&](const CompoundBond &b) {
{ return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2)
or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
}); });
return i != mBonds.end(); return i != mBonds.end();
} }
float Compound::atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const // float Compound::atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const
{ // {
auto i = find_if(mBonds.begin(), mBonds.end(), // auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond& b) // [&](const CompoundBond &b) {
{ // return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) // });
or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
});
return i != mBonds.end() ? i->distance : 0;
}
bool Compound::isIsomerOf(const Compound& c) const // return i != mBonds.end() ? i->distance : 0;
{ // }
bool result = false;
for (;;)
{
// easy tests first
if (mID == c.mID)
{
result = true;
break;
}
if (mAtoms.size() != c.mAtoms.size())
break;
if (mBonds.size() != c.mBonds.size())
break;
if (mChiralCentres.size() != c.mChiralCentres.size())
break;
// same number of atoms of each type?
std::map<AtomType,int> aTypeCount, bTypeCount;
bool sameAtomNames = true;
for (size_t i = 0; i < mAtoms.size(); ++i)
{
auto& a = mAtoms[i];
auto& b = c.mAtoms[i];
aTypeCount[a.typeSymbol] += 1;
bTypeCount[b.typeSymbol] += 1;
if (a.id != b.id or a.typeSymbol != b.typeSymbol)
sameAtomNames = false;
}
if (not sameAtomNames and aTypeCount != bTypeCount)
break;
bool sameBonds = sameAtomNames;
for (size_t i = 0; sameBonds and i < mBonds.size(); ++i)
{
sameBonds =
mBonds[i].atomID[0] == c.mBonds[i].atomID[0] and
mBonds[i].atomID[1] == c.mBonds[i].atomID[1] and
mBonds[i].type == c.mBonds[i].type;
}
if (sameBonds)
{
result = true;
break;
}
// implement rest of tests
std::vector<std::tuple<std::string,std::string>> mapping;
result = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, mapping);
if (cif::VERBOSE and result)
{
for (auto& m: mapping)
std::cerr << " " << std::get<0>(m) << " => " << std::get<1>(m) << std::endl;
}
break;
}
return result;
}
std::vector<std::tuple<std::string,std::string>> Compound::mapToIsomer(const Compound& c) const // float Compound::bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const
{ // {
std::vector<std::tuple<std::string,std::string>> result; // float result = nanf("1");
bool check = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, result);
if (not check)
throw std::runtime_error("Compounds " + id() + " and " + c.id() + " are not isomers in call to mapToIsomer");
return result;
}
std::vector<std::string> Compound::isomers() const // for (auto &a : mAngles)
{ // {
std::vector<std::string> result; // if (not(a.atomID[1] == atomId_2 and
// ((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
auto& db = IsomerDB::instance(); // (a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
if (db.count(mID)) // continue;
{
result = db[mID];
auto i = find(result.begin(), result.end(), mID);
assert(i != result.end());
result.erase(i);
}
return result;
}
float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const // result = a.angle;
{ // break;
float result = nanf("1"); // }
for (auto& a: mAngles) // return result;
{ // }
if (not (a.atomID[1] == atomId_2 and
((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
(a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
continue;
result = a.angle;
break;
}
return result;
}
//static float calcC(float a, float b, float alpha) //static float calcC(float a, float b, float alpha)
//{ //{
...@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId ...@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
// float d = sqrt(b * b - f * f); // float d = sqrt(b * b - f * f);
// float e = a - d; // float e = a - d;
// float c = sqrt(f * f + e * e); // float c = sqrt(f * f + e * e);
// //
// return c; // return c;
//} //}
float Compound::chiralVolume(const std::string& centreID) const // float Compound::chiralVolume(const std::string &centreID) const
{ // {
float result = 0; // float result = 0;
for (auto& cv: mChiralCentres)
{
if (cv.id != centreID)
continue;
// calculate the expected chiral volume
// the edges
float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
// the angles for the top of the tetrahedron
float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
float cosa = cos(alpha * kPI / 180);
float cosb = cos(beta * kPI / 180);
float cosc = cos(gamma * kPI / 180);
result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
if (cv.volumeSign == negativ)
result = -result;
break;
}
return result;
}
// -------------------------------------------------------------------- // for (auto &cv : mChiralCentres)
// {
// if (cv.id != centreID)
// continue;
Link::Link(cif::Datablock& db) // // calculate the expected chiral volume
{
mID = db.getName();
auto& linkBonds = db["chem_link_bond"];
for (auto row: linkBonds)
{
LinkBond b;
std::string type, aromatic;
cif::tie(b.atom[0].compID, b.atom[0].atomID,
b.atom[1].compID, b.atom[1].atomID, type, b.distance, b.esd) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"type", "value_dist", "value_dist_esd");
using cif::iequals;
if (iequals(type, "single") or iequals(type, "sing")) b.type = singleBond;
else if (iequals(type, "double") or iequals(type, "doub")) b.type = doubleBond;
else if (iequals(type, "triple") or iequals(type, "trip")) b.type = tripleBond;
else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
b.type = delocalizedBond;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_link_bond.type " << type << " in " << mID << std::endl;
b.type = singleBond;
}
// if (b.atom[0] > b.atom[1])
// swap(b.atom[0], b.atom[1]);
mBonds.push_back(b);
}
// sort(mBonds.begin(), mBonds.end(), LinkBondLess());
auto& linkAngles = db["chem_link_angle"]; // // the edges
for (auto row: linkAngles)
{
LinkAngle a;
cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
a.atom[2].compID, a.atom[2].atomID, a.angle, a.esd) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "value_angle", "value_angle_esd");
mAngles.push_back(a);
}
for (auto row: db["chem_link_tor"]) // float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
{ // float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
LinkTorsion a; // float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
a.atom[2].compID, a.atom[2].atomID, a.atom[3].compID, a.atom[3].atomID,
a.angle, a.esd, a.period) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "atom_4_comp_id", "atom_id_4",
"value_angle", "value_angle_esd", "period");
mTorsions.push_back(a);
}
auto& linkChir = db["chem_link_chir"]; // // the angles for the top of the tetrahedron
for (auto row: linkChir)
{
LinkChiralCentre cc;
std::string volumeSign;
cif::tie(cc.id, cc.atomCentre.compID, cc.atomCentre.atomID,
cc.atom[0].compID, cc.atom[0].atomID,
cc.atom[1].compID, cc.atom[1].atomID,
cc.atom[2].compID, cc.atom[2].atomID,
volumeSign) =
row.get("id", "atom_centre_comp_id", "atom_id_centre",
"atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "volume_sign");
if (volumeSign == "negativ" or volumeSign == "negative")
cc.volumeSign = negativ;
else if (volumeSign == "positiv" or volumeSign == "positive")
cc.volumeSign = positiv;
else if (volumeSign == "both")
cc.volumeSign = both;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_link_chir.volume_sign " << volumeSign << " in " << mID << std::endl;
continue;
}
mChiralCentres.push_back(cc);
}
auto& linkPlanes = db["chem_link_plane"]; // float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
for (auto row: linkPlanes) // float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
{ // float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
int compID;
std::string atomID, planeID;
float esd;
cif::tie(planeID, compID, atomID, esd) = row.get("plane_id", "atom_comp_id", "atom_id", "dist_esd");
auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == planeID;});
if (i == mPlanes.end())
{
std::vector<LinkAtom> atoms{LinkAtom{ compID, atomID }};
mPlanes.emplace_back(LinkPlane{ planeID, move(atoms), esd });
}
else
i->atoms.push_back({ compID, atomID });
}
}
const Link& Link::create(const std::string& id) // float cosa = cos(alpha * kPI / 180);
{ // float cosb = cos(beta * kPI / 180);
auto result = CompoundFactory::instance().getLink(id); // float cosc = cos(gamma * kPI / 180);
if (result == nullptr)
result = CompoundFactory::instance().createLink(id);
if (result == nullptr)
throw std::runtime_error("Link with id " + id + " not found");
return *result;
}
Link::~Link() // result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
{
}
float Link::atomBondValue(const LinkAtom& atom1, const LinkAtom& atom2) const // if (cv.volumeSign == negativ)
{ // result = -result;
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](auto& b)
{
return (b.atom[0] == atom1 and b.atom[1] == atom2)
or (b.atom[0] == atom2 and b.atom[1] == atom1);
});
return i != mBonds.end() ? i->distance : 0;
}
float Link::bondAngle(const LinkAtom& atom1, const LinkAtom& atom2, const LinkAtom& atom3) const // break;
{ // }
float result = nanf("1");
for (auto& a: mAngles)
{
if (not (a.atom[1] == atom2 and
((a.atom[0] == atom1 and a.atom[2] == atom3) or
(a.atom[2] == atom1 and a.atom[0] == atom3))))
continue;
result = a.angle;
break;
}
return result;
}
float Link::chiralVolume(const std::string& centreID) const // return result;
{ // }
float result = 0;
for (auto& cv: mChiralCentres)
{
if (cv.id != centreID)
continue;
// calculate the expected chiral volume
// the edges
float a = atomBondValue(cv.atomCentre, cv.atom[0]);
float b = atomBondValue(cv.atomCentre, cv.atom[1]);
float c = atomBondValue(cv.atomCentre, cv.atom[2]);
// the angles for the top of the tetrahedron
float alpha = bondAngle(cv.atom[0], cv.atomCentre, cv.atom[1]);
float beta = bondAngle(cv.atom[1], cv.atomCentre, cv.atom[2]);
float gamma = bondAngle(cv.atom[2], cv.atomCentre, cv.atom[0]);
float cosa = cos(alpha * kPI / 180);
float cosb = cos(beta * kPI / 180);
float cosc = cos(gamma * kPI / 180);
result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
if (cv.volumeSign == negativ)
result = -result;
break;
}
return result;
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// a factory class to generate compounds // a factory class to generate compounds
const std::map<std::string,char> kAAMap{ const std::map<std::string, char> kAAMap{
{ "ALA", 'A' }, {"ALA", 'A'},
{ "ARG", 'R' }, {"ARG", 'R'},
{ "ASN", 'N' }, {"ASN", 'N'},
{ "ASP", 'D' }, {"ASP", 'D'},
{ "CYS", 'C' }, {"CYS", 'C'},
{ "GLN", 'Q' }, {"GLN", 'Q'},
{ "GLU", 'E' }, {"GLU", 'E'},
{ "GLY", 'G' }, {"GLY", 'G'},
{ "HIS", 'H' }, {"HIS", 'H'},
{ "ILE", 'I' }, {"ILE", 'I'},
{ "LEU", 'L' }, {"LEU", 'L'},
{ "LYS", 'K' }, {"LYS", 'K'},
{ "MET", 'M' }, {"MET", 'M'},
{ "PHE", 'F' }, {"PHE", 'F'},
{ "PRO", 'P' }, {"PRO", 'P'},
{ "SER", 'S' }, {"SER", 'S'},
{ "THR", 'T' }, {"THR", 'T'},
{ "TRP", 'W' }, {"TRP", 'W'},
{ "TYR", 'Y' }, {"TYR", 'Y'},
{ "VAL", 'V' }, {"VAL", 'V'},
{ "GLX", 'Z' }, {"GLX", 'Z'},
{ "ASX", 'B' } {"ASX", 'B'}};
};
const std::map<std::string, char> kBaseMap{
const std::map<std::string,char> kBaseMap{ {"A", 'A'},
{ "A", 'A' }, {"C", 'C'},
{ "C", 'C' }, {"G", 'G'},
{ "G", 'G' }, {"T", 'T'},
{ "T", 'T' }, {"U", 'U'},
{ "U", 'U' }, {"DA", 'A'},
{ "DA", 'A' }, {"DC", 'C'},
{ "DC", 'C' }, {"DG", 'G'},
{ "DG", 'G' }, {"DT", 'T'}};
{ "DT", 'T' }
};
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -1033,20 +302,17 @@ class CompoundFactoryImpl ...@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
public: public:
CompoundFactoryImpl(); CompoundFactoryImpl();
CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next); CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next);
~CompoundFactoryImpl() ~CompoundFactoryImpl()
{ {
delete mNext; delete mNext;
} }
Compound* get(std::string id); Compound *get(std::string id);
Compound* create(std::string id); Compound *create(std::string id);
const Link* getLink(std::string id); CompoundFactoryImpl *pop()
const Link* createLink(std::string id);
CompoundFactoryImpl* pop()
{ {
auto result = mNext; auto result = mNext;
mNext = nullptr; mNext = nullptr;
...@@ -1054,93 +320,65 @@ class CompoundFactoryImpl ...@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
return result; return result;
} }
std::string unalias(const std::string& resName) const bool isKnownPeptide(const std::string &resName)
{
std::string result = resName;
auto& e = const_cast<cif::File&>(mFile)["comp_synonym_list"];
for (auto& synonym: e["chem_comp_synonyms"])
{
if (ba::iequals(synonym["comp_alternative_id"].as<std::string>(), resName) == false)
continue;
result = synonym["comp_id"].as<std::string>();
ba::trim(result);
break;
}
if (result.empty() and mNext)
result = mNext->unalias(resName);
return result;
}
bool isKnownPeptide(const std::string& resName)
{ {
return mKnownPeptides.count(resName) or return mKnownPeptides.count(resName) or
(mNext != nullptr and mNext->isKnownPeptide(resName)); (mNext != nullptr and mNext->isKnownPeptide(resName));
} }
bool isKnownBase(const std::string& resName) bool isKnownBase(const std::string &resName)
{ {
return mKnownBases.count(resName) or return mKnownBases.count(resName) or
(mNext != nullptr and mNext->isKnownBase(resName)); (mNext != nullptr and mNext->isKnownBase(resName));
} }
private: private:
std::shared_timed_mutex mMutex; std::shared_timed_mutex mMutex;
std::string mPath; std::string mPath;
std::vector<Compound*> mCompounds; std::vector<Compound *> mCompounds;
std::vector<const Link*> mLinks; std::set<std::string> mKnownPeptides;
std::set<std::string> mKnownPeptides; std::set<std::string> mKnownBases;
std::set<std::string> mKnownBases; std::set<std::string> mMissing;
std::set<std::string> mMissing; CompoundFactoryImpl *mNext = nullptr;
cif::File mFile;
CompoundFactoryImpl* mNext = nullptr;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
CompoundFactoryImpl::CompoundFactoryImpl() CompoundFactoryImpl::CompoundFactoryImpl()
{ {
for (const auto&[ key, value]: kAAMap) for (const auto &[key, value] : kAAMap)
mKnownPeptides.insert(key); mKnownPeptides.insert(key);
for (const auto&[ key, value]: kBaseMap) for (const auto &[key, value] : kBaseMap)
mKnownBases.insert(key); mKnownBases.insert(key);
} }
CompoundFactoryImpl::CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next) CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next)
: mPath(file), mFile(file), mNext(next) : mPath(file)
, mNext(next)
{ {
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase); cif::File cifFile(file);
if (not cifFile.isValid())
throw std::runtime_error("Invalid compound file");
auto& cat = mFile.firstDatablock()["chem_comp"]; for (auto &db: cifFile)
for (auto& chemComp: cat)
{ {
std::string group, threeLetterCode; auto compound = std::make_unique<Compound>(db);
cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code"); mCompounds.push_back(compound.release());
if (std::regex_match(group, peptideRx))
mKnownPeptides.insert(threeLetterCode);
else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
mKnownBases.insert(threeLetterCode);
} }
} }
Compound* CompoundFactoryImpl::get(std::string id) Compound *CompoundFactoryImpl::get(std::string id)
{ {
std::shared_lock lock(mMutex); std::shared_lock lock(mMutex);
ba::to_upper(id); ba::to_upper(id);
Compound* result = nullptr; Compound *result = nullptr;
for (auto cmp: mCompounds) for (auto cmp : mCompounds)
{ {
if (cmp->id() == id) if (cmp->id() == id)
{ {
...@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id) ...@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
break; break;
} }
} }
if (result == nullptr and mNext != nullptr) if (result == nullptr and mNext != nullptr)
result = mNext->get(id); result = mNext->get(id);
return result; return result;
} }
Compound* CompoundFactoryImpl::create(std::string id) Compound *CompoundFactoryImpl::create(std::string id)
{ {
ba::to_upper(id); ba::to_upper(id);
Compound* result = get(id); Compound *result = get(id);
if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty()) // if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
{ // {
std::unique_lock lock(mMutex); // std::unique_lock lock(mMutex);
auto& cat = mFile.firstDatablock()["chem_comp"]; // auto &cat = mFile.firstDatablock()["chem_comp"];
auto rs = cat.find(cif::Key("three_letter_code") == id); // auto rs = cat.find(cif::Key("three_letter_code") == id);
if (not rs.empty())
{
auto row = rs.front();
std::string name, group;
uint32_t numberAtomsAll, numberAtomsNh;
cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
row.get("name", "group", "number_atoms_all", "number_atoms_nh");
ba::trim(name);
ba::trim(group);
if (mFile.get("comp_" + id) == nullptr)
{
auto clibd_mon = fs::path(getenv("CLIBD_MON"));
fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
if (not fs::exists(resFile))
mMissing.insert(id);
else
{
mCompounds.push_back(new Compound(resFile.string(), id, name, group));
result = mCompounds.back();
}
}
else
{
mCompounds.push_back(new Compound(mPath, id, name, group));
result = mCompounds.back();
}
}
if (result == nullptr and mNext != nullptr)
result = mNext->create(id);
}
return result;
}
const Link* CompoundFactoryImpl::getLink(std::string id) // if (not rs.empty())
{ // {
std::shared_lock lock(mMutex); // auto row = rs.front();
ba::to_upper(id); // std::string name, group;
// uint32_t numberAtomsAll, numberAtomsNh;
// cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
// row.get("name", "group", "number_atoms_all", "number_atoms_nh");
const Link* result = nullptr; // ba::trim(name);
// ba::trim(group);
for (auto link: mLinks)
{
if (link->id() == id)
{
result = link;
break;
}
}
if (result == nullptr and mNext != nullptr)
result = mNext->getLink(id);
return result;
}
const Link* CompoundFactoryImpl::createLink(std::string id) // if (mFile.get("comp_" + id) == nullptr)
{ // {
ba::to_upper(id); // auto clibd_mon = fs::path(getenv("CLIBD_MON"));
const Link* result = getLink(id); // fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
if (result == nullptr) // if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
{ // resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
std::unique_lock lock(mMutex);
// if (not fs::exists(resFile))
// mMissing.insert(id);
// else
// {
// mCompounds.push_back(new Compound(resFile.string(), id, name, group));
// result = mCompounds.back();
// }
// }
// else
// {
// mCompounds.push_back(new Compound(mPath, id, name, group));
// result = mCompounds.back();
// }
// }
// if (result == nullptr and mNext != nullptr)
// result = mNext->create(id);
// }
auto db = mFile.get("link_" + id);
if (db != nullptr)
{
result = new Link(*db);
mLinks.push_back(result);
}
if (result == nullptr and mNext != nullptr)
result = mNext->createLink(id);
}
return result; return result;
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
CompoundFactory* CompoundFactory::sInstance; CompoundFactory *CompoundFactory::sInstance;
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance; thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
bool CompoundFactory::sUseThreadLocalInstance; bool CompoundFactory::sUseThreadLocalInstance;
...@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly) ...@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
CompoundFactory::CompoundFactory() CompoundFactory::CompoundFactory()
: mImpl(nullptr) : mImpl(nullptr)
{ {
const char* clibdMon = getenv("CLIBD_MON");
if (clibdMon != nullptr)
{
fs::path db = fs::path(clibdMon) / "list" / "mon_lib_list.cif";
if (fs::exists(db))
pushDictionary(db.string());
}
if (mImpl == nullptr)
{
if (cif::VERBOSE)
std::cerr << "Could not load the mon_lib_list.cif file from CCP4, please make sure you have installed CCP4 and sourced the environment." << std::endl;
mImpl = new CompoundFactoryImpl();
}
} }
CompoundFactory::~CompoundFactory() CompoundFactory::~CompoundFactory()
...@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory() ...@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
delete mImpl; delete mImpl;
} }
CompoundFactory& CompoundFactory::instance() CompoundFactory &CompoundFactory::instance()
{ {
if (sUseThreadLocalInstance) if (sUseThreadLocalInstance)
{ {
...@@ -1321,20 +497,20 @@ void CompoundFactory::clear() ...@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
} }
} }
void CompoundFactory::pushDictionary(const std::string& inDictFile) void CompoundFactory::pushDictionary(const std::string &inDictFile)
{ {
if (not fs::exists(inDictFile)) if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile); throw std::runtime_error("file not found: " + inDictFile);
// ifstream file(inDictFile); // ifstream file(inDictFile);
// if (not file.is_open()) // if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile); // throw std::runtime_error("Could not open peptide list " + inDictFile);
try try
{ {
mImpl = new CompoundFactoryImpl(inDictFile, mImpl); mImpl = new CompoundFactoryImpl(inDictFile, mImpl);
} }
catch (const std::exception& ex) catch (const std::exception &ex)
{ {
std::cerr << "Error loading dictionary " << inDictFile << std::endl; std::cerr << "Error loading dictionary " << inDictFile << std::endl;
throw; throw;
...@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary() ...@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
} }
// id is the three letter code // id is the three letter code
const Compound* CompoundFactory::get(std::string id) const Compound *CompoundFactory::get(std::string id)
{ {
return mImpl->get(id); return mImpl->get(id);
} }
const Compound* CompoundFactory::create(std::string id) const Compound *CompoundFactory::create(std::string id)
{ {
return mImpl->create(id); return mImpl->create(id);
} }
const Link* CompoundFactory::getLink(std::string id) bool CompoundFactory::isKnownPeptide(const std::string &resName) const
{
return mImpl->getLink(id);
}
const Link* CompoundFactory::createLink(std::string id)
{
return mImpl->createLink(id);
}
bool CompoundFactory::isKnownPeptide(const std::string& resName) const
{ {
return mImpl->isKnownPeptide(resName); return mImpl->isKnownPeptide(resName);
} }
bool CompoundFactory::isKnownBase(const std::string& resName) const bool CompoundFactory::isKnownBase(const std::string &resName) const
{ {
return mImpl->isKnownBase(resName); return mImpl->isKnownBase(resName);
} }
std::string CompoundFactory::unalias(const std::string& resName) const // --------------------------------------------------------------------
std::vector<std::string> Compound::addExtraComponents(const std::filesystem::path &components)
{ {
return mImpl->unalias(resName);
}
} }
} // namespace mmcif
...@@ -457,7 +457,6 @@ class PDBFileParser ...@@ -457,7 +457,6 @@ class PDBFileParser
char iCode; char iCode;
int numHetAtoms = 0; int numHetAtoms = 0;
std::string text; std::string text;
bool sugar = false;
std::string asymID; std::string asymID;
std::vector<PDBRecord*> atoms; std::vector<PDBRecord*> atoms;
bool processed = false; bool processed = false;
...@@ -467,14 +466,6 @@ class PDBFileParser ...@@ -467,14 +466,6 @@ class PDBFileParser
HET(const std::string& hetID, char chainID, int seqNum, char iCode, int numHetAtoms = 0, const std::string& text = {}) HET(const std::string& hetID, char chainID, int seqNum, char iCode, int numHetAtoms = 0, const std::string& text = {})
: hetID(hetID), chainID(chainID), seqNum(seqNum), iCode(iCode), numHetAtoms(numHetAtoms), text(text) : hetID(hetID), chainID(chainID), seqNum(seqNum), iCode(iCode), numHetAtoms(numHetAtoms), text(text)
{ {
// just in case we don't have a CCP4 available
if (hetID == "MAN" or hetID == "BMA" or hetID == "NAG" or hetID == "NDG" or hetID == "FUC" or hetID == "FUL")
sugar = true;
else
{
auto compound = CompoundFactory::instance().create(hetID);
sugar = compound ? compound->isSugar() : false;
}
} }
}; };
...@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities() ...@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
mMod2parent.count(cc) ? mMod2parent[cc] : cc mMod2parent.count(cc) ? mMod2parent[cc] : cc
); );
std::string formula = mFormuls[cc]; std::string name;
if (formula.empty() and compound != nullptr) std::string formula;
formula = compound->formula(); std::string type;
else
{
const std::regex rx(R"(\d+\((.+)\))");
std::smatch m;
if (std::regex_match(formula, m, rx))
formula = m[1].str();
}
std::string name = mHetnams[cc];
if (name.empty() and compound != nullptr)
name = compound->name();
std::string type = "other";
std::string nstd = "."; std::string nstd = ".";
std::string formulaWeight;
if (compound != nullptr) if (compound != nullptr)
{ {
name = compound->name();
type = compound->type(); type = compound->type();
if (type.empty())
type = "NON-POLYMER";
if (iequals(type, "L-peptide linking") or iequals(type, "peptide linking")) if (iequals(type, "L-peptide linking") or iequals(type, "peptide linking"))
nstd = "y"; nstd = "y";
formula = compound->formula();
formulaWeight = std::to_string(compound->formulaWeight());
}
if (name.empty())
name = mHetnams[cc];
if (type.empty())
type = "NON-POLYMER";
if (formula.empty())
{
formula = mFormuls[cc];
const std::regex rx(R"(\d+\((.+)\))");
std::smatch m;
if (std::regex_match(formula, m, rx))
formula = m[1].str();
} }
if (modResSet.count(cc)) if (modResSet.count(cc))
...@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
{ "id", cc }, { "id", cc },
{ "name", name }, { "name", name },
{ "formula", formula }, { "formula", formula },
{ "formula_weight", formulaWeight },
{ "mon_nstd_flag", nstd }, { "mon_nstd_flag", nstd },
{ "type", type } { "type", type }
}); });
......
...@@ -566,16 +566,6 @@ int Atom::charge() const ...@@ -566,16 +566,6 @@ int Atom::charge() const
return property<int>("pdbx_formal_charge"); return property<int>("pdbx_formal_charge");
} }
std::string Atom::energyType() const
{
std::string result;
if (impl() and impl()->mCompound)
result = impl()->mCompound->getAtomByID(impl()->mAtomID).typeEnergy;
return result;
}
float Atom::uIso() const float Atom::uIso() const
{ {
float result; float result;
...@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const ...@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
return a1.size() == a2.size(); return a1.size() == a2.size();
} }
bool Residue::isSugar() const
{
return compound().isSugar();
}
bool Residue::isPyranose() const
{
return cif::iequals(compound().group(), "pyranose");
}
bool Residue::isFuranose() const
{
return cif::iequals(compound().group(), "furanose");
}
std::string Residue::authID() const std::string Residue::authID() const
{ {
std::string result; std::string result;
...@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity) ...@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{ {
auto compound = Compound::create(compoundID); auto compound = Compound::create(compoundID);
if (compound == nullptr) if (compound == nullptr)
throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCP4 monomers lib)"); throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");
cif::Datablock& db = *mFile.impl().mDb; cif::Datablock& db = *mFile.impl().mDb;
...@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity) ...@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{ "id", compoundID }, { "id", compoundID },
{ "name", compound->name() }, { "name", compound->name() },
{ "formula", compound->formula() }, { "formula", compound->formula() },
{ "formula_weight", compound->formulaWeight() },
{ "type", compound->type() } { "type", compound->type() }
}); });
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment