Commit 915d6504 by Maarten L. Hekkelman

First steps, remove CCP4 info from compound

parent 5e63ca7a
......@@ -289,9 +289,10 @@ namespace detail
void swap(ItemReference& b);
template<typename T = std::string>
T as() const
auto as() const
{
return item_value_as<T>::convert(*this);
using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
return item_value_as<value_type>::convert(*this);
}
template<typename T>
......@@ -337,11 +338,13 @@ namespace detail
template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_floating_point_v<T>>>
{
static T convert(const ItemReference& ref)
using value_type = std::remove_reference_t<std::remove_cv_t<T>>;
static value_type convert(const ItemReference& ref)
{
T result = {};
value_type result = {};
if (not ref.empty())
result = static_cast<T>(std::stod(ref.c_str()));
result = static_cast<value_type>(std::stod(ref.c_str()));
return result;
}
......@@ -376,7 +379,7 @@ namespace detail
};
template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T>>>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T> and not std::is_same_v<T, bool>>>
{
static T convert(const ItemReference& ref)
{
......@@ -417,7 +420,7 @@ namespace detail
};
template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T>>>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T> and not std::is_same_v<T, bool>>>
{
static T convert(const ItemReference& ref)
{
......@@ -482,6 +485,25 @@ namespace detail
}
};
template<typename T>
struct ItemReference::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
{
static bool convert(const ItemReference& ref)
{
bool result = false;
if (not ref.empty())
result = iequals(ref.c_str(), "y");
return result;
}
static int compare(const ItemReference& ref, bool value, bool icase)
{
bool rv = convert(ref);
return value && rv ? 0
: (rv < value ? -1 : 1);
}
};
template<size_t N>
struct ItemReference::item_value_as<char[N]>
{
......
......@@ -26,10 +26,13 @@
#pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <tuple>
#include <vector>
#include <map>
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
......@@ -38,310 +41,156 @@ namespace mmcif
{
// --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class composition. A compositon consists of
// entities. Each Entity can be either a polymer, a non-polymer
// a macrolide or a water molecule.
// Entities themselves are made up of compounds. And compounds
// contain CompoundAtom records for each atom.
class Compound;
class Link;
struct CompoundAtom;
enum BondType { singleBond, doubleBond, tripleBond, delocalizedBond };
// --------------------------------------------------------------------
// struct containing information about an atom in a chemical compound
// This information comes from the CCP4 monomer library.
struct CompoundAtom
{
std::string id;
AtomType typeSymbol;
std::string typeEnergy;
float partialCharge;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct CompoundBond
{
std::string atomID[2];
BondType type;
float distance;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct CompoundAngle
{
std::string atomID[3];
float angle;
float esd;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
enum class BondType
{
sing, // 'single bond'
doub, // 'double bond'
trip, // 'triple bond'
quad, // 'quadruple bond'
arom, // 'aromatic bond'
poly, // 'polymeric bond'
delo, // 'delocalized double bond'
pi, // 'pi bond'
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
std::string to_string(BondType bondType);
BondType from_string(const std::string& bondType);
struct CompoundTorsion
{
std::string atomID[4];
float angle;
float esd;
int period;
};
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct CompoundPlane
struct CompoundAtom
{
std::string id;
std::vector<std::string> atomID;
float esd;
std::string id;
AtomType typeSymbol;
int charge;
bool aromatic;
bool leavingAtom;
bool stereoConfig;
float x, y, z;
};
// --------------------------------------------------------------------
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
enum ChiralVolumeSign { negativ, positiv, both };
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
struct CompoundChiralCentre
struct CompoundBond
{
std::string id;
std::string atomIDCentre;
std::string atomID[3];
ChiralVolumeSign volumeSign;
std::string atomID[2];
BondType type;
bool aromatic, stereoConfig;
};
// --------------------------------------------------------------------
// a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default.
// To create compounds, you'd best use the factory method.
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CDD by default.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
class Compound
{
public:
Compound(const std::string& file, const std::string& id, const std::string& name,
const std::string& group);
// factory method, create a Compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
static const Compound* create(const std::string& id);
// this second factory method can create a Compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static const Compound* create(const std::string& id, const std::string& name,
const std::string& type, const std::string& formula);
// add an additional path to the monomer library.
static void addMonomerLibraryPath(const std::string& dir);
// accessors
std::string id() const { return mID; }
std::string name() const { return mName; }
std::string type() const;
std::string group() const { return mGroup; }
std::vector<CompoundAtom> atoms() const { return mAtoms; }
std::vector<CompoundBond> bonds() const { return mBonds; }
std::vector<CompoundAngle> angles() const { return mAngles; }
std::vector<CompoundChiralCentre> chiralCentres() const
{ return mChiralCentres; }
std::vector<CompoundPlane> planes() const { return mPlanes; }
std::vector<CompoundTorsion> torsions() const { return mTorsions; }
CompoundAtom getAtomByID(const std::string& atomID) const;
bool atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const;
float atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const;
float bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const;
float chiralVolume(const std::string& centreID) const;
std::string formula() const;
float formulaWeight() const;
int charge() const;
bool isWater() const;
bool isSugar() const;
std::vector<std::string> isomers() const;
bool isIsomerOf(const Compound& c) const;
std::vector<std::tuple<std::string,std::string>> mapToIsomer(const Compound& c) const;
private:
Compound(cif::Datablock &db);
~Compound();
cif::File mCF;
std::string mID;
std::string mName;
std::string mGroup;
std::vector<CompoundAtom> mAtoms;
std::vector<CompoundBond> mBonds;
std::vector<CompoundAngle> mAngles;
std::vector<CompoundTorsion>mTorsions;
std::vector<CompoundChiralCentre>
mChiralCentres;
std::vector<CompoundPlane> mPlanes;
};
// --------------------------------------------------------------------
// struct containing information about the bonds
// This information comes from the CCP4 monomer library.
struct LinkAtom
{
int compID;
std::string atomID;
bool operator==(const LinkAtom& rhs) const { return compID == rhs.compID and atomID == rhs.atomID; }
};
struct LinkBond
{
LinkAtom atom[2];
BondType type;
float distance;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
struct LinkAngle
{
LinkAtom atom[3];
float angle;
float esd;
};
// --------------------------------------------------------------------
// struct containing information about the bond-torsions
// This information comes from the CCP4 monomer library.
/// \brief factory method, create a Compound based on the three letter code
/// (for amino acids) or the one-letter code (for bases) or the
/// code as it is known in the CCD.
struct LinkTorsion
{
LinkAtom atom[4];
float angle;
float esd;
int period;
};
static const Compound *create(const std::string &id);
// --------------------------------------------------------------------
// struct containing information about the bond-angles
// This information comes from the CCP4 monomer library.
// /// this second factory method can create a Compound even if it is not
// /// recorded in the library. It will take the values from the CCP4 lib
// /// unless the value passed to this function is not empty.
// static const Compound* create(const std::string& id, const std::string& name,
// const std::string& type, const std::string& formula);
struct LinkPlane
{
std::string id;
std::vector<LinkAtom> atoms;
float esd;
};
/// \brief Create compounds based on the data in the file \a components
///
/// It is often required to add information about unknown components.
/// This file parses either a CCP4 or a CCD formatted components file
///
/// \param components The mmCIF file containing the components
/// \result An array containing the ID's of the added components
static std::vector<std::string> addExtraComponents(const std::filesystem::path &components);
// --------------------------------------------------------------------
// struct containing information about a chiral centre
// This information comes from the CCP4 monomer library.
// accessors
struct LinkChiralCentre
{
std::string id;
LinkAtom atomCentre;
LinkAtom atom[3];
ChiralVolumeSign volumeSign;
};
std::string id() const { return mID; }
std::string name() const { return mName; }
std::string type() const { return mType; }
std::string formula() const { return mFormula; }
float formulaWeight() const { return mFormulaWeight; }
int formalCharge() const { return mFormalCharge; }
// --------------------------------------------------------------------
// a class that contains information about a chemical link between compounds.
// This information is derived from the ccp4 monomer library by default.
const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
const std::vector<CompoundBond> &bonds() const { return mBonds; }
class Link
{
public:
CompoundAtom getAtomByID(const std::string &atomID) const;
Link(cif::Datablock& db);
bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
// float chiralVolume(const std::string &centreID) const;
// Factory method.
static const Link& create(const std::string& id);
bool isWater() const
{
return mID == "HOH" or mID == "H2O" or mID == "WAT";
}
// accessors
std::string id() const { return mID; }
std::vector<LinkBond> bonds() const { return mBonds; }
std::vector<LinkAngle> angles() const { return mAngles; }
std::vector<LinkChiralCentre> chiralCentres() const { return mChiralCentres; }
std::vector<LinkPlane> planes() const { return mPlanes; }
std::vector<LinkTorsion> torsions() const { return mTorsions; }
float atomBondValue(const LinkAtom& atomId_1, const LinkAtom& atomId_2) const;
float bondAngle(const LinkAtom& atomId_1, const LinkAtom& atomId_2, const LinkAtom& atomId_3) const;
float chiralVolume(const std::string& id) const;
private:
~Link();
std::string mID;
std::vector<LinkBond> mBonds;
std::vector<LinkAngle> mAngles;
std::vector<LinkTorsion> mTorsions;
std::vector<LinkChiralCentre> mChiralCentres;
std::vector<LinkPlane> mPlanes;
std::string mID;
std::string mName;
std::string mType;
std::string mFormula;
float mFormulaWeight;
int mFormalCharge;
std::vector<CompoundAtom> mAtoms;
std::vector<CompoundBond> mBonds;
};
// --------------------------------------------------------------------
// Factory class for Compound and Link objects
extern const std::map<std::string,char> kAAMap, kBaseMap;
extern const std::map<std::string, char> kAAMap, kBaseMap;
class CompoundFactory
{
public:
static void init(bool useThreadLocalInstanceOnly);
static CompoundFactory& instance();
static CompoundFactory &instance();
static void clear();
void pushDictionary(const std::string& inDictFile);
void pushDictionary(const std::string &inDictFile);
void popDictionary();
bool isKnownPeptide(const std::string& res_name) const;
bool isKnownBase(const std::string& res_name) const;
bool isKnownPeptide(const std::string &res_name) const;
bool isKnownBase(const std::string &res_name) const;
std::string unalias(const std::string& res_name) const;
const Compound* get(std::string id);
const Compound* create(std::string id);
const Link* getLink(std::string id);
const Link* createLink(std::string id);
const Compound *get(std::string id);
const Compound *create(std::string id);
~CompoundFactory();
private:
CompoundFactory();
CompoundFactory(const CompoundFactory&) = delete;
CompoundFactory& operator=(const CompoundFactory&) = delete;
static CompoundFactory* sInstance;
CompoundFactory(const CompoundFactory &) = delete;
CompoundFactory &operator=(const CompoundFactory &) = delete;
static CompoundFactory *sInstance;
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
static bool sUseThreadLocalInstance;
class CompoundFactoryImpl* mImpl;
class CompoundFactoryImpl *mImpl;
};
}
} // namespace mmcif
......@@ -141,9 +141,6 @@ class Atom
// access data in compound for this atom
// the energy-type field
std::string energyType() const;
// convenience routine
bool isBackBone() const
{
......@@ -243,10 +240,6 @@ class Residue
bool isWater() const { return mCompoundID == "HOH"; }
bool isSugar() const;
bool isPyranose() const;
bool isFuranose() const;
const Structure& structure() const { return *mStructure; }
bool empty() const { return mStructure == nullptr; }
......
......@@ -29,8 +29,8 @@
#endif
#include <map>
#include <numeric>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <boost/algorithm/string.hpp>
......@@ -39,9 +39,9 @@
#include <fstream>
#include "cif++/Cif++.hpp"
#include "cif++/Point.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
namespace ba = boost::algorithm;
namespace fs = std::filesystem;
......@@ -50,11 +50,41 @@ namespace mmcif
{
// --------------------------------------------------------------------
std::string to_string(BondType bondType)
{
switch (bondType)
{
case BondType::sing: return "sing";
case BondType::doub: return "doub";
case BondType::trip: return "trip";
case BondType::quad: return "quad";
case BondType::arom: return "arom";
case BondType::poly: return "poly";
case BondType::delo: return "delo";
case BondType::pi: return "pi";
}
}
BondType from_string(const std::string& bondType)
{
if (cif::iequals(bondType, "sing")) return BondType::sing;
if (cif::iequals(bondType, "doub")) return BondType::doub;
if (cif::iequals(bondType, "trip")) return BondType::trip;
if (cif::iequals(bondType, "quad")) return BondType::quad;
if (cif::iequals(bondType, "arom")) return BondType::arom;
if (cif::iequals(bondType, "poly")) return BondType::poly;
if (cif::iequals(bondType, "delo")) return BondType::delo;
if (cif::iequals(bondType, "pi")) return BondType::pi;
throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// Compound helper classes
struct CompoundAtomLess
{
bool operator()(const CompoundAtom& a, const CompoundAtom& b) const
bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
{
int d = a.id.compare(b.id);
if (d == 0)
......@@ -65,512 +95,59 @@ struct CompoundAtomLess
struct CompoundBondLess
{
bool operator()(const CompoundBond& a, const CompoundBond& b) const
bool operator()(const CompoundBond &a, const CompoundBond &b) const
{
int d = a.atomID[0].compare(b.atomID[0]);
if (d == 0)
d = a.atomID[1].compare(b.atomID[1]);
if (d == 0)
d = a.type - b.type;
d = static_cast<int>(a.type) - static_cast<int>(b.type);
return d < 0;
}
};
// --------------------------------------------------------------------
// Isomers in the ccp4 dictionary
struct IsomerSet
{
std::vector<std::string> compounds;
};
struct IsomerSets
{
std::vector<IsomerSet> isomers;
};
class IsomerDB
{
public:
static IsomerDB& instance();
size_t count(const std::string& compound) const;
const std::vector<std::string>& operator[](const std::string& compound) const;
private:
IsomerDB();
IsomerSets mData;
};
IsomerDB::IsomerDB()
{
#if defined(CACHE_DIR)
fs::path isomersFile = fs::path(CACHE_DIR) / "isomers.txt";
if (not fs::exists(isomersFile))
std::cerr << "Could not locate isomers.txt in " CACHE_DIR << std::endl;
else
{
std::ifstream is(isomersFile);
std::string line;
while (std::getline(is, line))
{
IsomerSet compounds;
ba::split(compounds.compounds, line, ba::is_any_of(":"));
if (not compounds.compounds.empty())
mData.isomers.emplace_back(std::move(compounds));
}
}
#endif
}
IsomerDB& IsomerDB::instance()
{
static IsomerDB sInstance;
return sInstance;
}
size_t IsomerDB::count(const std::string& compound) const
{
size_t n = 0;
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
++n;
}
return n;
}
const std::vector<std::string>& IsomerDB::operator[](const std::string& compound) const
{
for (auto& d: mData.isomers)
{
if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
return d.compounds;
}
throw std::runtime_error("No isomer set found containing " + compound);
}
// --------------------------------------------------------------------
// Brute force comparison of two structures, when they are isomers the
// mapping between the atoms of both is returned
// This is not an optimal solution, but it works good enough for now
struct Node
{
std::string id;
AtomType symbol;
std::vector<std::tuple<size_t,BondType>> links;
size_t hydrogens = 0;
};
// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
bool SubStructuresAreIsomeric(
const std::vector<Node>& a, const std::vector<Node>& b, size_t iA, size_t iB,
std::vector<bool> visitedA, std::vector<bool> visitedB, std::vector<std::tuple<std::string,std::string>>& outMapping)
{
auto& na = a[iA];
auto& nb = b[iB];
size_t N = na.links.size();
assert(na.symbol == nb.symbol);
assert(nb.links.size() == N);
// we're optimistic today
bool result = true;
visitedA[iA] = true;
visitedB[iB] = true;
// we now have two sets of links to compare.
// Compare each permutation of the second set of indices with the first
std::vector<size_t> ilb(N);
iota(ilb.begin(), ilb.end(), 0);
for (;;)
{
result = true;
std::vector<std::tuple<std::string,std::string>> m;
for (size_t i = 0; result and i < N; ++i)
{
size_t lA, lB;
BondType typeA, typeB;
std::tie(lA, typeA) = na.links[i]; assert(lA < a.size());
std::tie(lB, typeB) = nb.links[ilb[i]]; assert(lB < b.size());
if (typeA != typeB or visitedA[lA] != visitedB[lB])
{
result = false;
break;
}
auto& la = a[lA];
auto& lb = b[lB];
if (la.symbol != lb.symbol or la.hydrogens != lb.hydrogens or la.links.size() != lb.links.size())
{
result = false;
break;
}
if (not visitedA[lA])
result = SubStructuresAreIsomeric(a, b, lA, lB, visitedA, visitedB, m);
}
if (result)
{
outMapping.insert(outMapping.end(), m.begin(), m.end());
break;
}
if (not next_permutation(ilb.begin(), ilb.end()))
break;
}
if (result and na.id != nb.id)
outMapping.emplace_back(na.id, nb.id);
return result;
}
bool StructuresAreIsomeric(std::vector<CompoundAtom> atomsA, const std::vector<CompoundBond>& bondsA,
std::vector<CompoundAtom> atomsB, const std::vector<CompoundBond>& bondsB,
std::vector<std::tuple<std::string,std::string>>& outMapping)
{
assert(atomsA.size() == atomsB.size());
assert(bondsA.size() == bondsB.size());
std::vector<Node> a, b;
std::map<std::string,size_t> ma, mb;
for (auto& atomA: atomsA)
{
ma[atomA.id] = a.size();
a.push_back({atomA.id, atomA.typeSymbol});
}
for (auto& bondA: bondsA)
{
size_t atom1 = ma.at(bondA.atomID[0]);
size_t atom2 = ma.at(bondA.atomID[1]);
if (a[atom2].symbol == H)
a[atom1].hydrogens += 1;
else
a[atom1].links.emplace_back(atom2, bondA.type);
if (a[atom1].symbol == H)
a[atom2].hydrogens += 1;
else
a[atom2].links.emplace_back(atom1, bondA.type);
}
for (auto& atomB: atomsB)
{
mb[atomB.id] = b.size();
b.push_back({atomB.id, atomB.typeSymbol});
}
for (auto& bondB: bondsB)
{
size_t atom1 = mb.at(bondB.atomID[0]);
size_t atom2 = mb.at(bondB.atomID[1]);
if (b[atom2].symbol == H)
b[atom1].hydrogens += 1;
else
b[atom1].links.emplace_back(atom2, bondB.type);
if (b[atom1].symbol == H)
b[atom2].hydrogens += 1;
else
b[atom2].links.emplace_back(atom1, bondB.type);
}
size_t N = atomsA.size();
size_t ia = 0;
bool result = false;
// try each atom in B to see if it can be traced to be similar to A starting at zero
for (size_t ib = 0; ib < N; ++ib)
{
if (b[ib].symbol != a[ia].symbol or a[ia].hydrogens != b[ib].hydrogens or a[ia].links.size() != b[ib].links.size())
continue;
std::vector<bool> va(N, false), vb(N, false);
if (SubStructuresAreIsomeric(a, b, ia, ib, va, vb, outMapping))
{
result = true;
break;
}
}
return result;
}
// --------------------------------------------------------------------
// Compound
Compound::Compound(const std::string& file, const std::string& id,
const std::string& name, const std::string& group)
: mID(id), mName(name), mGroup(group)
Compound::Compound(cif::Datablock &db)
{
try
{
mCF.load(file);
// locate the datablock
auto& db = mCF["comp_" + id];
auto& compoundAtoms = db["chem_comp_atom"];
for (auto row: compoundAtoms)
{
std::string id, symbol, energy;
float charge;
cif::tie(id, symbol, energy, charge) = row.get("atom_id", "type_symbol", "type_energy", "partial_charge");
mAtoms.push_back({
id, AtomTypeTraits(symbol).type(), energy, charge
});
}
sort(mAtoms.begin(), mAtoms.end(), CompoundAtomLess());
auto& compBonds = db["chem_comp_bond"];
for (auto row: compBonds)
{
CompoundBond b;
std::string type, aromatic;
cif::tie(b.atomID[0], b.atomID[1], type, b.distance, b.esd) =
row.get("atom_id_1", "atom_id_2", "type", "value_dist", "value_dist_esd");
using cif::iequals;
if (iequals(type, "single") or iequals(type, "sing")) b.type = singleBond;
else if (iequals(type, "double") or iequals(type, "doub")) b.type = doubleBond;
else if (iequals(type, "triple") or iequals(type, "trip")) b.type = tripleBond;
else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
b.type = delocalizedBond;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_comp_bond.type " << type << " in " << id << std::endl;
b.type = singleBond;
}
if (b.atomID[0] > b.atomID[1])
swap(b.atomID[0], b.atomID[1]);
mBonds.push_back(b);
}
sort(mBonds.begin(), mBonds.end(), CompoundBondLess());
auto &chemComp = db["chem_comp"];
for (auto row: db["chem_comp_angle"])
{
CompoundAngle a;
cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.angle, a.esd) =
row.get("atom_id_1", "atom_id_2", "atom_id_3", "value_angle", "value_angle_esd");
mAngles.push_back(a);
}
if (chemComp.size() != 1)
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
for (auto row: db["chem_comp_tor"])
{
CompoundTorsion a;
cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.atomID[3], a.angle, a.esd, a.period) =
row.get("atom_id_1", "atom_id_2", "atom_id_3", "atom_id_4", "value_angle", "value_angle_esd", "period");
mTorsions.push_back(a);
}
cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
for (auto row: db["chem_comp_chir"])
{
CompoundChiralCentre cc;
std::string volumeSign;
cif::tie(cc.id, cc.atomIDCentre, cc.atomID[0],
cc.atomID[1], cc.atomID[2], volumeSign) =
row.get("id", "atom_id_centre", "atom_id_1",
"atom_id_2", "atom_id_3", "volume_sign");
if (volumeSign == "negativ" or volumeSign == "negative")
cc.volumeSign = negativ;
else if (volumeSign == "positiv" or volumeSign == "positive")
cc.volumeSign = positiv;
else if (volumeSign == "both")
cc.volumeSign = both;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_comp_chir.volume_sign " << volumeSign << " in " << id << std::endl;
continue;
}
mChiralCentres.push_back(cc);
}
auto& compPlanes = db["chem_comp_plane_atom"];
for (auto row: compPlanes)
{
std::string atom_id, plane_id;
float esd;
cif::tie(atom_id, plane_id, esd) = row.get("atom_id", "plane_id", "dist_esd");
auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == plane_id;});
if (i == mPlanes.end())
mPlanes.emplace_back(CompoundPlane{ plane_id, { atom_id }, esd });
else
i->atomID.push_back(atom_id);
}
}
catch (const std::exception& ex)
auto &chemCompAtom = db["chem_comp_atom"];
for (auto row: chemCompAtom)
{
std::cerr << "Error loading ccp4 file for " << id << " from file " << file << std::endl;
throw;
CompoundAtom atom;
std::string typeSymbol;
cif::tie(atom.id, typeSymbol, atom.charge, atom.aromatic, atom.leavingAtom, atom.stereoConfig, atom.x, atom.y, atom.z) =
row.get("id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
atom.typeSymbol = AtomTypeTraits(typeSymbol).type();
mAtoms.push_back(std::move(atom));
}
}
std::string Compound::formula() const
{
std::string result;
std::map<std::string,uint32_t> atoms;
float chargeSum = 0;
for (auto r: mAtoms)
{
atoms[AtomTypeTraits(r.typeSymbol).symbol()] += 1;
chargeSum += r.partialCharge;
}
auto c = atoms.find("C");
if (c != atoms.end())
{
result = "C";
if (c->second > 1)
result += std::to_string(c->second);
atoms.erase(c);
auto h = atoms.find("H");
if (h != atoms.end())
{
result += " H";
if (h->second > 1)
result += std::to_string(h->second);
atoms.erase(h);
}
}
for (auto a: atoms)
auto &chemCompBond = db["chem_comp_bond"];
for (auto row: chemCompBond)
{
if (not result.empty())
result += ' ';
result += a.first;
if (a.second > 1)
result += std::to_string(a.second);
CompoundBond bond;
std::string valueOrder;
cif::tie(bond.atomID[0], bond.atomID[1], valueOrder, bond.aromatic, bond.stereoConfig)
= row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
bond.type = from_string(valueOrder);
mBonds.push_back(std::move(bond));
}
int charge = lrint(chargeSum);
if (charge != 0)
result += ' ' + std::to_string(charge);
return result;
}
float Compound::formulaWeight() const
{
float result = 0;
for (auto r: mAtoms)
result += AtomTypeTraits(r.typeSymbol).weight();
return result;
}
int Compound::charge() const
{
float result = 0;
for (auto r: mAtoms)
result += r.partialCharge;
return lrint(result);
}
std::string Compound::type() const
{
std::string result;
// known groups are (counted from ccp4 monomer dictionary)
// D-pyranose
// DNA
// L-PEPTIDE LINKING
// L-SACCHARIDE
// L-peptide
// L-pyranose
// M-peptide
// NON-POLYMER
// P-peptide
// RNA
// furanose
// non-polymer
// non_polymer
// peptide
// pyranose
// saccharide
if (cif::iequals(mID, "gly"))
result = "peptide linking";
else if (cif::iequals(mGroup, "l-peptide") or cif::iequals(mGroup, "L-peptide linking") or cif::iequals(mGroup, "peptide"))
result = "L-peptide linking";
else if (cif::iequals(mGroup, "DNA"))
result = "DNA linking";
else if (cif::iequals(mGroup, "RNA"))
result = "RNA linking";
// else
// result = mGroup;
return result;
}
bool Compound::isWater() const
{
return mID == "HOH" or mID == "H2O";
}
bool Compound::isSugar() const
{
return cif::iequals(mGroup, "furanose") or cif::iequals(mGroup, "pyranose");
}
CompoundAtom Compound::getAtomByID(const std::string& atomID) const
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{
CompoundAtom result = {};
for (auto& a: mAtoms)
for (auto &a : mAtoms)
{
if (a.id == atomID)
{
......@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
}
}
if (result.id != atomID)
if (result.id != atomID)
throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
return result;
}
const Compound* Compound::create(const std::string& id)
const Compound *Compound::create(const std::string &id)
{
auto result = CompoundFactory::instance().get(id);
if (result == nullptr)
......@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
return result;
}
bool Compound::atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond& b)
{
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2)
or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
[&](const CompoundBond &b) {
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
});
return i != mBonds.end();
}
float Compound::atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const
{
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond& b)
{
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2)
or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
});
return i != mBonds.end() ? i->distance : 0;
}
// float Compound::atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const
// {
// auto i = find_if(mBonds.begin(), mBonds.end(),
// [&](const CompoundBond &b) {
// return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
// });
bool Compound::isIsomerOf(const Compound& c) const
{
bool result = false;
for (;;)
{
// easy tests first
if (mID == c.mID)
{
result = true;
break;
}
if (mAtoms.size() != c.mAtoms.size())
break;
if (mBonds.size() != c.mBonds.size())
break;
if (mChiralCentres.size() != c.mChiralCentres.size())
break;
// return i != mBonds.end() ? i->distance : 0;
// }
// same number of atoms of each type?
std::map<AtomType,int> aTypeCount, bTypeCount;
bool sameAtomNames = true;
for (size_t i = 0; i < mAtoms.size(); ++i)
{
auto& a = mAtoms[i];
auto& b = c.mAtoms[i];
aTypeCount[a.typeSymbol] += 1;
bTypeCount[b.typeSymbol] += 1;
if (a.id != b.id or a.typeSymbol != b.typeSymbol)
sameAtomNames = false;
}
if (not sameAtomNames and aTypeCount != bTypeCount)
break;
bool sameBonds = sameAtomNames;
for (size_t i = 0; sameBonds and i < mBonds.size(); ++i)
{
sameBonds =
mBonds[i].atomID[0] == c.mBonds[i].atomID[0] and
mBonds[i].atomID[1] == c.mBonds[i].atomID[1] and
mBonds[i].type == c.mBonds[i].type;
}
if (sameBonds)
{
result = true;
break;
}
// implement rest of tests
std::vector<std::tuple<std::string,std::string>> mapping;
result = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, mapping);
if (cif::VERBOSE and result)
{
for (auto& m: mapping)
std::cerr << " " << std::get<0>(m) << " => " << std::get<1>(m) << std::endl;
}
break;
}
return result;
}
std::vector<std::tuple<std::string,std::string>> Compound::mapToIsomer(const Compound& c) const
{
std::vector<std::tuple<std::string,std::string>> result;
bool check = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, result);
if (not check)
throw std::runtime_error("Compounds " + id() + " and " + c.id() + " are not isomers in call to mapToIsomer");
return result;
}
// float Compound::bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const
// {
// float result = nanf("1");
std::vector<std::string> Compound::isomers() const
{
std::vector<std::string> result;
auto& db = IsomerDB::instance();
if (db.count(mID))
{
result = db[mID];
auto i = find(result.begin(), result.end(), mID);
assert(i != result.end());
result.erase(i);
}
return result;
}
// for (auto &a : mAngles)
// {
// if (not(a.atomID[1] == atomId_2 and
// ((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
// (a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
// continue;
float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const
{
float result = nanf("1");
for (auto& a: mAngles)
{
if (not (a.atomID[1] == atomId_2 and
((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
(a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
continue;
result = a.angle;
break;
}
return result;
}
// result = a.angle;
// break;
// }
// return result;
// }
//static float calcC(float a, float b, float alpha)
//{
......@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
// float d = sqrt(b * b - f * f);
// float e = a - d;
// float c = sqrt(f * f + e * e);
//
//
// return c;
//}
float Compound::chiralVolume(const std::string& centreID) const
{
float result = 0;
for (auto& cv: mChiralCentres)
{
if (cv.id != centreID)
continue;
// calculate the expected chiral volume
// the edges
float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
// the angles for the top of the tetrahedron
float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
float cosa = cos(alpha * kPI / 180);
float cosb = cos(beta * kPI / 180);
float cosc = cos(gamma * kPI / 180);
result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
if (cv.volumeSign == negativ)
result = -result;
break;
}
return result;
}
// float Compound::chiralVolume(const std::string &centreID) const
// {
// float result = 0;
// --------------------------------------------------------------------
// for (auto &cv : mChiralCentres)
// {
// if (cv.id != centreID)
// continue;
Link::Link(cif::Datablock& db)
{
mID = db.getName();
auto& linkBonds = db["chem_link_bond"];
for (auto row: linkBonds)
{
LinkBond b;
std::string type, aromatic;
cif::tie(b.atom[0].compID, b.atom[0].atomID,
b.atom[1].compID, b.atom[1].atomID, type, b.distance, b.esd) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"type", "value_dist", "value_dist_esd");
using cif::iequals;
if (iequals(type, "single") or iequals(type, "sing")) b.type = singleBond;
else if (iequals(type, "double") or iequals(type, "doub")) b.type = doubleBond;
else if (iequals(type, "triple") or iequals(type, "trip")) b.type = tripleBond;
else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
b.type = delocalizedBond;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_link_bond.type " << type << " in " << mID << std::endl;
b.type = singleBond;
}
// if (b.atom[0] > b.atom[1])
// swap(b.atom[0], b.atom[1]);
mBonds.push_back(b);
}
// sort(mBonds.begin(), mBonds.end(), LinkBondLess());
// // calculate the expected chiral volume
auto& linkAngles = db["chem_link_angle"];
for (auto row: linkAngles)
{
LinkAngle a;
cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
a.atom[2].compID, a.atom[2].atomID, a.angle, a.esd) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "value_angle", "value_angle_esd");
mAngles.push_back(a);
}
// // the edges
for (auto row: db["chem_link_tor"])
{
LinkTorsion a;
cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
a.atom[2].compID, a.atom[2].atomID, a.atom[3].compID, a.atom[3].atomID,
a.angle, a.esd, a.period) =
row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "atom_4_comp_id", "atom_id_4",
"value_angle", "value_angle_esd", "period");
mTorsions.push_back(a);
}
// float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
// float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
// float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
auto& linkChir = db["chem_link_chir"];
for (auto row: linkChir)
{
LinkChiralCentre cc;
std::string volumeSign;
cif::tie(cc.id, cc.atomCentre.compID, cc.atomCentre.atomID,
cc.atom[0].compID, cc.atom[0].atomID,
cc.atom[1].compID, cc.atom[1].atomID,
cc.atom[2].compID, cc.atom[2].atomID,
volumeSign) =
row.get("id", "atom_centre_comp_id", "atom_id_centre",
"atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
"atom_3_comp_id", "atom_id_3", "volume_sign");
if (volumeSign == "negativ" or volumeSign == "negative")
cc.volumeSign = negativ;
else if (volumeSign == "positiv" or volumeSign == "positive")
cc.volumeSign = positiv;
else if (volumeSign == "both")
cc.volumeSign = both;
else
{
if (cif::VERBOSE)
std::cerr << "Unimplemented chem_link_chir.volume_sign " << volumeSign << " in " << mID << std::endl;
continue;
}
mChiralCentres.push_back(cc);
}
// // the angles for the top of the tetrahedron
auto& linkPlanes = db["chem_link_plane"];
for (auto row: linkPlanes)
{
int compID;
std::string atomID, planeID;
float esd;
cif::tie(planeID, compID, atomID, esd) = row.get("plane_id", "atom_comp_id", "atom_id", "dist_esd");
auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == planeID;});
if (i == mPlanes.end())
{
std::vector<LinkAtom> atoms{LinkAtom{ compID, atomID }};
mPlanes.emplace_back(LinkPlane{ planeID, move(atoms), esd });
}
else
i->atoms.push_back({ compID, atomID });
}
}
// float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
// float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
// float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
const Link& Link::create(const std::string& id)
{
auto result = CompoundFactory::instance().getLink(id);
if (result == nullptr)
result = CompoundFactory::instance().createLink(id);
if (result == nullptr)
throw std::runtime_error("Link with id " + id + " not found");
return *result;
}
// float cosa = cos(alpha * kPI / 180);
// float cosb = cos(beta * kPI / 180);
// float cosc = cos(gamma * kPI / 180);
Link::~Link()
{
}
// result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
float Link::atomBondValue(const LinkAtom& atom1, const LinkAtom& atom2) const
{
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](auto& b)
{
return (b.atom[0] == atom1 and b.atom[1] == atom2)
or (b.atom[0] == atom2 and b.atom[1] == atom1);
});
return i != mBonds.end() ? i->distance : 0;
}
// if (cv.volumeSign == negativ)
// result = -result;
float Link::bondAngle(const LinkAtom& atom1, const LinkAtom& atom2, const LinkAtom& atom3) const
{
float result = nanf("1");
for (auto& a: mAngles)
{
if (not (a.atom[1] == atom2 and
((a.atom[0] == atom1 and a.atom[2] == atom3) or
(a.atom[2] == atom1 and a.atom[0] == atom3))))
continue;
result = a.angle;
break;
}
return result;
}
// break;
// }
float Link::chiralVolume(const std::string& centreID) const
{
float result = 0;
for (auto& cv: mChiralCentres)
{
if (cv.id != centreID)
continue;
// calculate the expected chiral volume
// the edges
float a = atomBondValue(cv.atomCentre, cv.atom[0]);
float b = atomBondValue(cv.atomCentre, cv.atom[1]);
float c = atomBondValue(cv.atomCentre, cv.atom[2]);
// the angles for the top of the tetrahedron
float alpha = bondAngle(cv.atom[0], cv.atomCentre, cv.atom[1]);
float beta = bondAngle(cv.atom[1], cv.atomCentre, cv.atom[2]);
float gamma = bondAngle(cv.atom[2], cv.atomCentre, cv.atom[0]);
float cosa = cos(alpha * kPI / 180);
float cosb = cos(beta * kPI / 180);
float cosc = cos(gamma * kPI / 180);
result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
if (cv.volumeSign == negativ)
result = -result;
break;
}
return result;
}
// return result;
// }
// --------------------------------------------------------------------
// a factory class to generate compounds
const std::map<std::string,char> kAAMap{
{ "ALA", 'A' },
{ "ARG", 'R' },
{ "ASN", 'N' },
{ "ASP", 'D' },
{ "CYS", 'C' },
{ "GLN", 'Q' },
{ "GLU", 'E' },
{ "GLY", 'G' },
{ "HIS", 'H' },
{ "ILE", 'I' },
{ "LEU", 'L' },
{ "LYS", 'K' },
{ "MET", 'M' },
{ "PHE", 'F' },
{ "PRO", 'P' },
{ "SER", 'S' },
{ "THR", 'T' },
{ "TRP", 'W' },
{ "TYR", 'Y' },
{ "VAL", 'V' },
{ "GLX", 'Z' },
{ "ASX", 'B' }
};
const std::map<std::string,char> kBaseMap{
{ "A", 'A' },
{ "C", 'C' },
{ "G", 'G' },
{ "T", 'T' },
{ "U", 'U' },
{ "DA", 'A' },
{ "DC", 'C' },
{ "DG", 'G' },
{ "DT", 'T' }
};
const std::map<std::string, char> kAAMap{
{"ALA", 'A'},
{"ARG", 'R'},
{"ASN", 'N'},
{"ASP", 'D'},
{"CYS", 'C'},
{"GLN", 'Q'},
{"GLU", 'E'},
{"GLY", 'G'},
{"HIS", 'H'},
{"ILE", 'I'},
{"LEU", 'L'},
{"LYS", 'K'},
{"MET", 'M'},
{"PHE", 'F'},
{"PRO", 'P'},
{"SER", 'S'},
{"THR", 'T'},
{"TRP", 'W'},
{"TYR", 'Y'},
{"VAL", 'V'},
{"GLX", 'Z'},
{"ASX", 'B'}};
const std::map<std::string, char> kBaseMap{
{"A", 'A'},
{"C", 'C'},
{"G", 'G'},
{"T", 'T'},
{"U", 'U'},
{"DA", 'A'},
{"DC", 'C'},
{"DG", 'G'},
{"DT", 'T'}};
// --------------------------------------------------------------------
......@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
public:
CompoundFactoryImpl();
CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next);
CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next);
~CompoundFactoryImpl()
{
delete mNext;
}
Compound* get(std::string id);
Compound* create(std::string id);
Compound *get(std::string id);
Compound *create(std::string id);
const Link* getLink(std::string id);
const Link* createLink(std::string id);
CompoundFactoryImpl* pop()
CompoundFactoryImpl *pop()
{
auto result = mNext;
mNext = nullptr;
......@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
return result;
}
std::string unalias(const std::string& resName) const
{
std::string result = resName;
auto& e = const_cast<cif::File&>(mFile)["comp_synonym_list"];
for (auto& synonym: e["chem_comp_synonyms"])
{
if (ba::iequals(synonym["comp_alternative_id"].as<std::string>(), resName) == false)
continue;
result = synonym["comp_id"].as<std::string>();
ba::trim(result);
break;
}
if (result.empty() and mNext)
result = mNext->unalias(resName);
return result;
}
bool isKnownPeptide(const std::string& resName)
bool isKnownPeptide(const std::string &resName)
{
return mKnownPeptides.count(resName) or
(mNext != nullptr and mNext->isKnownPeptide(resName));
(mNext != nullptr and mNext->isKnownPeptide(resName));
}
bool isKnownBase(const std::string& resName)
bool isKnownBase(const std::string &resName)
{
return mKnownBases.count(resName) or
(mNext != nullptr and mNext->isKnownBase(resName));
(mNext != nullptr and mNext->isKnownBase(resName));
}
private:
std::shared_timed_mutex mMutex;
std::string mPath;
std::vector<Compound*> mCompounds;
std::vector<const Link*> mLinks;
std::set<std::string> mKnownPeptides;
std::set<std::string> mKnownBases;
std::set<std::string> mMissing;
cif::File mFile;
CompoundFactoryImpl* mNext = nullptr;
std::shared_timed_mutex mMutex;
std::string mPath;
std::vector<Compound *> mCompounds;
std::set<std::string> mKnownPeptides;
std::set<std::string> mKnownBases;
std::set<std::string> mMissing;
CompoundFactoryImpl *mNext = nullptr;
};
// --------------------------------------------------------------------
CompoundFactoryImpl::CompoundFactoryImpl()
{
for (const auto&[ key, value]: kAAMap)
for (const auto &[key, value] : kAAMap)
mKnownPeptides.insert(key);
for (const auto&[ key, value]: kBaseMap)
for (const auto &[key, value] : kBaseMap)
mKnownBases.insert(key);
}
CompoundFactoryImpl::CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next)
: mPath(file), mFile(file), mNext(next)
CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next)
: mPath(file)
, mNext(next)
{
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
cif::File cifFile(file);
if (not cifFile.isValid())
throw std::runtime_error("Invalid compound file");
auto& cat = mFile.firstDatablock()["chem_comp"];
for (auto& chemComp: cat)
for (auto &db: cifFile)
{
std::string group, threeLetterCode;
cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code");
if (std::regex_match(group, peptideRx))
mKnownPeptides.insert(threeLetterCode);
else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
mKnownBases.insert(threeLetterCode);
auto compound = std::make_unique<Compound>(db);
mCompounds.push_back(compound.release());
}
}
Compound* CompoundFactoryImpl::get(std::string id)
Compound *CompoundFactoryImpl::get(std::string id)
{
std::shared_lock lock(mMutex);
ba::to_upper(id);
Compound* result = nullptr;
for (auto cmp: mCompounds)
Compound *result = nullptr;
for (auto cmp : mCompounds)
{
if (cmp->id() == id)
{
......@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
break;
}
}
if (result == nullptr and mNext != nullptr)
result = mNext->get(id);
return result;
}
Compound* CompoundFactoryImpl::create(std::string id)
Compound *CompoundFactoryImpl::create(std::string id)
{
ba::to_upper(id);
Compound* result = get(id);
if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
{
std::unique_lock lock(mMutex);
Compound *result = get(id);
// if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
// {
// std::unique_lock lock(mMutex);
auto& cat = mFile.firstDatablock()["chem_comp"];
// auto &cat = mFile.firstDatablock()["chem_comp"];
auto rs = cat.find(cif::Key("three_letter_code") == id);
if (not rs.empty())
{
auto row = rs.front();
std::string name, group;
uint32_t numberAtomsAll, numberAtomsNh;
cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
row.get("name", "group", "number_atoms_all", "number_atoms_nh");
ba::trim(name);
ba::trim(group);
if (mFile.get("comp_" + id) == nullptr)
{
auto clibd_mon = fs::path(getenv("CLIBD_MON"));
fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
if (not fs::exists(resFile))
mMissing.insert(id);
else
{
mCompounds.push_back(new Compound(resFile.string(), id, name, group));
result = mCompounds.back();
}
}
else
{
mCompounds.push_back(new Compound(mPath, id, name, group));
result = mCompounds.back();
}
}
if (result == nullptr and mNext != nullptr)
result = mNext->create(id);
}
return result;
}
// auto rs = cat.find(cif::Key("three_letter_code") == id);
const Link* CompoundFactoryImpl::getLink(std::string id)
{
std::shared_lock lock(mMutex);
// if (not rs.empty())
// {
// auto row = rs.front();
ba::to_upper(id);
// std::string name, group;
// uint32_t numberAtomsAll, numberAtomsNh;
// cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
// row.get("name", "group", "number_atoms_all", "number_atoms_nh");
const Link* result = nullptr;
for (auto link: mLinks)
{
if (link->id() == id)
{
result = link;
break;
}
}
if (result == nullptr and mNext != nullptr)
result = mNext->getLink(id);
return result;
}
// ba::trim(name);
// ba::trim(group);
const Link* CompoundFactoryImpl::createLink(std::string id)
{
ba::to_upper(id);
// if (mFile.get("comp_" + id) == nullptr)
// {
// auto clibd_mon = fs::path(getenv("CLIBD_MON"));
const Link* result = getLink(id);
// fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
if (result == nullptr)
{
std::unique_lock lock(mMutex);
// if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
// resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
// if (not fs::exists(resFile))
// mMissing.insert(id);
// else
// {
// mCompounds.push_back(new Compound(resFile.string(), id, name, group));
// result = mCompounds.back();
// }
// }
// else
// {
// mCompounds.push_back(new Compound(mPath, id, name, group));
// result = mCompounds.back();
// }
// }
// if (result == nullptr and mNext != nullptr)
// result = mNext->create(id);
// }
auto db = mFile.get("link_" + id);
if (db != nullptr)
{
result = new Link(*db);
mLinks.push_back(result);
}
if (result == nullptr and mNext != nullptr)
result = mNext->createLink(id);
}
return result;
}
// --------------------------------------------------------------------
CompoundFactory* CompoundFactory::sInstance;
CompoundFactory *CompoundFactory::sInstance;
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
bool CompoundFactory::sUseThreadLocalInstance;
......@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
CompoundFactory::CompoundFactory()
: mImpl(nullptr)
{
const char* clibdMon = getenv("CLIBD_MON");
if (clibdMon != nullptr)
{
fs::path db = fs::path(clibdMon) / "list" / "mon_lib_list.cif";
if (fs::exists(db))
pushDictionary(db.string());
}
if (mImpl == nullptr)
{
if (cif::VERBOSE)
std::cerr << "Could not load the mon_lib_list.cif file from CCP4, please make sure you have installed CCP4 and sourced the environment." << std::endl;
mImpl = new CompoundFactoryImpl();
}
}
CompoundFactory::~CompoundFactory()
......@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
delete mImpl;
}
CompoundFactory& CompoundFactory::instance()
CompoundFactory &CompoundFactory::instance()
{
if (sUseThreadLocalInstance)
{
......@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
}
}
void CompoundFactory::pushDictionary(const std::string& inDictFile)
void CompoundFactory::pushDictionary(const std::string &inDictFile)
{
if (not fs::exists(inDictFile))
throw std::runtime_error("file not found: " + inDictFile);
// ifstream file(inDictFile);
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
// ifstream file(inDictFile);
// if (not file.is_open())
// throw std::runtime_error("Could not open peptide list " + inDictFile);
try
{
mImpl = new CompoundFactoryImpl(inDictFile, mImpl);
}
catch (const std::exception& ex)
catch (const std::exception &ex)
{
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
throw;
......@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
}
// id is the three letter code
const Compound* CompoundFactory::get(std::string id)
const Compound *CompoundFactory::get(std::string id)
{
return mImpl->get(id);
}
const Compound* CompoundFactory::create(std::string id)
const Compound *CompoundFactory::create(std::string id)
{
return mImpl->create(id);
}
const Link* CompoundFactory::getLink(std::string id)
{
return mImpl->getLink(id);
}
const Link* CompoundFactory::createLink(std::string id)
{
return mImpl->createLink(id);
}
bool CompoundFactory::isKnownPeptide(const std::string& resName) const
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
{
return mImpl->isKnownPeptide(resName);
}
bool CompoundFactory::isKnownBase(const std::string& resName) const
bool CompoundFactory::isKnownBase(const std::string &resName) const
{
return mImpl->isKnownBase(resName);
}
std::string CompoundFactory::unalias(const std::string& resName) const
// --------------------------------------------------------------------
std::vector<std::string> Compound::addExtraComponents(const std::filesystem::path &components)
{
return mImpl->unalias(resName);
}
}
} // namespace mmcif
......@@ -457,7 +457,6 @@ class PDBFileParser
char iCode;
int numHetAtoms = 0;
std::string text;
bool sugar = false;
std::string asymID;
std::vector<PDBRecord*> atoms;
bool processed = false;
......@@ -467,14 +466,6 @@ class PDBFileParser
HET(const std::string& hetID, char chainID, int seqNum, char iCode, int numHetAtoms = 0, const std::string& text = {})
: hetID(hetID), chainID(chainID), seqNum(seqNum), iCode(iCode), numHetAtoms(numHetAtoms), text(text)
{
// just in case we don't have a CCP4 available
if (hetID == "MAN" or hetID == "BMA" or hetID == "NAG" or hetID == "NDG" or hetID == "FUC" or hetID == "FUL")
sugar = true;
else
{
auto compound = CompoundFactory::instance().create(hetID);
sugar = compound ? compound->isSugar() : false;
}
}
};
......@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
mMod2parent.count(cc) ? mMod2parent[cc] : cc
);
std::string formula = mFormuls[cc];
if (formula.empty() and compound != nullptr)
formula = compound->formula();
else
{
const std::regex rx(R"(\d+\((.+)\))");
std::smatch m;
if (std::regex_match(formula, m, rx))
formula = m[1].str();
}
std::string name = mHetnams[cc];
if (name.empty() and compound != nullptr)
name = compound->name();
std::string type = "other";
std::string name;
std::string formula;
std::string type;
std::string nstd = ".";
std::string formulaWeight;
if (compound != nullptr)
{
name = compound->name();
type = compound->type();
if (type.empty())
type = "NON-POLYMER";
if (iequals(type, "L-peptide linking") or iequals(type, "peptide linking"))
nstd = "y";
formula = compound->formula();
formulaWeight = std::to_string(compound->formulaWeight());
}
if (name.empty())
name = mHetnams[cc];
if (type.empty())
type = "NON-POLYMER";
if (formula.empty())
{
formula = mFormuls[cc];
const std::regex rx(R"(\d+\((.+)\))");
std::smatch m;
if (std::regex_match(formula, m, rx))
formula = m[1].str();
}
if (modResSet.count(cc))
......@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
{ "id", cc },
{ "name", name },
{ "formula", formula },
{ "formula_weight", formulaWeight },
{ "mon_nstd_flag", nstd },
{ "type", type }
});
......
......@@ -566,16 +566,6 @@ int Atom::charge() const
return property<int>("pdbx_formal_charge");
}
std::string Atom::energyType() const
{
std::string result;
if (impl() and impl()->mCompound)
result = impl()->mCompound->getAtomByID(impl()->mAtomID).typeEnergy;
return result;
}
float Atom::uIso() const
{
float result;
......@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
return a1.size() == a2.size();
}
bool Residue::isSugar() const
{
return compound().isSugar();
}
bool Residue::isPyranose() const
{
return cif::iequals(compound().group(), "pyranose");
}
bool Residue::isFuranose() const
{
return cif::iequals(compound().group(), "furanose");
}
std::string Residue::authID() const
{
std::string result;
......@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{
auto compound = Compound::create(compoundID);
if (compound == nullptr)
throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCP4 monomers lib)");
throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");
cif::Datablock& db = *mFile.impl().mDb;
......@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
{ "id", compoundID },
{ "name", compound->name() },
{ "formula", compound->formula() },
{ "formula_weight", compound->formulaWeight() },
{ "type", compound->type() }
});
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment