First steps, remove CCP4 info from compound

915d6504 · Maarten L. Hekkelman · 5e63ca7a · 915d6504 · 915d6504 · 915d6504
Commit 915d6504 authored Apr 13, 2021 by Maarten L. Hekkelman
Hide whitespace changes
Inline Side-by-side

Showing with 399 additions and 1393 deletions

include/cif++/Cif++.hpp
+29 -7

include/cif++/Compound.hpp
+99 -250

include/cif++/Structure.hpp
+0 -7

src/Compound.cpp
+244 -1075

src/PDB2Cif.cpp
+25 -28

src/Structure.cpp
+2 -26

No files found.
--- a/include/cif++/Cif++.hpp
+++ b/include/cif++/Cif++.hpp
@@ -289,9 +289,10 @@ namespace detail
 		void swap(ItemReference& b);
 		
 		template<typename T = std::string>
-		T as() const
+		auto as() const
 		{
-			return item_value_as<T>::convert(*this);
+			using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
+			return item_value_as<value_type>::convert(*this);
 		}

 		template<typename T>
@@ -337,11 +338,13 @@ namespace detail
 	template<typename T>
 	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_floating_point_v<T>>>
 	{
-		static T convert(const ItemReference& ref)
+		using value_type = std::remove_reference_t<std::remove_cv_t<T>>;
+
+		static value_type convert(const ItemReference& ref)
 		{
-			T result = {};
+			value_type result = {};
 			if (not ref.empty())
-				result = static_cast<T>(std::stod(ref.c_str()));
+				result = static_cast<value_type>(std::stod(ref.c_str()));
 			return result;
 		}

@@ -376,7 +379,7 @@ namespace detail
 	};

 	template<typename T>
-	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T>>>
+	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T> and not std::is_same_v<T, bool>>>
 	{
 		static T convert(const ItemReference& ref)
 		{
@@ -417,7 +420,7 @@ namespace detail
 	};

 	template<typename T>
-	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T>>>
+	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T> and not std::is_same_v<T, bool>>>
 	{
 		static T convert(const ItemReference& ref)
 		{
@@ -482,6 +485,25 @@ namespace detail
 		}
 	};

+	template<typename T>
+	struct ItemReference::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
+	{
+		static bool convert(const ItemReference& ref)
+		{
+			bool result = false;
+			if (not ref.empty())
+				result = iequals(ref.c_str(), "y");
+			return result;
+		}
+
+		static int compare(const ItemReference& ref, bool value, bool icase)
+		{
+			bool rv = convert(ref);
+			return value && rv ? 0
+				: (rv < value ? -1 : 1);
+		}
+	};
+
 	template<size_t N>
 	struct ItemReference::item_value_as<char[N]>
 	{

--- a/include/cif++/Compound.hpp
+++ b/include/cif++/Compound.hpp
@@ -26,10 +26,13 @@

 #pragma once

+/// \file This file contains the definition for the class Compound, encapsulating
+/// the information found for compounds in the CCD.
+
+#include <map>
 #include <set>
 #include <tuple>
 #include <vector>
-#include <map>

 #include "cif++/AtomType.hpp"
 #include "cif++/Cif++.hpp"
@@ -38,310 +41,156 @@ namespace mmcif
 {

 // --------------------------------------------------------------------
-// The chemical composition of the structure in an mmCIF file is 
-// defined in the class composition. A compositon consists of
-// entities. Each Entity can be either a polymer, a non-polymer
-// a macrolide or a water molecule.
-// Entities themselves are made up of compounds. And compounds
-// contain CompoundAtom records for each atom.

 class Compound;
-class Link;
 struct CompoundAtom;

-enum BondType { singleBond, doubleBond, tripleBond, delocalizedBond };
-
-// --------------------------------------------------------------------
-// struct containing information about an atom in a chemical compound
-// This information comes from the CCP4 monomer library. 
-
-struct CompoundAtom
-{
-	std::string		id;
-	AtomType		typeSymbol;
-	std::string		typeEnergy;
-	float			partialCharge;
-};
-
-// --------------------------------------------------------------------
-// struct containing information about the bonds
-// This information comes from the CCP4 monomer library. 
-
-struct CompoundBond
-{
-	std::string		atomID[2];
-	BondType		type;
-	float			distance;
-	float			esd;
-};
-
-// --------------------------------------------------------------------
-// struct containing information about the bond-angles
-// This information comes from the CCP4 monomer library. 
-
-struct CompoundAngle
-{
-	std::string		atomID[3];
-	float			angle;
-	float			esd;
+/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
+enum class BondType
+{
+	sing, // 'single bond'
+	doub, // 'double bond'
+	trip, // 'triple bond'
+	quad, // 'quadruple bond'
+	arom, // 'aromatic bond'
+	poly, // 'polymeric bond'
+	delo, // 'delocalized double bond'
+	pi,   // 'pi bond'
 };

-// --------------------------------------------------------------------
-// struct containing information about the bond-angles
-// This information comes from the CCP4 monomer library. 
+std::string to_string(BondType bondType);
+BondType from_string(const std::string& bondType);

-struct CompoundTorsion
-{
-	std::string		atomID[4];
-	float			angle;
-	float			esd;
-	int				period;
-};
+/// --------------------------------------------------------------------
+/// \brief struct containing information about an atom in a chemical compound.
+/// This is a subset of the available information. Contact the author if you need more fields.

-// --------------------------------------------------------------------
-// struct containing information about the bond-angles
-// This information comes from the CCP4 monomer library. 
-
-struct CompoundPlane
+struct CompoundAtom
 {
-	std::string					id;
-	std::vector<std::string>	atomID;
-	float						esd;
+	std::string id;
+	AtomType typeSymbol;
+	int charge;
+	bool aromatic;
+	bool leavingAtom;
+	bool stereoConfig;
+	float x, y, z;
 };

-// --------------------------------------------------------------------
-// struct containing information about a chiral centre
-// This information comes from the CCP4 monomer library. 
-
-enum ChiralVolumeSign { negativ, positiv, both };
+/// --------------------------------------------------------------------
+/// \brief struct containing information about the bonds

-struct CompoundChiralCentre
+struct CompoundBond
 {
-	std::string			id;
-	std::string			atomIDCentre;
-	std::string			atomID[3];
-	ChiralVolumeSign	volumeSign;
+	std::string atomID[2];
+	BondType type;
+	bool aromatic, stereoConfig;
 };

-// --------------------------------------------------------------------
-// a class that contains information about a chemical compound.
-// This information is derived from the ccp4 monomer library by default.
-// To create compounds, you'd best use the factory method.
+/// --------------------------------------------------------------------
+/// \brief a class that contains information about a chemical compound.
+/// This information is derived from the CDD by default.
+///
+/// To create compounds, you use the factory method. You can add your own
+/// compound definitions by calling the addExtraComponents function and
+/// pass it a valid CCD formatted file.

 class Compound
 {
  public:
-
-	Compound(const std::string& file, const std::string& id, const std::string& name,
-		const std::string& group);
-
-	// factory method, create a Compound based on the three letter code
-	// (for amino acids) or the one-letter code (for bases) or the
-	// code as it is known in the CCP4 monomer library.
-
-	static const Compound* create(const std::string& id);
-
-	// this second factory method can create a Compound even if it is not
-	// recorded in the library. It will take the values from the CCP4 lib
-	// unless the value passed to this function is not empty.
-	static const Compound* create(const std::string& id, const std::string& name,
-		const std::string& type, const std::string& formula);
-	
-	// add an additional path to the monomer library.
-	static void addMonomerLibraryPath(const std::string& dir);
-
-	// accessors
-	std::string id() const							{ return mID; }
-	std::string	name() const						{ return mName; }
-	std::string	type() const;
-	std::string group() const						{ return mGroup; }
-	std::vector<CompoundAtom> atoms() const			{ return mAtoms; }
-	std::vector<CompoundBond> bonds() const			{ return mBonds; }
-	std::vector<CompoundAngle> angles() const		{ return mAngles; }
-	std::vector<CompoundChiralCentre> chiralCentres() const
-													{ return mChiralCentres; }
-	std::vector<CompoundPlane> planes() const		{ return mPlanes; }
-	std::vector<CompoundTorsion> torsions() const	{ return mTorsions; }
-	
-	CompoundAtom getAtomByID(const std::string& atomID) const;
-	
-	bool atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const;
-	float atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const;
-	float bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const;
-	float chiralVolume(const std::string& centreID) const;
-
-	std::string formula() const;
-	float formulaWeight() const;
-	int charge() const;
-	bool isWater() const;
-	bool isSugar() const;
-
-	std::vector<std::string>	isomers() const;
-	bool isIsomerOf(const Compound& c) const;
-	std::vector<std::tuple<std::string,std::string>> mapToIsomer(const Compound& c) const;
-
-  private:
-
+	Compound(cif::Datablock &db);
 	~Compound();

-	cif::File					mCF;
-
-	std::string					mID;
-	std::string					mName;
-	std::string					mGroup;
-	std::vector<CompoundAtom>	mAtoms;
-	std::vector<CompoundBond>	mBonds;
-	std::vector<CompoundAngle>	mAngles;
-	std::vector<CompoundTorsion>mTorsions;
-	std::vector<CompoundChiralCentre>
-								mChiralCentres;
-	std::vector<CompoundPlane>	mPlanes;
-};
-
-// --------------------------------------------------------------------
-// struct containing information about the bonds
-// This information comes from the CCP4 monomer library. 
-
-struct LinkAtom
-{
-	int					compID;
-	std::string			atomID;
-	
-	bool operator==(const LinkAtom& rhs) const { return compID == rhs.compID and atomID == rhs.atomID; }
-};
-
-struct LinkBond
-{
-	LinkAtom			atom[2];
-	BondType			type;
-	float				distance;
-	float				esd;
-};
-
-// --------------------------------------------------------------------
-// struct containing information about the bond-angles
-// This information comes from the CCP4 monomer library. 
-
-struct LinkAngle
-{
-	LinkAtom			atom[3];
-	float				angle;
-	float				esd;
-};
-
-// --------------------------------------------------------------------
-// struct containing information about the bond-torsions
-// This information comes from the CCP4 monomer library. 
+	/// \brief factory method, create a Compound based on the three letter code
+	/// (for amino acids) or the one-letter code (for bases) or the
+	/// code as it is known in the CCD.

-struct LinkTorsion
-{
-	LinkAtom			atom[4];
-	float				angle;
-	float				esd;
-	int					period;
-};
+	static const Compound *create(const std::string &id);

-// --------------------------------------------------------------------
-// struct containing information about the bond-angles
-// This information comes from the CCP4 monomer library. 
+	// /// this second factory method can create a Compound even if it is not
+	// /// recorded in the library. It will take the values from the CCP4 lib
+	// /// unless the value passed to this function is not empty.
+	// static const Compound* create(const std::string& id, const std::string& name,
+	// 	const std::string& type, const std::string& formula);

-struct LinkPlane
-{
-	std::string					id;
-	std::vector<LinkAtom>		atoms;
-	float						esd;
-};
+	/// \brief Create compounds based on the data in the file \a components
+	///
+	/// It is often required to add information about unknown components.
+	/// This file parses either a CCP4 or a CCD formatted components file
+	///
+	/// \param components	The mmCIF file containing the components
+	/// \result				An array containing the ID's of the added components
+	static std::vector<std::string> addExtraComponents(const std::filesystem::path &components);

-// --------------------------------------------------------------------
-// struct containing information about a chiral centre
-// This information comes from the CCP4 monomer library. 
+	// accessors

-struct LinkChiralCentre
-{
-	std::string			id;
-	LinkAtom			atomCentre;
-	LinkAtom			atom[3];
-	ChiralVolumeSign	volumeSign;
-};
+	std::string id() const { return mID; }
+	std::string name() const { return mName; }
+	std::string type() const { return mType; }
+	std::string formula() const { return mFormula; }
+	float formulaWeight() const { return mFormulaWeight; }
+	int formalCharge() const { return mFormalCharge; }

-// --------------------------------------------------------------------
-// a class that contains information about a chemical link between compounds.
-// This information is derived from the ccp4 monomer library by default.
+	const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
+	const std::vector<CompoundBond> &bonds() const { return mBonds; }

-class Link
-{
-  public:
+	CompoundAtom getAtomByID(const std::string &atomID) const;

-	Link(cif::Datablock& db);
+	bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
+	// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
+	// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
+	// float chiralVolume(const std::string &centreID) const;

-	// Factory method.
-	static const Link& create(const std::string& id);
+	bool isWater() const
+	{
+		return mID == "HOH" or mID == "H2O" or mID == "WAT";
+	}

-	// accessors
-	std::string id() const								{ return mID; }
-	std::vector<LinkBond> bonds() const					{ return mBonds; }
-	std::vector<LinkAngle> angles() const				{ return mAngles; }
-	std::vector<LinkChiralCentre> chiralCentres() const	{ return mChiralCentres; }
-	std::vector<LinkPlane> planes() const				{ return mPlanes; }
-	std::vector<LinkTorsion> torsions() const			{ return mTorsions; }
-	
-	float atomBondValue(const LinkAtom& atomId_1, const LinkAtom& atomId_2) const;
-	float bondAngle(const LinkAtom& atomId_1, const LinkAtom& atomId_2, const LinkAtom& atomId_3) const;
-	float chiralVolume(const std::string& id) const;
-	
  private:

-	~Link();
-
-	std::string						mID;
-	std::vector<LinkBond>			mBonds;
-	std::vector<LinkAngle>			mAngles;
-	std::vector<LinkTorsion>		mTorsions;
-	std::vector<LinkChiralCentre>	mChiralCentres;
-	std::vector<LinkPlane>			mPlanes;
+	std::string mID;
+	std::string mName;
+	std::string mType;
+	std::string mFormula;
+	float mFormulaWeight;
+	int mFormalCharge;
+	std::vector<CompoundAtom> mAtoms;
+	std::vector<CompoundBond> mBonds;
 };

 // --------------------------------------------------------------------
 // Factory class for Compound and Link objects

-extern const std::map<std::string,char> kAAMap, kBaseMap;
+extern const std::map<std::string, char> kAAMap, kBaseMap;

 class CompoundFactory
 {
  public:
-	
 	static void init(bool useThreadLocalInstanceOnly);
-	static CompoundFactory& instance();
+	static CompoundFactory &instance();
 	static void clear();

-	void pushDictionary(const std::string& inDictFile);
+	void pushDictionary(const std::string &inDictFile);
 	void popDictionary();

-	bool isKnownPeptide(const std::string& res_name) const;
-	bool isKnownBase(const std::string& res_name) const;
+	bool isKnownPeptide(const std::string &res_name) const;
+	bool isKnownBase(const std::string &res_name) const;

-	std::string unalias(const std::string& res_name) const;
-
-	const Compound* get(std::string id);
-	const Compound* create(std::string id);
-	
-	const Link* getLink(std::string id);
-	const Link* createLink(std::string id);
+	const Compound *get(std::string id);
+	const Compound *create(std::string id);

 	~CompoundFactory();

  private:
-
 	CompoundFactory();
-	
-	CompoundFactory(const CompoundFactory&) = delete;
-	CompoundFactory& operator=(const CompoundFactory&) = delete;
-	
-	static CompoundFactory* sInstance;
+
+	CompoundFactory(const CompoundFactory &) = delete;
+	CompoundFactory &operator=(const CompoundFactory &) = delete;
+
+	static CompoundFactory *sInstance;
 	static thread_local std::unique_ptr<CompoundFactory> tlInstance;
 	static bool sUseThreadLocalInstance;

-	class CompoundFactoryImpl* mImpl;
+	class CompoundFactoryImpl *mImpl;
 };

-}
+} // namespace mmcif
--- a/include/cif++/Structure.hpp
+++ b/include/cif++/Structure.hpp
@@ -141,9 +141,6 @@ class Atom
 	
 	// access data in compound for this atom

-	// the energy-type field
-	std::string energyType() const;
-	
 	// convenience routine
 	bool isBackBone() const
 	{
@@ -243,10 +240,6 @@ class Residue
 	
 	bool isWater() const					{ return mCompoundID == "HOH"; }

-	bool isSugar() const;
-	bool isPyranose() const;
-	bool isFuranose() const;
-	
 	const Structure& structure() const		{ return *mStructure; }

 	bool empty() const						{ return mStructure == nullptr; }

--- a/src/Compound.cpp
+++ b/src/Compound.cpp
@@ -29,8 +29,8 @@
 #endif

 #include <map>
-#include <numeric>
 #include <mutex>
+#include <numeric>
 #include <shared_mutex>

 #include <boost/algorithm/string.hpp>
@@ -39,9 +39,9 @@
 #include <fstream>

 #include "cif++/Cif++.hpp"
-#include "cif++/Point.hpp"
-#include "cif++/Compound.hpp"
 #include "cif++/CifUtils.hpp"
+#include "cif++/Compound.hpp"
+#include "cif++/Point.hpp"

 namespace ba = boost::algorithm;
 namespace fs = std::filesystem;
@@ -50,11 +50,41 @@ namespace mmcif
 {

 // --------------------------------------------------------------------
+
+std::string to_string(BondType bondType)
+{
+	switch (bondType)
+	{
+		case BondType::sing:	return "sing";
+		case BondType::doub:	return "doub";
+		case BondType::trip:	return "trip";
+		case BondType::quad:	return "quad";
+		case BondType::arom:	return "arom";
+		case BondType::poly:	return "poly";
+		case BondType::delo:	return "delo";
+		case BondType::pi:		return "pi";
+	}
+}
+
+BondType from_string(const std::string& bondType)
+{
+	if (cif::iequals(bondType, "sing"))	return BondType::sing;
+	if (cif::iequals(bondType, "doub"))	return BondType::doub;
+	if (cif::iequals(bondType, "trip"))	return BondType::trip;
+	if (cif::iequals(bondType, "quad"))	return BondType::quad;
+	if (cif::iequals(bondType, "arom"))	return BondType::arom;
+	if (cif::iequals(bondType, "poly"))	return BondType::poly;
+	if (cif::iequals(bondType, "delo"))	return BondType::delo;
+	if (cif::iequals(bondType, "pi"))	return BondType::pi;
+	throw std::invalid_argument("Invalid bondType: " + bondType);
+}
+
+// --------------------------------------------------------------------
 // Compound helper classes

 struct CompoundAtomLess
 {
-	bool operator()(const CompoundAtom& a, const CompoundAtom& b) const
+	bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
 	{
 		int d = a.id.compare(b.id);
 		if (d == 0)
@@ -65,512 +95,59 @@ struct CompoundAtomLess

 struct CompoundBondLess
 {
-	bool operator()(const CompoundBond& a, const CompoundBond& b) const
+	bool operator()(const CompoundBond &a, const CompoundBond &b) const
 	{
 		int d = a.atomID[0].compare(b.atomID[0]);
 		if (d == 0)
 			d = a.atomID[1].compare(b.atomID[1]);
 		if (d == 0)
-			d = a.type - b.type;
+			d = static_cast<int>(a.type) - static_cast<int>(b.type);
 		return d < 0;
 	}
 };

-// --------------------------------------------------------------------
-// Isomers in the ccp4 dictionary
-
-struct IsomerSet
-{
-	std::vector<std::string>	compounds;
-};
-
-struct IsomerSets
-{
-	std::vector<IsomerSet>	isomers;
-};
-
-class IsomerDB
-{
-  public:
-	
-	static IsomerDB& instance();
-
-	size_t count(const std::string& compound) const;
-	const std::vector<std::string>& operator[](const std::string& compound) const;
-	
-  private:
-	
-	IsomerDB();
-	
-	IsomerSets			mData;
-};
-
-IsomerDB::IsomerDB()
-{
-#if defined(CACHE_DIR)
-	fs::path isomersFile = fs::path(CACHE_DIR) / "isomers.txt";
-
-	if (not fs::exists(isomersFile))
-		std::cerr << "Could not locate isomers.txt in " CACHE_DIR << std::endl;
-	else
-	{
-		std::ifstream is(isomersFile);
-
-		std::string line;
-
-		while (std::getline(is, line))
-		{
-			IsomerSet compounds;
-			ba::split(compounds.compounds, line, ba::is_any_of(":"));
-
-			if (not compounds.compounds.empty())
-				mData.isomers.emplace_back(std::move(compounds));
-		}
-	}
-#endif
-}
-
-IsomerDB& IsomerDB::instance()
-{
-	static IsomerDB sInstance;
-	return sInstance;
-}
-
-size_t IsomerDB::count(const std::string& compound) const
-{
-	size_t n = 0;
-	for (auto& d: mData.isomers)
-	{
-		if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
-			++n;
-	}
-	return n;
-}
-
-const std::vector<std::string>& IsomerDB::operator[](const std::string& compound) const
-{
-	for (auto& d: mData.isomers)
-	{
-		if (find(d.compounds.begin(), d.compounds.end(), compound) != d.compounds.end())
-			return d.compounds;
-	}
-	
-	throw std::runtime_error("No isomer set found containing " + compound);
-}
-
-// --------------------------------------------------------------------
-// Brute force comparison of two structures, when they are isomers the
-// mapping between the atoms of both is returned
-// This is not an optimal solution, but it works good enough for now
-
-struct Node
-{
-	std::string							id;
-	AtomType						symbol;
-	std::vector<std::tuple<size_t,BondType>>	links;
-	size_t							hydrogens = 0;
-};
-
-// Check to see if the nodes a[iA] and b[iB] are the start of a similar sub structure
-bool SubStructuresAreIsomeric(
-	const std::vector<Node>& a, const std::vector<Node>& b, size_t iA, size_t iB,
-	std::vector<bool> visitedA, std::vector<bool> visitedB, std::vector<std::tuple<std::string,std::string>>& outMapping)
-{
-	auto& na = a[iA];
-	auto& nb = b[iB];
-	size_t N = na.links.size();
-	
-	assert(na.symbol == nb.symbol);
-	assert(nb.links.size() == N);
-
-	// we're optimistic today
-	bool result = true;
-	
-	visitedA[iA] = true;
-	visitedB[iB] = true;
-	
-	// we now have two sets of links to compare. 
-	// Compare each permutation of the second set of indices with the first
-	
-	std::vector<size_t> ilb(N);
-	iota(ilb.begin(), ilb.end(), 0);
-	
-	for (;;)
-	{
-		result = true;
-		std::vector<std::tuple<std::string,std::string>> m;
-		
-		for (size_t i = 0; result and i < N; ++i)
-		{
-			size_t lA, lB;
-			BondType typeA, typeB;
-
-			std::tie(lA, typeA) = na.links[i];			assert(lA < a.size());
-			std::tie(lB, typeB) = nb.links[ilb[i]];		assert(lB < b.size());
-			
-			if (typeA != typeB or visitedA[lA] != visitedB[lB])
-			{
-				result = false;
-				break;
-			}
-
-			auto& la = a[lA];
-			auto& lb = b[lB];
-			
-			if (la.symbol != lb.symbol or la.hydrogens != lb.hydrogens or la.links.size() != lb.links.size())
-			{
-				result = false;
-				break;
-			}
-
-			if (not visitedA[lA])
-				result = SubStructuresAreIsomeric(a, b, lA, lB, visitedA, visitedB, m);
-		}
-		
-		if (result)
-		{
-			outMapping.insert(outMapping.end(), m.begin(), m.end());
-			break;
-		}
-
-		if (not next_permutation(ilb.begin(), ilb.end()))
-			break;
-	}
-	
-	if (result and na.id != nb.id)
-		outMapping.emplace_back(na.id, nb.id);
-	
-	return result;
-}
-
-bool StructuresAreIsomeric(std::vector<CompoundAtom> atomsA, const std::vector<CompoundBond>& bondsA,
-	std::vector<CompoundAtom> atomsB, const std::vector<CompoundBond>& bondsB,
-	std::vector<std::tuple<std::string,std::string>>& outMapping)
-{
-	assert(atomsA.size() == atomsB.size());
-	assert(bondsA.size() == bondsB.size());
-
-	std::vector<Node> a, b;
-	std::map<std::string,size_t> ma, mb;
-	
-	for (auto& atomA: atomsA)
-	{
-		ma[atomA.id] = a.size();
-		a.push_back({atomA.id, atomA.typeSymbol});
-	}
-	
-	for (auto& bondA: bondsA)
-	{
-		size_t atom1 = ma.at(bondA.atomID[0]);
-		size_t atom2 = ma.at(bondA.atomID[1]);
-		
-		if (a[atom2].symbol == H)
-			a[atom1].hydrogens += 1;
-		else
-			a[atom1].links.emplace_back(atom2, bondA.type);
-			
-		if (a[atom1].symbol == H)
-			a[atom2].hydrogens += 1;
-		else
-			a[atom2].links.emplace_back(atom1, bondA.type);
-	}
-
-	for (auto& atomB: atomsB)
-	{
-		mb[atomB.id] = b.size();
-		b.push_back({atomB.id, atomB.typeSymbol});
-	}
-
-	for (auto& bondB: bondsB)
-	{
-		size_t atom1 = mb.at(bondB.atomID[0]);
-		size_t atom2 = mb.at(bondB.atomID[1]);
-		
-		if (b[atom2].symbol == H)
-			b[atom1].hydrogens += 1;
-		else
-			b[atom1].links.emplace_back(atom2, bondB.type);
-		
-		if (b[atom1].symbol == H)
-			b[atom2].hydrogens += 1;
-		else
-			b[atom2].links.emplace_back(atom1, bondB.type);
-	}
-
-	size_t N = atomsA.size();
-	size_t ia = 0;
-	
-	bool result = false;
-
-	// try each atom in B to see if it can be traced to be similar to A starting at zero
-	for (size_t ib = 0; ib < N; ++ib)
-	{
-		if (b[ib].symbol != a[ia].symbol or a[ia].hydrogens != b[ib].hydrogens or a[ia].links.size() != b[ib].links.size())
-			continue;
-		
-		std::vector<bool> va(N, false), vb(N, false);
-
-		if (SubStructuresAreIsomeric(a, b, ia, ib, va, vb, outMapping))
-		{
-			result = true;
-			break;
-		}
-	}
-	
-	return result;
-}

 // --------------------------------------------------------------------
 // Compound

-Compound::Compound(const std::string& file, const std::string& id,
-	const std::string& name, const std::string& group)
-	: mID(id), mName(name), mGroup(group)
+Compound::Compound(cif::Datablock &db)
 {
-	try
-	{
-		mCF.load(file);
-		
-		// locate the datablock
-		auto& db = mCF["comp_" + id];
-		
-		auto& compoundAtoms = db["chem_comp_atom"];
-		
-		for (auto row: compoundAtoms)
-		{
-			std::string id, symbol, energy;
-			float charge;
-			
-			cif::tie(id, symbol, energy, charge) = row.get("atom_id", "type_symbol", "type_energy", "partial_charge");
-			
-			mAtoms.push_back({
-				id, AtomTypeTraits(symbol).type(), energy, charge
-			});
-		}
-		sort(mAtoms.begin(), mAtoms.end(), CompoundAtomLess());
-
-		auto& compBonds = db["chem_comp_bond"];
-		
-		for (auto row: compBonds)
-		{
-			CompoundBond b;
-			std::string type, aromatic;
-			
-			cif::tie(b.atomID[0], b.atomID[1], type, b.distance, b.esd) =
-				row.get("atom_id_1", "atom_id_2", "type", "value_dist", "value_dist_esd");
-			
-			using cif::iequals;
-			
-			if (iequals(type, "single") or iequals(type, "sing"))		b.type = singleBond;
-			else if (iequals(type, "double") or iequals(type, "doub"))	b.type = doubleBond;
-			else if (iequals(type, "triple") or iequals(type, "trip"))	b.type = tripleBond;
-			else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
-				b.type = delocalizedBond;
-			else
-			{
-				if (cif::VERBOSE)
-					std::cerr << "Unimplemented chem_comp_bond.type " << type << " in " << id << std::endl;
-				b.type = singleBond;
-			}
-			
-			if (b.atomID[0] > b.atomID[1])
-				swap(b.atomID[0], b.atomID[1]);
-			
-			mBonds.push_back(b);
-		}
-		sort(mBonds.begin(), mBonds.end(), CompoundBondLess());
+	auto &chemComp = db["chem_comp"];

-		for (auto row: db["chem_comp_angle"])
-		{
-			CompoundAngle a;
-			
-			cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.angle, a.esd) =
-				row.get("atom_id_1", "atom_id_2", "atom_id_3", "value_angle", "value_angle_esd");
-			
-			mAngles.push_back(a);
-		}
+	if (chemComp.size() != 1)
+		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

-		for (auto row: db["chem_comp_tor"])
-		{
-			CompoundTorsion a;
-			
-			cif::tie(a.atomID[0], a.atomID[1], a.atomID[2], a.atomID[3], a.angle, a.esd, a.period) =
-				row.get("atom_id_1", "atom_id_2", "atom_id_3", "atom_id_4", "value_angle", "value_angle_esd", "period");
-			
-			mTorsions.push_back(a);
-		}
+	cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
+		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

-		for (auto row: db["chem_comp_chir"])
-		{
-			CompoundChiralCentre cc;
-			std::string volumeSign;
-			
-			cif::tie(cc.id, cc.atomIDCentre, cc.atomID[0],
-				cc.atomID[1], cc.atomID[2], volumeSign) = 
-				row.get("id", "atom_id_centre", "atom_id_1",
-					"atom_id_2", "atom_id_3", "volume_sign");
-			
-			if (volumeSign == "negativ" or volumeSign == "negative")
-				cc.volumeSign = negativ;
-			else if (volumeSign == "positiv" or volumeSign == "positive")
-				cc.volumeSign = positiv;
-			else if (volumeSign == "both")
-				cc.volumeSign = both;
-			else
-			{
-				if (cif::VERBOSE)
-					std::cerr << "Unimplemented chem_comp_chir.volume_sign " << volumeSign << " in " << id << std::endl;
-				continue;
-			}
-			
-			mChiralCentres.push_back(cc);
-		}
-
-		auto& compPlanes = db["chem_comp_plane_atom"];
-		
-		for (auto row: compPlanes)
-		{
-			std::string atom_id, plane_id;
-			float esd;	
-			
-			cif::tie(atom_id, plane_id, esd) = row.get("atom_id", "plane_id", "dist_esd");
-
-			auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == plane_id;});
-			if (i == mPlanes.end())
-				mPlanes.emplace_back(CompoundPlane{ plane_id, { atom_id }, esd });
-			else
-				i->atomID.push_back(atom_id);
-		}
-	}
-	catch (const std::exception& ex)
+	auto &chemCompAtom = db["chem_comp_atom"];
+	for (auto row: chemCompAtom)
 	{
-		std::cerr << "Error loading ccp4 file for " << id << " from file " << file << std::endl;
-		throw;
+		CompoundAtom atom;
+		std::string typeSymbol;
+		cif::tie(atom.id, typeSymbol, atom.charge, atom.aromatic, atom.leavingAtom, atom.stereoConfig, atom.x, atom.y, atom.z) =
+			row.get("id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
+			"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
+		atom.typeSymbol = AtomTypeTraits(typeSymbol).type();
+		mAtoms.push_back(std::move(atom));
 	}
-}

-std::string Compound::formula() const
-{
-	std::string result;
-	
-	std::map<std::string,uint32_t> atoms;
-	float chargeSum = 0;
-
-	for (auto r: mAtoms)
-	{
-		atoms[AtomTypeTraits(r.typeSymbol).symbol()] += 1;
-		chargeSum += r.partialCharge;
-	}
-	
-	auto c = atoms.find("C");
-	if (c != atoms.end())
-	{
-		result = "C";
-		
-		if (c->second > 1)
-			result += std::to_string(c->second);
-		
-		atoms.erase(c);
-		
-		auto h = atoms.find("H");
-		if (h != atoms.end())
-		{
-			result += " H";
-			if (h->second > 1)
-				result += std::to_string(h->second);
-			
-			atoms.erase(h);
-		}
-	}
-	
-	for (auto a: atoms)
+	auto &chemCompBond = db["chem_comp_bond"];
+	for (auto row: chemCompBond)
 	{
-		if (not result.empty())
-			result += ' ';
-		
-		result += a.first;
-		if (a.second > 1)
-			result += std::to_string(a.second);	
+		CompoundBond bond;
+		std::string valueOrder;
+		cif::tie(bond.atomID[0], bond.atomID[1], valueOrder, bond.aromatic, bond.stereoConfig)
+			= row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
+		bond.type = from_string(valueOrder);
+		mBonds.push_back(std::move(bond));
 	}
-
-	int charge = lrint(chargeSum);
-	if (charge != 0)
-		result += ' ' + std::to_string(charge);
-
-	return result;
-}
-
-float Compound::formulaWeight() const
-{
-	float result = 0;
-
-	for (auto r: mAtoms)
-		result += AtomTypeTraits(r.typeSymbol).weight();
-
-	return result;
-}
-
-int Compound::charge() const
-{
-	float result = 0;
-
-	for (auto r: mAtoms)
-		result += r.partialCharge;
-
-	return lrint(result);
-}
-
-std::string Compound::type() const
-{
-	std::string result;
-	
-	// known groups are (counted from ccp4 monomer dictionary)
-
-	//	D-pyranose
-	//	DNA
-	//	L-PEPTIDE LINKING
-	//	L-SACCHARIDE
-	//	L-peptide
-	//	L-pyranose
-	//	M-peptide
-	//	NON-POLYMER
-	//	P-peptide
-	//	RNA
-	//	furanose
-	//	non-polymer
-	//	non_polymer
-	//	peptide
-	//	pyranose
-	//	saccharide
-	
-	if (cif::iequals(mID, "gly"))
-		result = "peptide linking";
-	else if (cif::iequals(mGroup, "l-peptide") or cif::iequals(mGroup, "L-peptide linking") or cif::iequals(mGroup, "peptide"))
-		result = "L-peptide linking";
-	else if (cif::iequals(mGroup, "DNA"))
-		result = "DNA linking";
-	else if (cif::iequals(mGroup, "RNA"))
-		result = "RNA linking";
-//	else
-//		result = mGroup;
-	
-	return result;
-}
-
-bool Compound::isWater() const
-{
-	return mID == "HOH" or mID == "H2O";
-}
-
-bool Compound::isSugar() const
-{
-	return cif::iequals(mGroup, "furanose") or cif::iequals(mGroup, "pyranose");
 }

-CompoundAtom Compound::getAtomByID(const std::string& atomID) const
+CompoundAtom Compound::getAtomByID(const std::string &atomID) const
 {
 	CompoundAtom result = {};
-	for (auto& a: mAtoms)
+	for (auto &a : mAtoms)
 	{
 		if (a.id == atomID)
 		{
@@ -579,13 +156,13 @@ CompoundAtom Compound::getAtomByID(const std::string& atomID) const
 		}
 	}

-	if (result.id != atomID)	
+	if (result.id != atomID)
 		throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
-	
+
 	return result;
 }

-const Compound* Compound::create(const std::string& id)
+const Compound *Compound::create(const std::string &id)
 {
 	auto result = CompoundFactory::instance().get(id);
 	if (result == nullptr)
@@ -593,149 +170,44 @@ const Compound* Compound::create(const std::string& id)
 	return result;
 }

-bool Compound::atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const
+bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
 {
 	auto i = find_if(mBonds.begin(), mBonds.end(),
-		[&](const CompoundBond& b)
-		{
-			return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2)
-				or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
+		[&](const CompoundBond &b) {
+			return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
 		});
-	
+
 	return i != mBonds.end();
 }

-float Compound::atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const
-{
-	auto i = find_if(mBonds.begin(), mBonds.end(),
-		[&](const CompoundBond& b)
-		{
-			return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2)
-				or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
-		});
-	
-	return i != mBonds.end() ? i->distance : 0;
-}
+// float Compound::atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const
+// {
+// 	auto i = find_if(mBonds.begin(), mBonds.end(),
+// 		[&](const CompoundBond &b) {
+// 			return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
+// 		});

-bool Compound::isIsomerOf(const Compound& c) const
-{
-	bool result = false;
-	
-	for (;;)
-	{
-		// easy tests first
-		if (mID == c.mID)
-		{
-			result = true;
-			break;
-		}
-		
-		if (mAtoms.size() != c.mAtoms.size())
-			break;
-		
-		if (mBonds.size() != c.mBonds.size())
-			break;
-		
-		if (mChiralCentres.size() != c.mChiralCentres.size())
-			break;
+// 	return i != mBonds.end() ? i->distance : 0;
+// }

-		// same number of atoms of each type?
-		std::map<AtomType,int> aTypeCount, bTypeCount;
-		
-		bool sameAtomNames = true;
-		for (size_t i = 0; i < mAtoms.size(); ++i)
-		{
-			auto& a = mAtoms[i];
-			auto& b = c.mAtoms[i];
-			
-			aTypeCount[a.typeSymbol] += 1;
-			bTypeCount[b.typeSymbol] += 1;
-			
-			if (a.id != b.id or a.typeSymbol != b.typeSymbol)
-				sameAtomNames = false;
-		}
-		
-		if (not sameAtomNames and aTypeCount != bTypeCount)
-			break;
-		
-		bool sameBonds = sameAtomNames;
-		for (size_t i = 0; sameBonds and i < mBonds.size(); ++i)
-		{
-			sameBonds =
-				mBonds[i].atomID[0] == c.mBonds[i].atomID[0] and
-				mBonds[i].atomID[1] == c.mBonds[i].atomID[1] and
-				mBonds[i].type == c.mBonds[i].type;
-		}
-		
-		if (sameBonds)
-		{
-			result = true;
-			break;
-		}
-		
-		// implement rest of tests
-		
-		std::vector<std::tuple<std::string,std::string>> mapping;
-		result = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, mapping);
-		
-		if (cif::VERBOSE and result)
-		{
-			for (auto& m: mapping)
-				std::cerr << "  " << std::get<0>(m) << " => " << std::get<1>(m) << std::endl;
-		}
-		
-		break;	
-	}
-	
-	return result;
-}

-std::vector<std::tuple<std::string,std::string>> Compound::mapToIsomer(const Compound& c) const
-{
-	std::vector<std::tuple<std::string,std::string>> result;
-	
-	bool check = StructuresAreIsomeric(mAtoms, mBonds, c.mAtoms, c.mBonds, result);
-	if (not check)
-		throw std::runtime_error("Compounds " + id() + " and " + c.id() + " are not isomers in call to mapToIsomer");
-	
-	return result;
-}
+// float Compound::bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const
+// {
+// 	float result = nanf("1");

-std::vector<std::string> Compound::isomers() const
-{
-	std::vector<std::string> result;
-	
-	auto& db = IsomerDB::instance();
-	if (db.count(mID))
-	{
-		result = db[mID];
-		
-		auto i = find(result.begin(), result.end(), mID);
-		assert(i != result.end());
-		
-		result.erase(i);
-	}
-	
-	return result;
-}
+// 	for (auto &a : mAngles)
+// 	{
+// 		if (not(a.atomID[1] == atomId_2 and
+// 				((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
+// 					(a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
+// 			continue;

-float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId_2, const std::string& atomId_3) const
-{
-	float result = nanf("1");
-	
-	for (auto& a: mAngles)
-	{
-		if (not (a.atomID[1] == atomId_2 and
-				 ((a.atomID[0] == atomId_1 and a.atomID[2] == atomId_3) or
-				  (a.atomID[2] == atomId_1 and a.atomID[0] == atomId_3))))
-			continue;
-		
-		result = a.angle;
-		break;
-	}
-	
-	return result;
-}
+// 		result = a.angle;
+// 		break;
+// 	}
+
+// 	return result;
+// }

 //static float calcC(float a, float b, float alpha)
 //{
@@ -743,288 +215,85 @@ float Compound::bondAngle(const std::string& atomId_1, const std::string& atomId
 //	float d = sqrt(b * b - f * f);
 //	float e = a - d;
 //	float c = sqrt(f * f + e * e);
-//	
+//
 //	return c;
 //}

-float Compound::chiralVolume(const std::string& centreID) const
-{
-	float result = 0;
-	
-	for (auto& cv: mChiralCentres)
-	{
-		if (cv.id != centreID)
-			continue;
-		
-		// calculate the expected chiral volume
-
-		// the edges
-		
-		float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
-		float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
-		float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);
-		
-		// the angles for the top of the tetrahedron
-		
-		float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
-		float beta  = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
-		float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);
-		
-		float cosa = cos(alpha * kPI / 180);
-		float cosb = cos(beta * kPI / 180);
-		float cosc = cos(gamma * kPI / 180);
-		
-		result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
-		
-		if (cv.volumeSign == negativ)
-			result = -result;
-		
-		break;
-	}
-	
-	return result;
-}
+// float Compound::chiralVolume(const std::string &centreID) const
+// {
+// 	float result = 0;

-// --------------------------------------------------------------------
+// 	for (auto &cv : mChiralCentres)
+// 	{
+// 		if (cv.id != centreID)
+// 			continue;

-Link::Link(cif::Datablock& db)
-{
-	mID = db.getName();
-	
-	auto& linkBonds = db["chem_link_bond"];
-	
-	for (auto row: linkBonds)
-	{
-		LinkBond b;
-		std::string type, aromatic;
-		
-		cif::tie(b.atom[0].compID, b.atom[0].atomID,
-				b.atom[1].compID, b.atom[1].atomID, type, b.distance, b.esd) =
-			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
-				"type", "value_dist", "value_dist_esd");
-		
-		using cif::iequals;
-		
-		if (iequals(type, "single") or iequals(type, "sing"))		b.type = singleBond;
-		else if (iequals(type, "double") or iequals(type, "doub"))	b.type = doubleBond;
-		else if (iequals(type, "triple") or iequals(type, "trip"))	b.type = tripleBond;
-		else if (iequals(type, "deloc") or iequals(type, "aromat") or iequals(type, "aromatic"))
-											b.type = delocalizedBond;
-		else
-		{
-			if (cif::VERBOSE)
-				std::cerr << "Unimplemented chem_link_bond.type " << type << " in " << mID << std::endl;
-			b.type = singleBond;
-		}
-		
-//		if (b.atom[0] > b.atom[1])
-//			swap(b.atom[0], b.atom[1]);
-		
-		mBonds.push_back(b);
-	}
-//	sort(mBonds.begin(), mBonds.end(), LinkBondLess());
+// 		// calculate the expected chiral volume

-	auto& linkAngles = db["chem_link_angle"];
-	for (auto row: linkAngles)
-	{
-		LinkAngle a;
-		
-		cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
-				a.atom[2].compID, a.atom[2].atomID, a.angle, a.esd) =
-			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
-				"atom_3_comp_id", "atom_id_3", "value_angle", "value_angle_esd");
-		
-		mAngles.push_back(a);
-	}
+// 		// the edges

-	for (auto row: db["chem_link_tor"])
-	{
-		LinkTorsion a;
-		
-		cif::tie(a.atom[0].compID, a.atom[0].atomID, a.atom[1].compID, a.atom[1].atomID,
-				a.atom[2].compID, a.atom[2].atomID, a.atom[3].compID, a.atom[3].atomID,
-				a.angle, a.esd, a.period) =
-			row.get("atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
-				"atom_3_comp_id", "atom_id_3", "atom_4_comp_id", "atom_id_4",
-				"value_angle", "value_angle_esd", "period");
-		
-		mTorsions.push_back(a);
-	}
+// 		float a = atomBondValue(cv.atomIDCentre, cv.atomID[0]);
+// 		float b = atomBondValue(cv.atomIDCentre, cv.atomID[1]);
+// 		float c = atomBondValue(cv.atomIDCentre, cv.atomID[2]);

-	auto& linkChir = db["chem_link_chir"];
-	for (auto row: linkChir)
-	{
-		LinkChiralCentre cc;
-		std::string volumeSign;
-		
-		cif::tie(cc.id, cc.atomCentre.compID, cc.atomCentre.atomID,
-				cc.atom[0].compID, cc.atom[0].atomID,
-				cc.atom[1].compID, cc.atom[1].atomID,
-				cc.atom[2].compID, cc.atom[2].atomID,
-				volumeSign) = 
-			row.get("id", "atom_centre_comp_id", "atom_id_centre",
-				"atom_1_comp_id", "atom_id_1", "atom_2_comp_id", "atom_id_2",
-				"atom_3_comp_id", "atom_id_3", "volume_sign");
-		
-		if (volumeSign == "negativ" or volumeSign == "negative")
-			cc.volumeSign = negativ;
-		else if (volumeSign == "positiv" or volumeSign == "positive")
-			cc.volumeSign = positiv;
-		else if (volumeSign == "both")
-			cc.volumeSign = both;
-		else
-		{
-			if (cif::VERBOSE)
-				std::cerr << "Unimplemented chem_link_chir.volume_sign " << volumeSign << " in " << mID << std::endl;
-			continue;
-		}
-		
-		mChiralCentres.push_back(cc);
-	}
+// 		// the angles for the top of the tetrahedron

-	auto& linkPlanes = db["chem_link_plane"];
-	for (auto row: linkPlanes)
-	{
-		int compID;
-		std::string atomID, planeID;
-		float esd;	
-		
-		cif::tie(planeID, compID, atomID, esd) = row.get("plane_id", "atom_comp_id", "atom_id", "dist_esd");
-
-		auto i = find_if(mPlanes.begin(), mPlanes.end(), [&](auto& p) { return p.id == planeID;});
-		if (i == mPlanes.end())
-		{
-			std::vector<LinkAtom> atoms{LinkAtom{ compID, atomID }};
-			mPlanes.emplace_back(LinkPlane{ planeID, move(atoms), esd });
-		}
-		else
-			i->atoms.push_back({ compID, atomID });
-	}
-}
+// 		float alpha = bondAngle(cv.atomID[0], cv.atomIDCentre, cv.atomID[1]);
+// 		float beta = bondAngle(cv.atomID[1], cv.atomIDCentre, cv.atomID[2]);
+// 		float gamma = bondAngle(cv.atomID[2], cv.atomIDCentre, cv.atomID[0]);

-const Link& Link::create(const std::string& id)
-{
-	auto result = CompoundFactory::instance().getLink(id);
-	if (result == nullptr)
-		result = CompoundFactory::instance().createLink(id);
-	
-	if (result == nullptr)
-		throw std::runtime_error("Link with id " + id + " not found");
-	
-	return *result;
-}
+// 		float cosa = cos(alpha * kPI / 180);
+// 		float cosb = cos(beta * kPI / 180);
+// 		float cosc = cos(gamma * kPI / 180);

-Link::~Link()
-{
-}
+// 		result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;

-float Link::atomBondValue(const LinkAtom& atom1, const LinkAtom& atom2) const
-{
-	auto i = find_if(mBonds.begin(), mBonds.end(),
-		[&](auto& b)
-		{
-			return (b.atom[0] == atom1 and b.atom[1] == atom2)
-				or (b.atom[0] == atom2 and b.atom[1] == atom1);
-		});
-	
-	return i != mBonds.end() ? i->distance : 0;
-}
+// 		if (cv.volumeSign == negativ)
+// 			result = -result;

-float Link::bondAngle(const LinkAtom& atom1, const LinkAtom& atom2, const LinkAtom& atom3) const
-{
-	float result = nanf("1");
-	
-	for (auto& a: mAngles)
-	{
-		if (not (a.atom[1] == atom2 and
-				 ((a.atom[0] == atom1 and a.atom[2] == atom3) or
-				  (a.atom[2] == atom1 and a.atom[0] == atom3))))
-			continue;
-		
-		result = a.angle;
-		break;
-	}
-	
-	return result;
-}
+// 		break;
+// 	}

-float Link::chiralVolume(const std::string& centreID) const
-{
-	float result = 0;
-	
-	for (auto& cv: mChiralCentres)
-	{
-		if (cv.id != centreID)
-			continue;
-		
-		// calculate the expected chiral volume
-
-		// the edges
-		
-		float a = atomBondValue(cv.atomCentre, cv.atom[0]);
-		float b = atomBondValue(cv.atomCentre, cv.atom[1]);
-		float c = atomBondValue(cv.atomCentre, cv.atom[2]);
-		
-		// the angles for the top of the tetrahedron
-		
-		float alpha = bondAngle(cv.atom[0], cv.atomCentre, cv.atom[1]);
-		float beta  = bondAngle(cv.atom[1], cv.atomCentre, cv.atom[2]);
-		float gamma = bondAngle(cv.atom[2], cv.atomCentre, cv.atom[0]);
-		
-		float cosa = cos(alpha * kPI / 180);
-		float cosb = cos(beta * kPI / 180);
-		float cosc = cos(gamma * kPI / 180);
-		
-		result = (a * b * c * sqrt(1 + 2 * cosa * cosb * cosc - (cosa * cosa) - (cosb * cosb) - (cosc * cosc))) / 6;
-		
-		if (cv.volumeSign == negativ)
-			result = -result;
-		
-		break;
-	}
-	
-	return result;
-}
+// 	return result;
+// }

 // --------------------------------------------------------------------
 // a factory class to generate compounds

-const std::map<std::string,char> kAAMap{
-	{ "ALA", 'A' },
-	{ "ARG", 'R' },
-	{ "ASN", 'N' },
-	{ "ASP", 'D' },
-	{ "CYS", 'C' },
-	{ "GLN", 'Q' },
-	{ "GLU", 'E' },
-	{ "GLY", 'G' },
-	{ "HIS", 'H' },
-	{ "ILE", 'I' },
-	{ "LEU", 'L' },
-	{ "LYS", 'K' },
-	{ "MET", 'M' },
-	{ "PHE", 'F' },
-	{ "PRO", 'P' },
-	{ "SER", 'S' },
-	{ "THR", 'T' },
-	{ "TRP", 'W' },
-	{ "TYR", 'Y' },
-	{ "VAL", 'V' },
-	{ "GLX", 'Z' },
-	{ "ASX", 'B' }
-};
-
-const std::map<std::string,char> kBaseMap{
-	{ "A", 'A' },
-	{ "C", 'C' },
-	{ "G", 'G' },
-	{ "T", 'T' },
-	{ "U", 'U' },
-	{ "DA", 'A' },
-	{ "DC", 'C' },
-	{ "DG", 'G' },
-	{ "DT", 'T' }
-};
+const std::map<std::string, char> kAAMap{
+	{"ALA", 'A'},
+	{"ARG", 'R'},
+	{"ASN", 'N'},
+	{"ASP", 'D'},
+	{"CYS", 'C'},
+	{"GLN", 'Q'},
+	{"GLU", 'E'},
+	{"GLY", 'G'},
+	{"HIS", 'H'},
+	{"ILE", 'I'},
+	{"LEU", 'L'},
+	{"LYS", 'K'},
+	{"MET", 'M'},
+	{"PHE", 'F'},
+	{"PRO", 'P'},
+	{"SER", 'S'},
+	{"THR", 'T'},
+	{"TRP", 'W'},
+	{"TYR", 'Y'},
+	{"VAL", 'V'},
+	{"GLX", 'Z'},
+	{"ASX", 'B'}};
+
+const std::map<std::string, char> kBaseMap{
+	{"A", 'A'},
+	{"C", 'C'},
+	{"G", 'G'},
+	{"T", 'T'},
+	{"U", 'U'},
+	{"DA", 'A'},
+	{"DC", 'C'},
+	{"DG", 'G'},
+	{"DT", 'T'}};

 // --------------------------------------------------------------------

@@ -1033,20 +302,17 @@ class CompoundFactoryImpl
  public:
 	CompoundFactoryImpl();

-	CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next);
+	CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next);

 	~CompoundFactoryImpl()
 	{
 		delete mNext;
 	}

-	Compound* get(std::string id);
-	Compound* create(std::string id);
+	Compound *get(std::string id);
+	Compound *create(std::string id);

-	const Link* getLink(std::string id);
-	const Link* createLink(std::string id);
-	
-	CompoundFactoryImpl* pop()
+	CompoundFactoryImpl *pop()
 	{
 		auto result = mNext;
 		mNext = nullptr;
@@ -1054,93 +320,65 @@ class CompoundFactoryImpl
 		return result;
 	}

-	std::string unalias(const std::string& resName) const
-	{
-		std::string result = resName;
-		
-		auto& e = const_cast<cif::File&>(mFile)["comp_synonym_list"];
-		
-		for (auto& synonym: e["chem_comp_synonyms"])
-		{
-			if (ba::iequals(synonym["comp_alternative_id"].as<std::string>(), resName) == false)
-				continue;
-			
-			result = synonym["comp_id"].as<std::string>();
-			ba::trim(result);
-			break;
-		}
-
-		if (result.empty() and mNext)
-			result = mNext->unalias(resName);
-		
-		return result;
-	}
-
-	bool isKnownPeptide(const std::string& resName)
+	bool isKnownPeptide(const std::string &resName)
 	{
 		return mKnownPeptides.count(resName) or
-			(mNext != nullptr and mNext->isKnownPeptide(resName));
+			   (mNext != nullptr and mNext->isKnownPeptide(resName));
 	}

-	bool isKnownBase(const std::string& resName)
+	bool isKnownBase(const std::string &resName)
 	{
 		return mKnownBases.count(resName) or
-			(mNext != nullptr and mNext->isKnownBase(resName));
+			   (mNext != nullptr and mNext->isKnownBase(resName));
 	}

  private:
-	std::shared_timed_mutex		mMutex;
-
-	std::string					mPath;
-	std::vector<Compound*>		mCompounds;
-	std::vector<const Link*>	mLinks;
-	std::set<std::string>		mKnownPeptides;
-	std::set<std::string>		mKnownBases;
-	std::set<std::string>		mMissing;
-	cif::File					mFile;
-	CompoundFactoryImpl*		mNext = nullptr;
+	std::shared_timed_mutex mMutex;
+
+	std::string mPath;
+	std::vector<Compound *> mCompounds;
+	std::set<std::string> mKnownPeptides;
+	std::set<std::string> mKnownBases;
+	std::set<std::string> mMissing;
+	CompoundFactoryImpl *mNext = nullptr;
 };

 // --------------------------------------------------------------------

 CompoundFactoryImpl::CompoundFactoryImpl()
 {
-	for (const auto&[ key, value]: kAAMap)
+	for (const auto &[key, value] : kAAMap)
 		mKnownPeptides.insert(key);

-	for (const auto&[ key, value]: kBaseMap)
+	for (const auto &[key, value] : kBaseMap)
 		mKnownBases.insert(key);
 }

-CompoundFactoryImpl::CompoundFactoryImpl(const std::string& file, CompoundFactoryImpl* next)
-	: mPath(file), mFile(file), mNext(next)
+CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next)
+	: mPath(file)
+	, mNext(next)
 {
-	const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
+	cif::File cifFile(file);
+	if (not cifFile.isValid())
+		throw std::runtime_error("Invalid compound file");

-	auto& cat = mFile.firstDatablock()["chem_comp"];
-
-	for (auto& chemComp: cat)
+	for (auto &db: cifFile)
 	{
-		std::string group, threeLetterCode;
-		
-		cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code");
-
-		if (std::regex_match(group, peptideRx))
-			mKnownPeptides.insert(threeLetterCode);
-		else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
-			mKnownBases.insert(threeLetterCode);
+		auto compound = std::make_unique<Compound>(db);
+
+		mCompounds.push_back(compound.release());
 	}
 }

-Compound* CompoundFactoryImpl::get(std::string id)
+Compound *CompoundFactoryImpl::get(std::string id)
 {
 	std::shared_lock lock(mMutex);

 	ba::to_upper(id);

-	Compound* result = nullptr;
-	
-	for (auto cmp: mCompounds)
+	Compound *result = nullptr;
+
+	for (auto cmp : mCompounds)
 	{
 		if (cmp->id() == id)
 		{
@@ -1148,119 +386,72 @@ Compound* CompoundFactoryImpl::get(std::string id)
 			break;
 		}
 	}
-	
+
 	if (result == nullptr and mNext != nullptr)
 		result = mNext->get(id);
-	
+
 	return result;
 }

-Compound* CompoundFactoryImpl::create(std::string id)
+Compound *CompoundFactoryImpl::create(std::string id)
 {
 	ba::to_upper(id);

-	Compound* result = get(id);
-	if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
-	{
-		std::unique_lock lock(mMutex);
+	Compound *result = get(id);
+	// if (result == nullptr and mMissing.count(id) == 0 and not mFile.empty())
+	// {
+	// 	std::unique_lock lock(mMutex);

-		auto& cat = mFile.firstDatablock()["chem_comp"];
+	// 	auto &cat = mFile.firstDatablock()["chem_comp"];

-		auto rs = cat.find(cif::Key("three_letter_code") == id);
-	
-		if (not rs.empty())
-		{
-			auto row = rs.front();
-
-			std::string name, group;
-			uint32_t numberAtomsAll, numberAtomsNh;
-			cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
-				row.get("name", "group", "number_atoms_all", "number_atoms_nh");
-			
-			ba::trim(name);
-			ba::trim(group);
-
-			if (mFile.get("comp_" + id) == nullptr)
-			{
-				auto clibd_mon = fs::path(getenv("CLIBD_MON"));
-				
-				fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
-			
-				if (not fs::exists(resFile) and	(id == "COM" or id == "CON" or "PRN")) 		// seriously...
-					resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
-
-				if (not fs::exists(resFile))
-					mMissing.insert(id);
-				else
-				{
-					mCompounds.push_back(new Compound(resFile.string(), id, name, group));
-					result = mCompounds.back();
-				}
-			}				
-			else
-			{
-				mCompounds.push_back(new Compound(mPath, id, name, group));
-				result = mCompounds.back();
-			}
-		}
-		
-		if (result == nullptr and mNext != nullptr)
-			result = mNext->create(id);
-	}
-	
-	return result;
-}
+	// 	auto rs = cat.find(cif::Key("three_letter_code") == id);

-const Link* CompoundFactoryImpl::getLink(std::string id)
-{
-	std::shared_lock lock(mMutex);
+	// 	if (not rs.empty())
+	// 	{
+	// 		auto row = rs.front();

-	ba::to_upper(id);
+	// 		std::string name, group;
+	// 		uint32_t numberAtomsAll, numberAtomsNh;
+	// 		cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
+	// 			row.get("name", "group", "number_atoms_all", "number_atoms_nh");

-	const Link* result = nullptr;
-	
-	for (auto link: mLinks)
-	{
-		if (link->id() == id)
-		{
-			result = link;
-			break;
-		}
-	}
-	
-	if (result == nullptr and mNext != nullptr)
-		result = mNext->getLink(id);
-	
-	return result;
-}
+	// 		ba::trim(name);
+	// 		ba::trim(group);

-const Link* CompoundFactoryImpl::createLink(std::string id)
-{
-	ba::to_upper(id);
+	// 		if (mFile.get("comp_" + id) == nullptr)
+	// 		{
+	// 			auto clibd_mon = fs::path(getenv("CLIBD_MON"));

-	const Link* result = getLink(id);
+	// 			fs::path resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");

-	if (result == nullptr)
-	{
-		std::unique_lock lock(mMutex);
+	// 			if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
+	// 				resFile = clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
+
+	// 			if (not fs::exists(resFile))
+	// 				mMissing.insert(id);
+	// 			else
+	// 			{
+	// 				mCompounds.push_back(new Compound(resFile.string(), id, name, group));
+	// 				result = mCompounds.back();
+	// 			}
+	// 		}
+	// 		else
+	// 		{
+	// 			mCompounds.push_back(new Compound(mPath, id, name, group));
+	// 			result = mCompounds.back();
+	// 		}
+	// 	}
+
+	// 	if (result == nullptr and mNext != nullptr)
+	// 		result = mNext->create(id);
+	// }

-		auto db = mFile.get("link_" + id);
-		if (db != nullptr)
-		{
-			result = new Link(*db);
-			mLinks.push_back(result);
-		}
-		
-		if (result == nullptr and mNext != nullptr)
-			result = mNext->createLink(id);
-	}
-	
 	return result;
 }

 // --------------------------------------------------------------------

-CompoundFactory* CompoundFactory::sInstance;
+CompoundFactory *CompoundFactory::sInstance;
 thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
 bool CompoundFactory::sUseThreadLocalInstance;

@@ -1272,21 +463,6 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
 CompoundFactory::CompoundFactory()
 	: mImpl(nullptr)
 {
-	const char* clibdMon = getenv("CLIBD_MON");
-	if (clibdMon != nullptr)
-	{
-		fs::path db = fs::path(clibdMon) / "list" / "mon_lib_list.cif";
-		if (fs::exists(db))
-			pushDictionary(db.string());
-	}
-
-	if (mImpl == nullptr)
-	{
-		if (cif::VERBOSE)
-			std::cerr << "Could not load the mon_lib_list.cif file from CCP4, please make sure you have installed CCP4 and sourced the environment." << std::endl;
-
-		mImpl = new CompoundFactoryImpl();
-	}
 }

 CompoundFactory::~CompoundFactory()
@@ -1294,7 +470,7 @@ CompoundFactory::~CompoundFactory()
 	delete mImpl;
 }

-CompoundFactory& CompoundFactory::instance()
+CompoundFactory &CompoundFactory::instance()
 {
 	if (sUseThreadLocalInstance)
 	{
@@ -1321,20 +497,20 @@ void CompoundFactory::clear()
 	}
 }

-void CompoundFactory::pushDictionary(const std::string& inDictFile)
+void CompoundFactory::pushDictionary(const std::string &inDictFile)
 {
 	if (not fs::exists(inDictFile))
 		throw std::runtime_error("file not found: " + inDictFile);

-//	ifstream file(inDictFile);
-//	if (not file.is_open())
-//		throw std::runtime_error("Could not open peptide list " + inDictFile);
+	//	ifstream file(inDictFile);
+	//	if (not file.is_open())
+	//		throw std::runtime_error("Could not open peptide list " + inDictFile);

 	try
 	{
 		mImpl = new CompoundFactoryImpl(inDictFile, mImpl);
 	}
-	catch (const std::exception& ex)
+	catch (const std::exception &ex)
 	{
 		std::cerr << "Error loading dictionary " << inDictFile << std::endl;
 		throw;
@@ -1348,39 +524,32 @@ void CompoundFactory::popDictionary()
 }

 // id is the three letter code
-const Compound* CompoundFactory::get(std::string id)
+const Compound *CompoundFactory::get(std::string id)
 {
 	return mImpl->get(id);
 }

-const Compound* CompoundFactory::create(std::string id)
+const Compound *CompoundFactory::create(std::string id)
 {
 	return mImpl->create(id);
 }

-const Link* CompoundFactory::getLink(std::string id)
-{
-	return mImpl->getLink(id);
-}
-
-const Link* CompoundFactory::createLink(std::string id)
-{
-	return mImpl->createLink(id);
-}
-
-bool CompoundFactory::isKnownPeptide(const std::string& resName) const
+bool CompoundFactory::isKnownPeptide(const std::string &resName) const
 {
 	return mImpl->isKnownPeptide(resName);
 }

-bool CompoundFactory::isKnownBase(const std::string& resName) const
+bool CompoundFactory::isKnownBase(const std::string &resName) const
 {
 	return mImpl->isKnownBase(resName);
 }

-std::string CompoundFactory::unalias(const std::string& resName) const
+// --------------------------------------------------------------------
+
+std::vector<std::string> Compound::addExtraComponents(const std::filesystem::path &components)
 {
-	return mImpl->unalias(resName);
- }

 }
+
+
+} // namespace mmcif
--- a/src/PDB2Cif.cpp
+++ b/src/PDB2Cif.cpp
@@ -457,7 +457,6 @@ class PDBFileParser
 		char					iCode;
 		int						numHetAtoms = 0;
 		std::string				text;
-		bool					sugar = false;
 		std::string				asymID;
 		std::vector<PDBRecord*>	atoms;
 		bool					processed = false;
@@ -467,14 +466,6 @@ class PDBFileParser
 		HET(const std::string& hetID, char chainID, int seqNum, char iCode, int numHetAtoms = 0, const std::string& text = {})
 			: hetID(hetID), chainID(chainID), seqNum(seqNum), iCode(iCode), numHetAtoms(numHetAtoms), text(text)
 		{
-			// just in case we don't have a CCP4 available
-			if (hetID == "MAN" or hetID == "BMA" or hetID == "NAG" or hetID == "NDG" or hetID == "FUC" or hetID == "FUL")
-				sugar = true;
-			else
-			{
-				auto compound = CompoundFactory::instance().create(hetID);
-				sugar = compound ? compound->isSugar() : false;
-			}
 		}

 	};
@@ -4374,33 +4365,38 @@ void PDBFileParser::ConstructEntities()
 			mMod2parent.count(cc) ? mMod2parent[cc] : cc
 		);
 		
-		std::string formula = mFormuls[cc];
-		if (formula.empty() and compound != nullptr)
-			formula = compound->formula();
-		else
-		{
-			const std::regex rx(R"(\d+\((.+)\))");
-			std::smatch m;
-			if (std::regex_match(formula, m, rx))
-				formula = m[1].str();
-		}
-		
-		std::string name = mHetnams[cc];
-		if (name.empty() and compound != nullptr)
-			name = compound->name();
-
-		std::string type = "other";
+		std::string name;
+		std::string formula;
+		std::string type;
 		std::string nstd = ".";
+		std::string formulaWeight;
 		
 		if (compound != nullptr)
 		{
+			name = compound->name();
 			type = compound->type();
 			
-			if (type.empty())
-				type = "NON-POLYMER";
-
 			if (iequals(type, "L-peptide linking") or iequals(type, "peptide linking"))
 				nstd = "y";
+			
+			formula = compound->formula();
+			formulaWeight = std::to_string(compound->formulaWeight());
+		}
+
+		if (name.empty())
+			name = mHetnams[cc];
+
+		if (type.empty())
+			type = "NON-POLYMER";
+
+		if (formula.empty())
+		{
+			formula = mFormuls[cc];
+
+			const std::regex rx(R"(\d+\((.+)\))");
+			std::smatch m;
+			if (std::regex_match(formula, m, rx))
+				formula = m[1].str();
 		}

 		if (modResSet.count(cc))
@@ -4410,6 +4406,7 @@ void PDBFileParser::ConstructEntities()
 			{ "id", cc },
 			{ "name", name },
 			{ "formula", formula },
+			{ "formula_weight", formulaWeight },
 			{ "mon_nstd_flag", nstd },
 			{ "type", type }
 		});

--- a/src/Structure.cpp
+++ b/src/Structure.cpp
@@ -566,16 +566,6 @@ int Atom::charge() const
 	return property<int>("pdbx_formal_charge");
 }

-std::string Atom::energyType() const
-{
-	std::string result;
-	
-	if (impl() and impl()->mCompound)
-		result = impl()->mCompound->getAtomByID(impl()->mAtomID).typeEnergy;
-	
-	return result;
-}
-
 float Atom::uIso() const
 {
 	float result;
@@ -1013,21 +1003,6 @@ bool Residue::isEntity() const
 	return a1.size() == a2.size();
 }

-bool Residue::isSugar() const
-{
-	return compound().isSugar();
-}
-
-bool Residue::isPyranose() const
-{
-	return cif::iequals(compound().group(), "pyranose");
-}
-
-bool Residue::isFuranose() const
-{
-	return cif::iequals(compound().group(), "furanose");
-}
-
 std::string Residue::authID() const
 {
 	std::string result;
@@ -2101,7 +2076,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
 {
 	auto compound = Compound::create(compoundID);
 	if (compound == nullptr)
-		throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCP4 monomers lib)");
+		throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");

 	cif::Datablock& db = *mFile.impl().mDb;
 	
@@ -2113,6 +2088,7 @@ void Structure::insertCompound(const std::string& compoundID, bool isEntity)
 			{ "id", compoundID },
 			{ "name", compound->name() },
 			{ "formula", compound->formula() },
+			{ "formula_weight", compound->formulaWeight() },
 			{ "type", compound->type() }
 		});
 	}