backup

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@172 a1961a4f-ab94-4bcc-80e8-33b5a54de466

backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@172 a1961a4f-ab94-4bcc-80e8-33b5a54de466
edf132c4 · maarten · f3878b37 · edf132c4 · edf132c4 · edf132c4
Commit edf132c4 authored Nov 22, 2017 by maarten
17 changed files
--- a/include/cif++/BondMap.h
+++ b/include/cif++/BondMap.h
+// copyright
+#include <unordered_map>
+#include "cif++/Structure.h"
+namespace libcif
+{
+// Table recording which pairs of atoms of a Structure are bonded.
+// A BondMap is filled once by its constructor and cannot be copied.
+class BondMap
+{
+  public:
+	// Builds the table for the atoms of p. The constructor is explicit so
+	// that a Structure is never implicitly converted into a BondMap.
+	explicit BondMap(const Structure& p);
+	BondMap(const BondMap&) = delete;
+	BondMap& operator=(const BondMap&) = delete;
+	// Returns whether atoms a and b were recorded as bonded.
+	bool operator()(const Atom& a, const Atom& b) const;
+  private:
+	uint32 dim;										// number of atoms in the structure
+	std::vector<bool> bond;							// one flag per pair of atoms
+	std::unordered_map<std::string,size_t> index;	// atom id -> position used to address bond
+};
+}
--- a/include/cif++/Cif++.h
+++ b/include/cif++/Cif++.h
@@ -7,6 +7,7 @@
 #include <regex>
 #include <iostream>
 #include <set>
+#include <list>
 #include <boost/lexical_cast.hpp>
 #include <boost/any.hpp>
@@ -211,12 +212,11 @@ class Datablock
 	Category* get(const string& name);
 	void getTagOrder(vector<string>& tags) const;
+	void write(std::ostream& os, const vector<string>& order);
+	void write(std::ostream& os);
  private:
-	void write(std::ostream& os);
-	void write(std::ostream& os, const vector<string>& order);
 	std::list<Category>	mCategories;
 	string				mName;
 	Validator*			mValidator;
@@ -451,7 +451,7 @@ class Row
 {
  public:
 	friend class Category;
-	friend class catIndex;
+	friend class CatIndex;
 	friend class RowComparator;
 	friend struct detail::ItemReference;
@@ -995,7 +995,7 @@ class Category
 	vector<ItemColumn>	mColumns;
 	ItemRow*			mHead;
 	ItemRow*			mTail;
-	class catIndex*	mIndex;
+	class CatIndex*		mIndex;
 };
 // --------------------------------------------------------------------

--- a/include/cif++/CifUtils.h
+++ b/include/cif++/CifUtils.h
@@ -53,4 +53,30 @@ std::tuple<std::string,std::string> splitTagName(const std::string& tag);
 std::vector<std::string> wordWrap(const std::string& text, unsigned int width);
+// --------------------------------------------------------------------
+//	Code helping with terminal i/o
+uint32 get_terminal_width();
+// --------------------------------------------------------------------
+//	A progress bar
+// Reports the progress of a long running action. It is created with the
+// total amount of work and a label, and is then told how much work was
+// done through consumed() or progress().
+class Progress
+{
+  public:
+				Progress(int64 inMax, const std::string& inAction);
+	virtual		~Progress();
+	// not copyable, the object owns the implementation behind mImpl
+				Progress(const Progress&) = delete;
+	Progress&	operator=(const Progress&) = delete;
+	void		consumed(int64 inConsumed);	// consumed is relative
+	void		progress(int64 inProgress);	// progress is absolute
+	void		message(const std::string& inMessage);
+  private:
+	struct ProgressImpl*	mImpl;
+};
 }
--- a/include/cif++/DistanceMap.h
+++ b/include/cif++/DistanceMap.h
+// copyright
+#pragma once
+#include <unordered_map>
+#include <clipper/clipper.h>
+#include "cif++/Structure.h"
+namespace libcif
+{
+// Table of the distances between all pairs of atoms of a Structure,
+// calculated using the given spacegroup and cell. Cannot be copied.
+class DistanceMap
+{
+  public:
+	// Calculates the distances for all atoms in p
+	DistanceMap(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell);
+	DistanceMap(const DistanceMap&) = delete;
+	DistanceMap& operator=(const DistanceMap&) = delete;
+	// Returns the stored distance between atoms a and b
+	float operator()(const Atom& a, const Atom& b) const;
+	// Returns the atoms whose stored distance to a is not zero and at most maxDistance
+	std::vector<Atom> near(const Atom& a, float maxDistance = 3.5f) const;
+  private:
+	uint32 dim;										// number of atoms in the structure
+	std::vector<float> dist;						// one distance per pair of atoms
+	std::unordered_map<std::string,size_t> index;	// atom id -> position used to address dist
+};
+}
--- a/include/cif++/PDB2CifRemark3.h
+++ b/include/cif++/PDB2CifRemark3.h
 #pragma once
-#include "pdb2cif.h"
+#include "cif++/PDB2Cif.h"
 // --------------------------------------------------------------------
@@ -11,30 +11,30 @@ class Remark3Parser
  public:
 	virtual ~Remark3Parser() {}
-	static bool parse(const std::string& expMethod, PDBRecord* r, cif::datablock& db);
+	static bool parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db);
 	virtual std::string program();
 	virtual std::string version();
  protected:
-	Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::datablock& db,
+	Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
 			const TemplateLine templatelines[], uint32 templateLineCount, std::regex programVersion);
-	virtual float Parse();
+	virtual float parse();
-	std::string NextLine();
+	std::string nextLine();
-	bool Match(const char* expr, int nextState);
+	bool match(const char* expr, int nextState);
-	void StoreCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
+	void storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
-	void StoreRefineLsRestr(const char* type, std::initializer_list<const char*> values);
+	void storeRefineLsRestr(const char* type, std::initializer_list<const char*> values);
-	void UpdateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
+	void updateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
-	virtual void Fixup() {}
+	virtual void fixup() {}
 	std::string		mName;
 	std::string		mExpMethod;
 	PDBRecord*		mRec;
-	cif::datablock	mDb;
+	cif::Datablock	mDb;
 	std::string		mLine;
 	std::smatch		mM;
 	uint32			mState;

--- a/include/cif++/PeptideDB.h
+++ b/include/cif++/PeptideDB.h
@@ -12,15 +12,15 @@ class PeptideDB
  public:
 	static PeptideDB& Instance();
-	void PushDictionary(boost::filesystem::path dict);
+	void pushDictionary(boost::filesystem::path dict);
-	void PopDictionary();
+	void popDictionary();
-	bool IsKnownPeptide(const std::string& res_name) const;
+	bool isKnownPeptide(const std::string& res_name) const;
-	bool IsKnownBase(const std::string& res_name) const;
+	bool isKnownBase(const std::string& res_name) const;
-	std::string GetNameForResidue(const std::string& res_name) const;
+	std::string nameForResidue(const std::string& res_name) const;
-	std::string GetFormulaForResidue(const std::string& res_name) const;
+	std::string formulaForResidue(const std::string& res_name) const;
-	std::string Unalias(const std::string& res_name) const;
+	std::string unalias(const std::string& res_name) const;
  private:
 	PeptideDB();

--- a/include/cif++/Point.h
+++ b/include/cif++/Point.h
@@ -231,9 +231,9 @@ class SphericalDots
 			double lat = std::asin((2.0 * i) / P);
 			double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
-			p->x(sin(lon) * cos(lat));
+			p->setX(sin(lon) * cos(lat));
-			p->y(cos(lon) * cos(lat));
+			p->setY(cos(lon) * cos(lat));
-			p->z(           sin(lat));
+			p->setZ(           sin(lat));
 			++p;
 		}

--- a/include/cif++/Structure.h
+++ b/include/cif++/Structure.h
@@ -37,9 +37,9 @@
 namespace cif
 {
 	class Datablock;
+	class File;
 };
 namespace libcif
 {
@@ -218,6 +218,7 @@ class File : public std::enable_shared_from_this<File>
 	std::vector<const Entity*> entities();
 	cif::Datablock& data();
+	cif::File& file();
  private:

--- a/src/BondMap.cpp
+++ b/src/BondMap.cpp
+// copyright
+#include "cif++/Config.h"
+#include "cif++/Cif++.h"
+#include "cif++/BondMap.h"
+#include "cif++/Compound.h"
+#include "cif++/CifUtils.h"
+using namespace std;
+namespace libcif
+{
+// --------------------------------------------------------------------
+// Builds the bond table for the atoms of structure p. Bonds are taken from
+// three sources: the C-N link between successive residues listed in
+// pdbx_poly_seq_scheme, the records in struct_conn and the bonds that the
+// compound of each residue defines between its atoms.
+BondMap::BondMap(const Structure& p)
+	: dim(0)
+{
+	auto atoms = p.atoms();
+	dim = atoms.size();
+	// size the table in size_t: dim * (dim - 1) no longer fits in a uint32
+	// once there are more than 65536 atoms
+	const size_t n = dim;
+	bond = vector<bool>(n > 1 ? n * (n - 1) : 0, false);
+	for (auto& atom: atoms)
+	{
+		size_t ix = index.size();
+		index[atom.id()] = ix;
+	}
+	// flags the atoms with ids a and b as bonded
+	auto bindAtoms = [this](const string& a, const string& b)
+	{
+		// look the ids up with find: operator[] would silently add an
+		// unknown id to the index, mapped to position 0
+		auto ia = index.find(a);
+		auto ib = index.find(b);
+		if (ia == index.end() or ib == index.end())
+		{
+			if (VERBOSE)
+				cerr << "skipping bond between " << a << " and " << b << ", atom is not part of the structure" << endl;
+			return;
+		}
+		size_t ixa = ia->second;
+		size_t ixb = ib->second;
+		if (ixa == ixb)		// nothing to record for an atom paired with itself
+			return;
+		if (ixb < ixa)
+			swap(ixa, ixb);
+		size_t ix = ixb + ixa * dim - ixa * (ixa + 1) / 2;
+		assert(ix < bond.size());
+		bond[ix] = true;
+	};
+	cif::Datablock& db = p.getFile().data();
+	// collect all compounds first
+	set<string> compounds;
+	for (auto c: db["chem_comp"])
+		compounds.insert(c["id"].as<string>());
+	// make sure we also have all residues in the polyseq
+	for (auto m: db["entity_poly_seq"])
+	{
+		string c = m["mon_id"].as<string>();
+		if (compounds.count(c))
+			continue;
+		cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << endl;
+		compounds.insert(c);
+	}
+	// first link all residues in a polyseq
+	string lastAsymID;
+	int lastSeqID = 0;		// initialised, a first row with an empty asym_id would read it
+	for (auto r: db["pdbx_poly_seq_scheme"])
+	{
+		string asymId;
+		int seqId;
+		cif::tie(asymId, seqId) = r.get("asym_id", "seq_id");
+		if (asymId != lastAsymID)		// first in a new sequence
+		{
+			lastAsymID = asymId;
+			lastSeqID = seqId;
+			continue;
+		}
+		auto c = db["atom_site"].find(cif::Key("label_asym_id") == asymId and cif::Key("label_seq_id") == lastSeqID and cif::Key("label_atom_id") == "C");
+		if (c.size() != 1)
+			cerr << "Unexpected number (" << c.size() << ") of atoms with atom ID C in asym_id " << asymId << " with seq id " << lastSeqID << endl;
+		auto n = db["atom_site"].find(cif::Key("label_asym_id") == asymId and cif::Key("label_seq_id") == seqId and cif::Key("label_atom_id") == "N");
+		if (n.size() != 1)
+			cerr << "Unexpected number (" << n.size() << ") of atoms with atom ID N in asym_id " << asymId << " with seq id " << seqId << endl;
+		if (not (c.empty() or n.empty()))
+			bindAtoms(c.front()["id"].as<string>(), n.front()["id"].as<string>());
+		lastSeqID = seqId;
+	}
+	// next the explicit links from struct_conn
+	for (auto l: db["struct_conn"])
+	{
+		string asym1, asym2, atomId1, atomId2;
+		int seqId1, seqId2;
+		cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
+			l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
+				  "ptnr1_label_atom_id", "ptnr2_label_atom_id",
+				  "ptnr1_label_seq_id", "ptnr2_label_seq_id");
+		auto a = db["atom_site"].find(cif::Key("label_asym_id") == asym1 and cif::Key("label_seq_id") == seqId1 and cif::Key("label_atom_id") == atomId1);
+		if (a.size() != 1)
+			cerr << "Unexpected number (" << a.size() << ") of atoms for link with asym_id " << asym1 << " seq_id " << seqId1 << " atom_id  " << atomId1 << endl;
+		auto b = db["atom_site"].find(cif::Key("label_asym_id") == asym2 and cif::Key("label_seq_id") == seqId2 and cif::Key("label_atom_id") == atomId2);
+		if (b.size() != 1)
+			cerr << "Unexpected number (" << b.size() << ") of atoms for link with asym_id " << asym2 << " seq_id " << seqId2 << " atom_id  " << atomId2 << endl;
+		if (not (a.empty() or b.empty()))
+			bindAtoms(a.front()["id"].as<string>(), b.front()["id"].as<string>());
+	}
+	// then link all atoms in the compounds
+	cif::Progress progress(compounds.size(), "Creating bond map");
+	for (const auto& c: compounds)
+	{
+		auto* compound = libcif::Compound::create(c);
+		if (not compound)
+		{
+			cerr << "Missing compound information for " << c << endl;
+			progress.consumed(1);	// a skipped compound counts as handled too
+			continue;
+		}
+		if (compound->isWater())
+		{
+			if (VERBOSE)
+				cerr << "skipping water in bond map calculation" << endl;
+			progress.consumed(1);
+			continue;
+		}
+		// flags every pair of atoms in rAtoms that the compound lists as bonded
+		auto bindResidue = [&](vector<Atom>& rAtoms)
+		{
+			for (size_t i = 0; i + 1 < rAtoms.size(); ++i)
+			{
+				for (size_t j = i + 1; j < rAtoms.size(); ++j)
+				{
+					if (compound->atomsBonded(rAtoms[i].labelAtomId(), rAtoms[j].labelAtomId()))
+						bindAtoms(rAtoms[i].id(), rAtoms[j].id());
+				}
+			}
+		};
+		// loop over poly_seq_scheme
+		for (auto r: db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == c))
+		{
+			string asymId;
+			int seqId;
+			cif::tie(asymId, seqId) = r.get("asym_id", "seq_id");
+			vector<Atom> rAtoms;
+			for (auto a: db["atom_site"].find(cif::Key("label_asym_id") == asymId and cif::Key("label_seq_id") == seqId))
+				rAtoms.push_back(p.getAtomById(a["id"].as<string>()));
+			bindResidue(rAtoms);
+		}
+		// loop over pdbx_nonpoly_scheme
+		for (auto r: db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == c))
+		{
+			string asymId;
+			cif::tie(asymId) = r.get("asym_id");
+			vector<Atom> rAtoms;
+			for (auto a: db["atom_site"].find(cif::Key("label_asym_id") == asymId))
+				rAtoms.push_back(p.getAtomById(a["id"].as<string>()));
+			bindResidue(rAtoms);
+		}
+		progress.consumed(1);
+	}
+}
+// Returns whether atoms a and b were recorded as bonded. An atom is never
+// reported as bonded to itself. unordered_map::at throws out_of_range when
+// the id of a or b is not part of the map.
+bool BondMap::operator()(const Atom& a, const Atom& b) const
+{
+	// positions and slot are kept in size_t, the slot number does not fit
+	// in a uint32 for large structures
+	size_t ixa = index.at(a.id());
+	size_t ixb = index.at(b.id());
+	// the table has no slot for an atom paired with itself; for the last
+	// atom the formula below would even point past the end of the table
+	if (ixa == ixb)
+		return false;
+	if (ixb < ixa)
+		swap(ixa, ixb);
+	size_t ix = ixb + ixa * dim - ixa * (ixa + 1) / 2;
+	assert(ix < bond.size());
+	return bond[ix];
+}
+}
--- a/src/Cif++.cpp
+++ b/src/Cif++.cpp
@@ -525,11 +525,11 @@ int RowComparator::operator()(const ItemRow* a, const ItemRow* b) const
 //	class to keep an index on the keys of a Category. This is a red/black
 //	tree implementation.
-class catIndex
+class CatIndex
 {
  public:
-	catIndex(Category* cat);
+	CatIndex(Category* cat);
-	~catIndex();
+	~CatIndex();
 	ItemRow* find(ItemRow* k) const;
@@ -716,17 +716,17 @@ class catIndex
 	entry*				mRoot;
 };
-catIndex::catIndex(Category* cat)
+CatIndex::CatIndex(Category* cat)
 	: mCat(*cat), mComp(cat), mRoot(nullptr)
 {
 }
-catIndex::~catIndex()
+CatIndex::~CatIndex()
 {
 	delete mRoot;
 }
-ItemRow* catIndex::find(ItemRow* k) const
+ItemRow* CatIndex::find(ItemRow* k) const
 {
 	const entry* r = mRoot;
 	while (r != nullptr)
@@ -743,13 +743,13 @@ ItemRow* catIndex::find(ItemRow* k) const
 	return r ? r->mRow : nullptr;
 }
-void catIndex::insert(ItemRow* k)
+void CatIndex::insert(ItemRow* k)
 {
 	mRoot = insert(mRoot, k);
 	mRoot->mRed = false;
 }
-catIndex::entry* catIndex::insert(entry* h, ItemRow* v)
+CatIndex::entry* CatIndex::insert(entry* h, ItemRow* v)
 {
 	if (h == nullptr)
 		return new entry(v);
@@ -772,14 +772,14 @@ catIndex::entry* catIndex::insert(entry* h, ItemRow* v)
 	return h;
 }
-void catIndex::erase(ItemRow* k)
+void CatIndex::erase(ItemRow* k)
 {
 	mRoot = erase(mRoot, k);
 	if (mRoot != nullptr)
 		mRoot->mRed = false;
 }
-catIndex::entry* catIndex::erase(entry* h, ItemRow* k)
+CatIndex::entry* CatIndex::erase(entry* h, ItemRow* k)
 {
 	if (mComp(k, h->mRow) < 0)
 	{
@@ -820,7 +820,7 @@ catIndex::entry* catIndex::erase(entry* h, ItemRow* k)
 	return fixUp(h);
 }
-void catIndex::reconstruct()
+void CatIndex::reconstruct()
 {
 	delete mRoot;
 	mRoot = nullptr;
@@ -897,7 +897,7 @@ void catIndex::reconstruct()
 //	mRoot = e.front();
 }
-size_t catIndex::size() const
+size_t CatIndex::size() const
 {
 	stack<entry*> s;
 	s.push(mRoot);
@@ -921,7 +921,7 @@ size_t catIndex::size() const
 	return result;
 }
-void catIndex::validate() const
+void CatIndex::validate() const
 {
 	if (mRoot != nullptr)
 	{
@@ -935,7 +935,7 @@ void catIndex::validate() const
 	}
 }
-void catIndex::validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const
+void CatIndex::validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const
 {
 	if (h->mRed)
 		assert(not isParentRed);
@@ -1018,7 +1018,7 @@ Category::Category(Datablock& db, const string& name, Validator* Validator)
 			for (auto& k: mCatValidator->mMandatoryFields)
 				addColumn(k);
-			mIndex = new catIndex(this);
+			mIndex = new CatIndex(this);
 		}
 	}
 }
@@ -1044,7 +1044,7 @@ void Category::setValidator(Validator* v)
 		mCatValidator = mValidator->getValidatorForCategory(mName);
 		if (mCatValidator != nullptr)
 		{
-			mIndex = new catIndex(this);
+			mIndex = new CatIndex(this);
 			mIndex->reconstruct();
 #if DEBUG
 			assert(mIndex->size() == size());
@@ -1192,7 +1192,7 @@ void Category::clear()
 	if (mIndex != nullptr)
 	{
 		delete mIndex;
-		mIndex = new catIndex(this);
+		mIndex = new CatIndex(this);
 	}
 }

--- a/src/CifParser.cpp
+++ b/src/CifParser.cpp
@@ -762,11 +762,11 @@ void DictParser::parseSaveFrame()
 		string category = dict.firstItem("_category.id");
 		vector<string> keys;
-		for (auto k: dict["categoryKey"])
+		for (auto k: dict["category_key"])
 			keys.push_back(get<1>(splitTagName(k["name"].as<string>())));
 		iset groups;
-		for (auto g: dict["categoryGroup"])
+		for (auto g: dict["category_group"])
 			groups.insert(g["id"].as<string>());
 		mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});

--- a/src/CifUtils.cpp
+++ b/src/CifUtils.cpp
@@ -4,8 +4,21 @@
 #include <tuple>
 #include <iostream>
+#include <cstdio>
+#include <atomic>
+#if defined(_MSC_VER)
+#define TERM_WIDTH 80
+#else
+#include <termios.h>
+#include <sys/ioctl.h>
+#endif
 #include <boost/algorithm/string.hpp>
+#include <boost/thread.hpp>
+#if BOOST_VERSION >= 104800
+#include <boost/timer/timer.hpp>
+#endif
 #include "cif++/CifUtils.h"
@@ -94,13 +107,13 @@ int icompare(const char* a, const char* b)
 	return d;
 }
-void to_lower(string& s)
+void toLower(string& s)
 {
 	for (auto& c: s)
 		c = tolower(c);
 }
-string to_lower_copy(const string& s)
+string toLowerCopy(const string& s)
 {
 	string result(s);
 	for (auto& c: result)
@@ -110,7 +123,7 @@ string to_lower_copy(const string& s)
 // --------------------------------------------------------------------
-tuple<string,string> split_tag_name(const string& tag)
+tuple<string,string> splitTagName(const string& tag)
 {
 	if (tag.empty())
 		throw runtime_error("empty tag");
@@ -196,12 +209,12 @@ const LineBreakClass kASCII_LBTable[128] =
 	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark
 };
-string::const_iterator next_line_break(string::const_iterator text, string::const_iterator end)
+string::const_iterator nextLineBreak(string::const_iterator text, string::const_iterator end)
 {
 	if (text == end)
 		return text;
-	enum break_action
+	enum breakAction
 	{ 
 		DBK = 0, // direct break 	(blank in table)
 		IBK, 	// indirect break	(% in table)
@@ -210,7 +223,7 @@ string::const_iterator next_line_break(string::const_iterator text, string::cons
 		CPB		// combining prohibited break
 	};
-	const break_action brkTable[27][27] = {
+	const breakAction brkTable[27][27] = {
 	//   	OP  	CL  	CP  	QU  	GL  	NS  	EX  	SY  	IS  	PR  	PO  	NU  	AL  	ID  	IN  	HY  	BA  	BB  	B2  	ZW  	CM  	WJ  	H2  	H3  	JL  	JV  	JT
 /* OP */ { 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	CPB, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK },
 /* CL */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
@@ -278,7 +291,7 @@ string::const_iterator next_line_break(string::const_iterator text, string::cons
 		if (ncls == kLBC_Space)
 			continue;
-		break_action brk = brkTable[cls][ncls];
+		breakAction brk = brkTable[cls][ncls];
 		if (brk == DBK or (brk == IBK and lcls == kLBC_Space))
 			break;
@@ -289,7 +302,7 @@ string::const_iterator next_line_break(string::const_iterator text, string::cons
 	return text;
 }
-vector<string> wrap_line(const string& text, unsigned int width)
+vector<string> wrapLine(const string& text, unsigned int width)
 {
 	vector<string> result;
 	vector<size_t> offsets = { 0 };
@@ -297,7 +310,7 @@ vector<string> wrap_line(const string& text, unsigned int width)
 	auto b = text.begin();
 	while (b != text.end())
 	{
-		auto e = next_line_break(b, text.end());
+		auto e = nextLineBreak(b, text.end());
 		offsets.push_back(e - text.begin());
@@ -350,7 +363,7 @@ vector<string> wrap_line(const string& text, unsigned int width)
 	return result;
 }
-vector<string> word_wrap(const string& text, unsigned int width)
+vector<string> wordWrap(const string& text, unsigned int width)
 {
 	vector<string> paragraphs;
 	ba::split(paragraphs, text, ba::is_any_of("\n"));
@@ -364,11 +377,168 @@ vector<string> word_wrap(const string& text, unsigned int width)
 			continue;
 		}
-		auto lines = wrap_line(p, width);
+		auto lines = wrapLine(p, width);
 		result.insert(result.end(), lines.begin(), lines.end());
 	}
 	return result;
 }
+// --------------------------------------------------------------------
+#ifdef _MSC_VER
+// No terminal query is done in this build, the fixed TERM_WIDTH is reported
+uint32 get_terminal_width()
+{
+	return TERM_WIDTH;
+}
+#else
+// Returns the width in columns of the terminal attached to standard
+// output, the stream the progress line is written to. Falls back to 80
+// columns when the query fails or reports zero columns; after a failed
+// ioctl the winsize struct is not filled in and must not be used.
+uint32 get_terminal_width()
+{
+	const uint32 kDefaultWidth = 80;
+	struct winsize w = {};
+	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) != 0 or w.ws_col == 0)
+		return kDefaultWidth;
+	return w.ws_col;
+}
+#endif
+// --------------------------------------------------------------------
+// State shared between a Progress object and the thread that draws the
+// progress line once a second.
+struct ProgressImpl
+{
+					ProgressImpl(int64 inMax, const string& inAction)
+						: mMax(inMax), mConsumed(0), mAction(inAction), mMessage(inAction)
+						, mThread(boost::bind(&ProgressImpl::Run, this)) {}
+	void			Run();				// body of mThread
+	void			PrintProgress();	// draws the progress line
+	void			PrintDone();		// draws the closing line
+	int64			mMax;				// total amount of work
+	atomic<int64>	mConsumed;			// amount of work done so far
+	string			mAction, mMessage;	// label for the closing line, label for the progress line
+	boost::mutex	mMutex;
+	boost::timer::cpu_timer
+					mTimer;
+	// mThread must stay the last member: members are constructed in the
+	// order of declaration and the thread uses the ones above as soon as
+	// it is running
+	boost::thread	mThread;
+};
+// Body of the reporting thread: redraws the progress line once a second
+// until all work is done or the thread is interrupted, then prints the
+// closing line.
+void ProgressImpl::Run()
+{
+	try
+	{
+		for (;;)
+		{
+			boost::this_thread::sleep(boost::posix_time::seconds(1));
+			boost::mutex::scoped_lock lock(mMutex);
+			// >= instead of ==: the counter can be moved past mMax and the
+			// loop has to end in that case as well
+			if (mConsumed >= mMax)
+				break;
+			PrintProgress();
+		}
+	}
+	// interrupt() ends the sleep above with an exception; since nothing may
+	// leave a thread function, everything is caught here
+	catch (...) {}
+	PrintDone();
+}
+// Draws one progress line: the message padded or cut to 20 characters, a
+// bar between brackets and the percentage, preceded by a carriage return
+// so the line is overwritten the next time.
+void ProgressImpl::PrintProgress()
+{
+	uint32 width = get_terminal_width();
+	string msg;
+	msg.reserve(width + 1);
+	if (mMessage.length() <= 20)
+	{
+		msg = mMessage;
+		if (msg.length() < 20)
+			msg.append(20 - msg.length(), ' ');
+	}
+	else
+		msg = mMessage.substr(0, 17) + "...";
+	msg += " [";
+	// fraction of the work done, kept within [0, 1]: there is nothing to
+	// divide by when mMax is not positive and the counter can pass mMax
+	float progress = 1;
+	if (mMax > 0)
+		progress = static_cast<float>(mConsumed) / mMax;
+	if (progress < 0)
+		progress = 0;
+	else if (progress > 1)
+		progress = 1;
+	// label (20), brackets (4) and percentage (4) take 28 columns, the bar
+	// gets what is left; on a narrower terminal the bar is left out since
+	// a negative length would turn into a huge count in append
+	int tw = width > 28 ? static_cast<int>(width - 28) : 0;
+	int twd = static_cast<int>(tw * progress + 0.5f);
+	msg.append(twd, '=');
+	msg.append(tw - twd, ' ');
+	msg.append("] ");
+	int perc = static_cast<int>(100 * progress);
+	if (perc < 100)
+		msg += ' ';
+	if (perc < 10)
+		msg += ' ';
+	msg += to_string(perc);
+	msg += '%';
+	cout << '\r' << msg;
+	cout.flush();
+}
+// Replaces the progress line by "<action> done in <cpu and wall time>",
+// padded with spaces up to the terminal width, and ends the line.
+void ProgressImpl::PrintDone()
+{
+	string line = mAction;
+	line += " done in ";
+	line += mTimer.format(0, "%ts cpu / %ws wall");
+	const uint32 columns = get_terminal_width();
+	const size_t used = line.length();
+	// blank out what is left of the longer progress line
+	if (used < columns)
+		line.append(columns - used, ' ');
+	cout << '\r' << line << endl;
+}
+// Creates the implementation only when standard output is a terminal.
+// Otherwise mImpl stays null and the other members do nothing.
+Progress::Progress(int64 inMax, const string& inAction)
+	: mImpl(isatty(STDOUT_FILENO) ? new ProgressImpl(inMax, inAction) : nullptr)
+{
+}
+// Stops the reporting thread when it is still running and releases the
+// implementation.
+Progress::~Progress()
+{
+	if (mImpl == nullptr)
+		return;
+	boost::thread& worker = mImpl->mThread;
+	if (worker.joinable())
+	{
+		worker.interrupt();
+		worker.join();
+	}
+	delete mImpl;
+}
+// Adds inConsumed to the amount of work done. The call that takes the
+// total up to the maximum stops the reporting thread and waits for it.
+void Progress::consumed(int64 inConsumed)
+{
+	if (mImpl == nullptr)
+		return;
+	const int64 after = (mImpl->mConsumed += inConsumed);
+	const int64 before = after - inConsumed;
+	// This method can be called from several threads at the same time while
+	// a thread object may not be interrupted and joined by more than one of
+	// them. The atomic add gives each call its own before/after pair, so
+	// only the one call that crosses the maximum gets to do that.
+	if (before < mImpl->mMax and after >= mImpl->mMax and
+		mImpl->mThread.joinable())
+	{
+		mImpl->mThread.interrupt();
+		mImpl->mThread.join();
+	}
+}
+// Sets the amount of work done to inProgress. Once that reaches the
+// maximum the reporting thread is stopped and waited for.
+void Progress::progress(int64 inProgress)
+{
+	if (mImpl == nullptr)
+		return;
+	mImpl->mConsumed = inProgress;
+	if (inProgress < mImpl->mMax)
+		return;
+	boost::thread& worker = mImpl->mThread;
+	if (not worker.joinable())
+		return;
+	worker.interrupt();
+	worker.join();
+}
+// Replaces the text shown in front of the progress bar. The mutex is held
+// while the text is changed, the reporting thread holds the same mutex
+// while it draws.
+void Progress::message(const std::string& inMessage)
+{
+	if (mImpl == nullptr)
+		return;
+	boost::mutex::scoped_lock guard(mImpl->mMutex);
+	mImpl->mMessage = inMessage;
+}
 }
--- a/src/DistanceMap.cpp
+++ b/src/DistanceMap.cpp
+// copyright
+#include "cif++/Config.h"
+#include <atomic>
+#include <boost/thread.hpp>
+#include "cif++/DistanceMap.h"
+#include "cif++/CifUtils.h"
+using namespace std;
+namespace libcif
+{
+// --------------------------------------------------------------------
+// Returns, for each symmetry operation of the spacegroup, the 27
+// orthogonal operators that result from adding -1, 0 or 1 to each of the
+// three components of its fractional translation.
+vector<clipper::RTop_orth> AlternativeSites(const clipper::Spacegroup& spacegroup,
+	const clipper::Cell& cell)
+{
+	const int kShifts[] = { -1, 0, 1 };
+	vector<clipper::RTop_orth> sites;
+	for (int s = 0; s < spacegroup.num_symops(); ++s)
+	{
+		const auto& op = spacegroup.symop(s);
+		for (int du: kShifts)
+		{
+			for (int dv: kShifts)
+			{
+				for (int dw: kShifts)
+				{
+					clipper::RTop_frac shifted(op.rot(), op.trn() + clipper::Vec3<>(du, dv, dw));
+					sites.push_back(shifted.rtop_orth(cell));
+				}
+			}
+		}
+	}
+	return sites;
+}
+// --------------------------------------------------------------------
+// Calculates the distance between every pair of atoms in structure p. For
+// each pair the smallest distance over all operators returned by
+// AlternativeSites for the spacegroup and cell is stored. The rows of the
+// table are divided over a group of worker threads.
+DistanceMap::DistanceMap(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell)
+	: dim(0)
+{
+	auto atoms = p.atoms();
+	dim = atoms.size();
+	// size the table in size_t: dim * (dim - 1) no longer fits in a uint32
+	// once there are more than 65536 atoms
+	const size_t n = dim;
+	dist = vector<float>(n > 1 ? n * (n - 1) : 0, 0.f);
+	vector<clipper::Coord_orth> locations(dim);
+	vector<bool> isWater(dim, false);	// only used by the disabled filter below
+	for (auto& atom: atoms)
+	{
+		size_t ix = index.size();
+		index[atom.id()] = ix;
+		locations[ix] = atom.location();
+		isWater[ix] = atom.isWater();
+	}
+	vector<clipper::RTop_orth> rtOrth = AlternativeSites(spacegroup, cell);
+	// size() - 1 would wrap around for a structure without atoms
+	cif::Progress progress(locations.empty() ? 0 : locations.size() - 1, "Creating distance map");
+	boost::thread_group t;
+	size_t N = boost::thread::hardware_concurrency();
+	// hardware_concurrency returns 0 when the number is not known; without
+	// any worker the table would silently stay empty
+	if (N == 0)
+		N = 1;
+	atomic<size_t> next(0);		// next row to hand out to a worker
+	for (size_t worker = 0; worker < N; ++worker)
+		t.create_thread([&]()
+		{
+			for (;;)
+			{
+				size_t i = next++;
+				if (i >= locations.size())
+					break;
+				for (size_t j = i + 1; j < locations.size(); ++j)
+				{
+//					if (not (isWater[i] or isWater[j]))
+//						continue;
+					// find nearest location based on spacegroup/cell
+					double minR2 = numeric_limits<double>::max();
+					for (const auto& rt: rtOrth)
+					{
+						auto moved = locations[j].transform(rt);
+						double r2 = (locations[i] - moved).lengthsq();
+						if (minR2 > r2)
+							minR2 = r2;
+					}
+					size_t ix = j + i * n - i * (i + 1) / 2;
+					assert(ix < dist.size());
+					dist[ix] = static_cast<float>(sqrt(minR2));
+				}
+				progress.consumed(1);	// called from all workers
+			}
+		});
+	t.join_all();
+}
+// Returns the stored distance between atoms a and b, zero when a and b
+// are the same atom. unordered_map::at throws out_of_range when the id of
+// a or b is not part of the map.
+float DistanceMap::operator()(const Atom& a, const Atom& b) const
+{
+	// positions and slot are kept in size_t, the slot number does not fit
+	// in a uint32 for large structures
+	size_t ixa = index.at(a.id());
+	size_t ixb = index.at(b.id());
+	// the table has no slot for an atom paired with itself; for the last
+	// atom the formula below would even point past the end of the table
+	if (ixa == ixb)
+		return 0.f;
+	if (ixb < ixa)
+		swap(ixa, ixb);
+	size_t ix = ixb + ixa * dim - ixa * (ixa + 1) / 2;
+	assert(ix < dist.size());
+	return dist[ix];
+}
+// Returns all other atoms whose stored distance to a is at most
+// maxDistance. Pairs with a stored distance of exactly zero are left out.
+// The result follows the iteration order of the index, an unordered_map.
+// unordered_map::at throws out_of_range when the id of a is not part of
+// the map.
+vector<Atom> DistanceMap::near(const Atom& a, float maxDistance) const
+{
+	vector<Atom> result;
+	const File& f = a.getFile();
+	// positions and slot are kept in size_t, the slot number does not fit
+	// in a uint32 for large structures
+	const size_t ixa = index.at(a.id());
+	for (auto& i: index)
+	{
+		const size_t ixb = i.second;
+		if (ixb == ixa)
+			continue;
+		// the slot is addressed with the smaller position first
+		size_t ix;
+		if (ixa < ixb)
+			ix = ixb + ixa * dim - ixa * (ixa + 1) / 2;
+		else
+			ix = ixa + ixb * dim - ixb * (ixb + 1) / 2;
+		assert(ix < dist.size());
+		if (dist[ix] != 0 and dist[ix] <= maxDistance)
+			result.emplace_back(f, i.first);
+	}
+	return result;
+}
+}
--- a/src/PDB2Cif.cpp
+++ b/src/PDB2Cif.cpp
--- a/src/PDB2CifRemark3.cpp
+++ b/src/PDB2CifRemark3.cpp
--- a/src/PeptideDB.cpp
+++ b/src/PeptideDB.cpp
-#include "libpr.h"
+#include "cif++/Config.h"
 #include <set>
 #include <map>
@@ -8,9 +8,8 @@
 #include <boost/filesystem/fstream.hpp>
 #include <boost/algorithm/string.hpp>
-#include "cif++.h"
+#include "cif++/Cif++.h"
+#include "cif++/PeptideDB.h"
-#include "peptidedb.h"
 using namespace std;
 namespace fs = boost::filesystem;
@@ -62,46 +61,46 @@ struct PeptideDBImpl
 	~PeptideDBImpl()
 	{
-		delete m_next;
+		delete mNext;
 	}
-	/*unordered_*/set<string>	m_known_peptides;
+	/*unordered_*/set<string>	mKnownPeptides;
-	set<string>					m_known_bases;
+	set<string>					mKnownBases;
-	cif::file					m_file;
+	cif::File					mFile;
-	cif::category&				m_chem_comp;
+	cif::Category&				mChemComp;
-	PeptideDBImpl*				m_next;
+	PeptideDBImpl*				mNext;
-	string name_for(const string& res_name) const
+	string nameFor(const string& resName) const
 	{
 		string result;
-		for (auto& chem_comp: m_chem_comp)
+		for (auto& chemComp: mChemComp)
 		{
-			if (ba::iequals(chem_comp["three_letter_code"].as<string>(), res_name) == false)
+			if (ba::iequals(chemComp["three_letter_code"].as<string>(), resName) == false)
 				continue;
-			result = chem_comp["name"].as<string>();
+			result = chemComp["name"].as<string>();
 			ba::trim(result);
 			break;
 		}
-		if (result.empty() and m_next)
+		if (result.empty() and mNext)
-			result = m_next->name_for(res_name);
+			result = mNext->nameFor(resName);
 		return result;
 	}
-	string formula_for(string res_name) const;
+	string formulaFor(string resName) const;
-	string unalias(const string& res_name) const
+	string unalias(const string& resName) const
 	{
-		string result = res_name;
+		string result = resName;
-		auto& e = const_cast<cif::file&>(m_file)["comp_synonym_list"];
+		auto& e = const_cast<cif::File&>(mFile)["comp_synonym_list"];
 		for (auto& synonym: e["chem_comp_synonyms"])
 		{
-			if (ba::iequals(synonym["comp_alternative_id"].as<string>(), res_name) == false)
+			if (ba::iequals(synonym["comp_alternative_id"].as<string>(), resName) == false)
 				continue;
 			result = synonym["comp_id"].as<string>();
@@ -109,38 +108,38 @@ struct PeptideDBImpl
 			break;
 		}
-		if (result.empty() and m_next)
+		if (result.empty() and mNext)
-			result = m_next->unalias(res_name);
+			result = mNext->unalias(resName);
 		return result;
 	}
 };
 PeptideDBImpl::PeptideDBImpl(istream& data, PeptideDBImpl* next)
-	: m_file(data), m_chem_comp(m_file.first_datablock()["chem_comp"]), m_next(next)
+	: mFile(data), mChemComp(mFile.firstDatablock()["chem_comp"]), mNext(next)
 {
-	for (auto& chem_comp: m_chem_comp)
+	for (auto& chemComp: mChemComp)
 	{
-		string group, three_letter_code;
+		string group, threeLetterCode;
-		cif::tie(group, three_letter_code) = chem_comp.get("group", "three_letter_code");
+		cif::tie(group, threeLetterCode) = chemComp.get("group", "three_letter_code");
 		if (group == "peptide" or group == "M-peptide" or group == "P-peptide")
-			m_known_peptides.insert(three_letter_code);
+			mKnownPeptides.insert(threeLetterCode);
 		else if (group == "DNA" or group == "RNA")
-			m_known_bases.insert(three_letter_code);
+			mKnownBases.insert(threeLetterCode);
 	}
 }
-string PeptideDBImpl::formula_for(string res) const
+string PeptideDBImpl::formulaFor(string res) const
 {
 	string result;
 	ba::to_upper(res);
-	for (auto& db: m_file)
+	for (auto& db: mFile)
 	{
-		if (db.name() != "comp_" + res)
+		if (db.getName() != "comp_" + res)
 			continue;
 		auto& cat = db["chem_comp_atom"];
@@ -162,15 +161,15 @@ string PeptideDBImpl::formula_for(string res) const
 	if (result.empty())
 	{
-		if (m_next != nullptr)
+		if (mNext != nullptr)
-			result = m_next->formula_for(res);
+			result = mNext->formulaFor(res);
 		else
 		{
-			const char* clibd_mon = getenv("CLIBD_MON");
+			const char* clibdMon = getenv("CLIBD_MON");
-			if (clibd_mon == nullptr)
+			if (clibdMon == nullptr)
 				throw runtime_error("Cannot locate peptide list, please souce the CCP4 environment");
-			fs::path resFile = fs::path(clibd_mon) / ba::to_lower_copy(res.substr(0, 1)) / (res + ".cif");
+			fs::path resFile = fs::path(clibdMon) / ba::to_lower_copy(res.substr(0, 1)) / (res + ".cif");
 			if (fs::exists(resFile))
 			{
 				fs::ifstream file(resFile);
@@ -178,7 +177,7 @@ string PeptideDBImpl::formula_for(string res) const
 				{
 					try
 					{
-						cif::file cf(file);
+						cif::File cf(file);
 						auto& cat = cf["comp_" + res]["chem_comp_atom"];
@@ -223,18 +222,18 @@ PeptideDB& PeptideDB::Instance()
 PeptideDB::PeptideDB()
 {
-	const char* clibd_mon = getenv("CLIBD_MON");
+	const char* clibdMon = getenv("CLIBD_MON");
-	if (clibd_mon == nullptr)
+	if (clibdMon == nullptr)
 		throw runtime_error("Cannot locate peptide list, please souce the CCP4 environment");
-	fs::path db = fs::path(clibd_mon) / "list" / "mon_lib_list.cif";
+	fs::path db = fs::path(clibdMon) / "list" / "mon_lib_list.cif";
-	PushDictionary(db);
+	pushDictionary(db);
 	sInstance = this;
 }
-void PeptideDB::PushDictionary(boost::filesystem::path dict)
+void PeptideDB::pushDictionary(boost::filesystem::path dict)
 {
 	if (not fs::exists(dict))
 		throw runtime_error("file not found: " + dict.string());
@@ -246,13 +245,13 @@ void PeptideDB::PushDictionary(boost::filesystem::path dict)
 	mImpl = new PeptideDBImpl(file, mImpl);
 }
-void PeptideDB::PopDictionary()
+void PeptideDB::popDictionary()
 {
 	if (mImpl != nullptr)
 	{
 		auto i = mImpl;
-		mImpl = i->m_next;
+		mImpl = i->mNext;
-		i->m_next = nullptr;
+		i->mNext = nullptr;
 		delete i;
 	}
 }
@@ -262,27 +261,27 @@ PeptideDB::~PeptideDB()
 	delete mImpl;
 }
-bool PeptideDB::IsKnownPeptide(const string& res_name) const
+bool PeptideDB::isKnownPeptide(const string& resName) const
 {
-	return mImpl->m_known_peptides.count(res_name) > 0;
+	return mImpl->mKnownPeptides.count(resName) > 0;
 }
-bool PeptideDB::IsKnownBase(const string& res_name) const
+bool PeptideDB::isKnownBase(const string& resName) const
 {
-	return mImpl->m_known_bases.count(res_name) > 0;
+	return mImpl->mKnownBases.count(resName) > 0;
 }
-string PeptideDB::GetNameForResidue(const string& res_name) const
+string PeptideDB::nameForResidue(const string& resName) const
 {
-	return mImpl->name_for(res_name);
+	return mImpl->nameFor(resName);
 }
-string PeptideDB::GetFormulaForResidue(const string& res_name) const
+string PeptideDB::formulaForResidue(const string& resName) const
 {
-	return mImpl->formula_for(res_name);
+	return mImpl->formulaFor(resName);
 }
-string PeptideDB::Unalias(const string& res_name) const
+string PeptideDB::unalias(const string& resName) const
 {
-	return mImpl->unalias(res_name);
+	return mImpl->unalias(resName);
 }
--- a/src/Structure.cpp
+++ b/src/Structure.cpp
@@ -457,6 +457,16 @@ cif::Datablock& File::data()
 	return *mImpl->mDb;
 }
+// Gives access to the cif::File kept by the implementation object.
+// Throws runtime_error when there is no implementation object; in builds
+// with assertions enabled the assert fires first.
+cif::File& File::file()
+{
+	assert(mImpl);
+	if (not (mImpl != nullptr))
+	{
+		throw runtime_error("No data loaded");
+	}
+	return mImpl->mData;
+}
 // --------------------------------------------------------------------
 //	Structure