reshuffled files

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@169 a1961a4f-ab94-4bcc-80e8-33b5a54de466

reshuffled files
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@169 a1961a4f-ab94-4bcc-80e8-33b5a54de466
ca881b82 · maarten · 58666318 · ca881b82 · ca881b82 · ca881b82
Commit ca881b82 authored Nov 21, 2017 by maarten
25 changed files
--- a/include/cif++/atom_type.h
+++ b/include/cif++/atom_type.h
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#pragma once
+
+#include "libcif/config.h"
+
+#include <boost/filesystem/operations.hpp>
+#include <boost/math/quaternion.hpp>
+
+namespace libcif
+{
+/* atom_type: one enumerator per chemical element, named after its symbol.
+ * Every value is assigned explicitly (H = 1 ... Og = 118); Nn = 0 stands for
+ * an unknown element. The lanthanides (58-71) and actinides (90-103) are
+ * declared after the main table, so declaration order is not numeric order.
+ */
+enum atom_type : uint8
+{
+	Nn = 0,		// Unknown
+	
+	H = 1,		// Hydrogen
+	He = 2,		// Helium
+
+	Li = 3,		// Lithium
+	Be = 4,		// Beryllium
+	B = 5,		// Boron
+	C = 6,		// Carbon
+	N = 7,		// Nitrogen
+	O = 8,		// Oxygen
+	F = 9,		// Fluorine
+	Ne = 10,	// Neon
+
+	Na = 11,	// Sodium
+	Mg = 12,	// Magnesium
+	Al = 13,	// Aluminium
+	Si = 14,	// Silicon
+	P = 15,		// Phosphorus
+	S = 16,		// Sulfur
+	Cl = 17,	// Chlorine
+	Ar = 18,	// Argon
+
+	K = 19,		// Potassium
+	Ca = 20,	// Calcium
+	Sc = 21,	// Scandium
+	Ti = 22,	// Titanium
+	V = 23,		// Vanadium
+	Cr = 24,	// Chromium
+	Mn = 25,	// Manganese
+	Fe = 26,	// Iron
+	Co = 27,	// Cobalt
+	Ni = 28,	// Nickel
+	Cu = 29,	// Copper
+	Zn = 30,	// Zinc
+	Ga = 31,	// Gallium
+	Ge = 32,	// Germanium
+	As = 33,	// Arsenic
+	Se = 34,	// Selenium
+	Br = 35,	// Bromine
+	Kr = 36,	// Krypton
+
+	Rb = 37,	// Rubidium
+	Sr = 38,	// Strontium
+	Y = 39,		// Yttrium
+	Zr = 40,	// Zirconium
+	Nb = 41,	// Niobium
+	Mo = 42,	// Molybdenum
+	Tc = 43,	// Technetium
+	Ru = 44,	// Ruthenium
+	Rh = 45,	// Rhodium
+	Pd = 46,	// Palladium
+	Ag = 47,	// Silver
+	Cd = 48,	// Cadmium
+	In = 49,	// Indium
+	Sn = 50,	// Tin
+	Sb = 51,	// Antimony
+	Te = 52,	// Tellurium
+	I = 53,		// Iodine
+	Xe = 54,	// Xenon
+	Cs = 55,	// Caesium
+	Ba = 56,	// Barium
+	La = 57,	// Lanthanum
+	// values 58-71 (Ce ... Lu) are declared further down
+	Hf = 72,	// Hafnium
+	Ta = 73,	// Tantalum
+	W = 74,		// Tungsten
+	Re = 75,	// Rhenium
+	Os = 76,	// Osmium
+	Ir = 77,	// Iridium
+	Pt = 78,	// Platinum
+	Au = 79,	// Gold
+	Hg = 80,	// Mercury
+	Tl = 81,	// Thallium
+	Pb = 82,	// Lead
+	Bi = 83,	// Bismuth
+	Po = 84,	// Polonium
+	At = 85,	// Astatine
+	Rn = 86,	// Radon
+	Fr = 87,	// Francium
+	Ra = 88,	// Radium
+	Ac = 89,	// Actinium
+	// values 90-103 (Th ... Lr) are declared further down
+	Rf = 104,	// Rutherfordium
+	Db = 105,	// Dubnium
+	Sg = 106,	// Seaborgium
+	Bh = 107,	// Bohrium
+	Hs = 108,	// Hassium
+	Mt = 109,	// Meitnerium
+	Ds = 110,	// Darmstadtium
+	Rg = 111,	// Roentgenium
+	Cn = 112,	// Copernicium
+	Nh = 113,	// Nihonium
+	Fl = 114,	// Flerovium
+	Mc = 115,	// Moscovium
+	Lv = 116,	// Livermorium
+	Ts = 117,	// Tennessine
+	Og = 118,	// Oganesson
+	// lanthanides: the values skipped between La (57) and Hf (72)
+	Ce = 58,	// Cerium
+	Pr = 59,	// Praseodymium
+	Nd = 60,	// Neodymium
+	Pm = 61,	// Promethium
+	Sm = 62,	// Samarium
+	Eu = 63,	// Europium
+	Gd = 64,	// Gadolinium
+	Tb = 65,	// Terbium
+	Dy = 66,	// Dysprosium
+	Ho = 67,	// Holmium
+	Er = 68,	// Erbium
+	Tm = 69,	// Thulium
+	Yb = 70,	// Ytterbium
+	Lu = 71,	// Lutetium
+	// actinides: the values skipped between Ac (89) and Rf (104)
+	Th = 90,	// Thorium
+	Pa = 91,	// Protactinium
+	U = 92,		// Uranium
+	Np = 93,	// Neptunium
+	Pu = 94,	// Plutonium
+	Am = 95,	// Americium
+	Cm = 96,	// Curium
+	Bk = 97,	// Berkelium
+	Cf = 98,	// Californium
+	Es = 99,	// Einsteinium
+	Fm = 100,	// Fermium
+	Md = 101,	// Mendelevium
+	No = 102,	// Nobelium
+	Lr = 103,	// Lawrencium
+};
+
+// --------------------------------------------------------------------
+// atom_type_info
+// radius_type selects one slot of atom_type_info::radii; eRadiusTypeCount is the number of slots, not a radius kind
+enum radius_type {
+	eRadiusCalculated,
+	eRadiusEmpirical,
+	eRadiusCovalentEmpirical,
+
+	eRadiusSingleBond,		// the default argument of atom_type_traits::radius()
+	eRadiusDoubleBond,
+	eRadiusTripleBond,
+
+	eRadiusVanderWaals,
+
+	eRadiusTypeCount		// must stay last: used as the array bound for radii
+};
+// Static description of one element; atom_type_traits reads its data through a pointer to such a record
+struct atom_type_info
+{
+	atom_type		type;		// the element this record describes
+	std::string		name;
+	std::string		symbol;
+	float			weight;
+	bool			metal;
+	float			radii[eRadiusTypeCount];	// indexed by radius_type; atom_type_traits::radius() returns the entry divided by 100
+};
+// NOTE(review): the array bound is not visible in this header; presumably one record per atom_type -- confirm at the definition
+extern const atom_type_info kKnownAtoms[];
+
+// --------------------------------------------------------------------
+// atom_type_traits
+
+class atom_type_traits
+{
+  public:
+	// Select the element to describe, either by enumerator or by symbol.
+	// Both constructors are defined out of line.
+	atom_type_traits(atom_type a);
+	atom_type_traits(const std::string& symbol);
+
+	// Accessors that forward to the atom_type_info record held by m_info.
+	atom_type type() const
+	{
+		return m_info->type;
+	}
+
+	std::string name() const
+	{
+		return m_info->name;
+	}
+
+	std::string symbol() const
+	{
+		return m_info->symbol;
+	}
+
+	float weight() const
+	{
+		return m_info->weight;
+	}
+
+	bool is_metal() const
+	{
+		return m_info->metal;
+	}
+
+	// Symbol based queries that need no instance (defined out of line).
+	static bool is_element(const std::string& symbol);
+	static bool is_metal(const std::string& symbol);
+
+	// Returns the stored radius of the requested kind divided by 100.
+	// Throws std::invalid_argument when type is not below eRadiusTypeCount.
+	float radius(radius_type type = eRadiusSingleBond) const
+	{
+		if (type < eRadiusTypeCount)
+			return m_info->radii[type] / 100.f;
+
+		throw std::invalid_argument("invalid radius requested");
+	}
+
+  private:
+	const struct atom_type_info*	m_info;
+};
+
+}
--- a/include/cif++/cif++.h
+++ b/include/cif++/cif++.h
+// cif parsing library
+
+#pragma once
+
+#include "libcif/config.h"
+
+#include <regex>
+#include <iostream>
+#include <set>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/any.hpp>
+
+#include "cif-utils.h"
+
+extern int VERBOSE;
+
+/*
+	Simple C++ interface to CIF files.
+	
+	Assumptions: a file contains one or more datablocks modelled by the class datablock.
+	Each datablock contains categories. These map to the original tables used to fill
+	the mmCIF file. Each category can contain multiple items, the columns in the table.
+	
+	Values are stored as character strings internally.
+	
+	Synopsis:
+	
+	// create a cif file
+	
+	cif::datablock e("1MVE");
+	e.append(cif::category{"_entry", { "id", "1MVE" } });
+	
+	cif::category atom_site("atom_site");
+	size_t nr{};
+	for (auto& my_atom: atoms)
+	{
+		atom_site.push_back({
+			{ "group_PDB", "ATOM" },
+			{ "id", ++nr },
+			{ "type_symbol", my_atom.type.str() },
+			...
+		});
+	}
+	
+	e.append(move(atom_site));
+	
+	cif::file f;
+	f.append(e);
+	
+	ofstream os("1mve.cif");
+	f.write(os);
+
+	// read
+	f.read(ifstream{"1mve.cif"});
+	
+	auto& e = f.first_datablock();
+	
+	cout << "ID of datablock: " << e.id() << endl;
+	
+	auto& atoms = e["atom_site"];
+	for (auto& atom: atoms)
+	{
+		cout << atom["group_PDB"] << ", "
+			 << atom["id"] << ", "
+			 ...
+
+		float x, y, z;
+		cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
+		...
+	}
+
+	Another way of querying a category is by using this construct:
+	
+	auto& cat = e["atom_site"];
+	auto rows = cat.find(key("label_asym_id") == "A" and key("label_seq_id") == 1);
+
+
+*/
+
+namespace cif
+{
+
+using std::string;
+using std::vector;
+
+// mmCIF mapping
+// A CIF data file in this case contains entries (data blocks) which can contain
+// one or more category objects. Each category object contains arrays of items.
+// Better, you can consider the categories as tables containing columns which
+// are the items.
+
+class file;
+class datablock;
+class category;
+class row;			// a flyweight class that references data in categories
+class item;
+class validator;
+
+struct validate_item;
+struct validate_category;
+
+struct item_column;
+struct item_row;
+struct item_value;
+
+// --------------------------------------------------------------------
+// class item
+//
+//	This class is only transient, it is used to construct new rows.
+//	Access to already stored data is through an item_reference object.
+
+class item
+{
+  public:
+	typedef enum { not_applicable, not_defined, text, number } item_content_type;
+
+	// An item with an empty name and an empty value.
+	item() = default;
+
+	// Build an item from a name and any value; the conversion of the value to
+	// text is done by the out-of-class definition of this constructor template.
+	template<typename T>
+	item(const string& name, const T& value);
+
+	// Both members are strings, so the compiler generated copy and move
+	// operations are exactly what is needed. Defaulting them (instead of
+	// spelling them out) also makes the move operations noexcept whenever the
+	// string's own moves are, which lets standard containers move items
+	// instead of copying them when they reallocate.
+	item(const item& rhs) = default;
+	item(item&& rhs) = default;
+	item& operator=(const item& rhs) = default;
+	item& operator=(item&& rhs) = default;
+
+	const string& name() const	{ return m_name; }
+	const string& value() const	{ return m_value; }
+
+	// Replace the stored value, the name is left untouched.
+	void value(const string& v)	{ m_value = v; }
+
+	// The following three forward to the stored value string.
+	bool empty() const			{ return m_value.empty(); }
+	size_t length() const		{ return m_value.length(); }
+	const char* c_str() const	{ return m_value.c_str(); }
+
+  private:
+	string	m_name;
+	string	m_value;
+};
+
+// Generic case: the value is converted to its textual form with
+// boost::lexical_cast before it is stored.
+template<typename T>
+inline item::item(const string& name, const T& value)
+	: m_name(name)
+	, m_value(boost::lexical_cast<string>(value))
+{
+}
+
+// A value that already is a string is stored as is, no conversion needed.
+template<>
+inline item::item(const string& name, const string& value)
+	: m_name(name)
+	, m_value(value)
+{
+}
+
+// --------------------------------------------------------------------
+// class datablock acts as an STL container for category objects
+
+class datablock
+{
+  public:
+	friend class file;
+	// the categories are kept in a std::list; the iterator types below are those of that list
+	typedef std::list<category> category_list;
+	typedef category_list::iterator iterator;
+	typedef category_list::const_iterator const_iterator;
+	// a datablock is created with a name and cannot be copied
+	datablock(const string& name);
+	~datablock();
+
+	datablock(const datablock&) = delete;
+	datablock& operator=(const datablock&) = delete;
+
+	string name() const								{ return m_name; }
+	void set_name(const string& n)					{ m_name = n; }
+	// NOTE(review): presumably returns the first value stored for the given tag -- confirm in the implementation
+	string first_item(const string& tag) const;
+
+	iterator begin()		{ return m_categories.begin(); }
+	iterator end()			{ return m_categories.end(); }
+
+	const_iterator begin() const	{ return m_categories.begin(); }
+	const_iterator end() const		{ return m_categories.end(); }
+
+	category& operator[](const string& name);		// unlike get() below this returns a reference, so it cannot signal absence with nullptr
+
+	std::tuple<iterator,bool> emplace(const std::string& name);
+	
+	void validate();
+	void set_validator(validator* v);
+
+	// this one only looks up a category, returns nullptr if it does not exist
+	category* get(const string& name);
+
+	void get_tag_order(vector<string>& tags) const;	// result is delivered through the tags out-parameter
+
+  private:
+	// writing is private; class file is a friend and can therefore call these
+	void write(std::ostream& os);
+	void write(std::ostream& os, const vector<string>& order);
+
+	std::list<category>	m_categories;
+	string				m_name;
+	validator*			m_validator;	// raw pointer; ownership is not visible in this header
+	datablock*			m_next;			// file::m_head points at a datablock, so this presumably chains the blocks of a file -- confirm
+};
+
+// --------------------------------------------------------------------
+// class row acts as a container for item objects, It has a more useful
+// interface for accessing the contained columns. The get() method
+// returns a row_result object that can be used to access only a subset
+// of column values by index or by name.
+
+namespace detail
+{
+	// item_reference is a helper class: a (column name, row data) pair through
+	// which one stored value can be read, converted, compared or assigned.
+	// It is an aggregate and is created with brace initialisation by class row,
+	// so it must not get constructors and the member order must stay as is.
+	struct item_reference
+	{
+		const char*		m_name;
+		item_row*		m_row;
+
+		// Assign any value; it is converted to text first and then handed to
+		// the string overload of operator= (an explicit specialisation that
+		// is declared after this struct).
+		template<typename T>
+		item_reference& operator=(const T& value)
+		{
+			this->operator=(boost::lexical_cast<string>(value));
+			return *this;
+		}
+		
+//		operator string() const	{ return c_str(); }
+		
+		// Convert the stored text to T. An empty value yields a value
+		// initialised T (zero for arithmetic types); value initialisation is
+		// used so that T does not have to be constructible from the literal 0.
+		// A failing conversion lets the exception of boost::lexical_cast escape.
+		template<typename T>
+		T as() const
+		{
+			T result{};
+			if (not empty())
+				result = boost::lexical_cast<T>(c_str());
+			return result;
+		}
+		
+		// Three way comparison of the stored value, converted to T, against
+		// value: returns -1, 0 or 1. When the stored text cannot be converted
+		// the result is 1, and a message is written to std::cerr if VERBOSE
+		// is set.
+		template<typename T>
+		int compare(const T& value) const
+		{
+			int result = 0;
+			try
+			{
+				// compare in T itself: going through a double would lose
+				// precision for large integer values
+				const T v = boost::lexical_cast<T>(c_str());
+				if (v < value)
+					result = -1;
+				else if (v > value)
+					result = 1;
+			}
+			catch (...)
+			{
+				if (VERBOSE)
+					std::cerr << "conversion error in compare for '" << c_str() << '\'' << std::endl;
+				result = 1;
+			}
+			
+			return result;
+		}
+		
+		bool empty() const;
+//		bool unapplicable() const;
+		
+		// the stored text as a C string (defined out of line)
+		const char* c_str() const;
+		
+		// plain string (in)equality against the stored text
+		bool operator!=(const string& s) const		{ return s != c_str(); }
+		bool operator==(const string& s) const		{ return s == c_str(); }
+	};
+
+	// as<string>: hand back a copy of the stored text, no conversion involved
+	template<>
+	inline string item_reference::as<string>() const
+	{
+		const char* text = c_str();
+		return string(text);
+	}
+
+	// as<const char*>: the stored text itself, exactly what c_str() returns
+	template<>
+	inline const char* item_reference::as<const char*>() const
+	{
+		const char* text = c_str();
+		return text;
+	}
+	
+	// Text is not converted before comparing, the outcome is whatever
+	// icompare reports for the stored text and the given string.
+	template<>
+	inline int item_reference::compare<string>(const string& value) const
+	{
+		const char* lhs = c_str();
+		const char* rhs = value.c_str();
+		return icompare(lhs, rhs);
+	}
+
+	// Same as above for a value that already is a C string.
+	template<>
+	inline int item_reference::compare(const char* const& value) const
+	{
+		const char* lhs = c_str();
+		return cif::icompare(lhs, value);
+	}
+	
+	// Streaming an item_reference writes its stored text.
+	inline std::ostream& operator<<(std::ostream& os, const item_reference& rhs)
+	{
+		return os << rhs.c_str();
+	}
+	// declared here, defined elsewhere: the string case that the generic operator= template forwards to
+	template<>
+	item_reference& item_reference::operator=(const string& value);
+
+	// some helper classes to help create tuple result types
+	
+	// tuple_catter<Tuple1, Tuple2, ...>::type is a single std::tuple holding
+	// the element types of all argument tuples, in the order given.
+	template<typename...>
+	struct tuple_catter;
+
+	// end of the recursion: one tuple is its own result
+	template<typename... Ts>
+	struct tuple_catter<std::tuple<Ts...>>
+	{
+		using type = std::tuple<Ts...>;
+	};
+
+	// merge the first two tuples and continue with whatever remains
+	template<typename... T1s, typename... T2s, typename... Rem>
+	struct tuple_catter<std::tuple<T1s...>, std::tuple<T2s...>, Rem...>
+	{
+		using type = typename tuple_catter<std::tuple<T1s..., T2s...>, Rem...>::type;
+	};
+	
+	// col_getter<C...> collects one item_reference per requested column into
+	// a tuple. The column handled by each level is found by counting back
+	// from the end: Res::N is the total number of columns in the result.
+	template<typename...>
+	struct col_getter;
+
+	// last column
+	template<typename T>
+	struct col_getter<T>
+	{
+		using type = std::tuple<const item_reference>;
+
+		template<typename Res>
+		static type get(Res& rs)
+		{
+			const size_t index = Res::N - 1;
+			return type{ rs[index] };
+		}
+	};
+
+	// one column followed by at least one more
+	template<typename T, typename... Ts>
+	struct col_getter<T, Ts...>
+	{
+		using next = col_getter<Ts...>;
+		using type = typename tuple_catter<std::tuple<const item_reference>, typename next::type>::type;
+
+		template<typename Res>
+		static type get(Res& rs)
+		{
+			const size_t index = Res::N - 1 - sizeof...(Ts);
+			std::tuple<const item_reference> head{ rs[index] };
+			return std::tuple_cat(head, next::get(rs));
+		}
+	};
+
+	// The object returned by row::get(): it remembers the row and the names
+	// of the requested columns, and gives access to the matching values by
+	// position.
+	template<typename... C>
+	struct get_row_result
+	{
+		enum { N = sizeof...(C) };
+		using tuple_type = typename col_getter<C...>::type;
+
+		get_row_result(row& r, C... columns)
+			: m_row(r)
+			, m_columns({{columns...}})
+		{
+		}
+
+		// value of the ix'th requested column
+		const item_reference operator[](size_t ix) const
+		{
+			const char* column = m_columns[ix];
+			return m_row[column];
+		}
+
+		row& m_row;
+		std::array<const char*, N> m_columns;
+	};
+	
+	// we want to be able to tie some variables to a row_result, for this we use tiewraps
+
+	// tie_wrap<IX, T, Ts...> holds references to the variables passed to
+	// cif::tie(). Assigning a row result to it converts column IX to the
+	// type of the first variable, stores it, and passes the result on to
+	// the wrapper for the remaining variables.
+	template<int IX, typename... Ts>
+	struct tie_wrap;
+
+	// the last (or only) variable
+	template<int IX, typename T>
+	struct tie_wrap<IX,T>
+	{
+		tie_wrap(T& t)
+			: m_val(t)
+		{
+		}
+
+		template<typename Res>
+		void operator=(const Res& rr)
+		{
+			using basic_type = typename std::remove_reference<T>::type;
+
+			const item_reference v = rr[IX];
+			m_val = v.as<basic_type>();
+		}
+
+		T& 		m_val;
+	};
+
+	// a variable followed by more variables
+	template<int IX, typename T, typename... Ts>
+	struct tie_wrap<IX, T, Ts...>
+	{
+		using next = tie_wrap<IX + 1, Ts...>;
+
+		tie_wrap(T& t, Ts&... ts)
+			: m_val(t)
+			, m_next(ts...)
+		{
+		}
+
+		template<typename Res>
+		void operator=(const Res& rr)
+		{
+			using basic_type = typename std::remove_reference<T>::type;
+
+			const item_reference v = rr[IX];
+			m_val = v.as<basic_type>();
+
+			// hand the same result to the wrapper of column IX + 1
+			m_next.operator=(rr);
+		}
+
+		T& 		m_val;
+		next	m_next;
+	};
+}
+
+// Bundle references to the given variables so that a row result can be
+// assigned to all of them at once, e.g. cif::tie(x, y, z) = row.get(...).
+template<typename... Ts>
+auto tie(Ts&... v) -> detail::tie_wrap<0, Ts...>
+{
+	typedef detail::tie_wrap<0, Ts...> wrapper_type;
+	return wrapper_type(v...);
+}
+
+class row
+{
+  public:
+	friend class category;
+	friend class cat_index;
+	friend class row_comparator;
+	friend struct detail::item_reference;
+
+	row(item_row* data = nullptr) : m_data(data) {}		// without an argument the row refers to no data, see operator bool
+	row(const row& rhs);
+	row& operator=(const row& rhs);
+	// forward iterator over the values of a row; the item it hands out is a member of the iterator itself (m_current)
+	struct const_iterator : public std::iterator<std::forward_iterator_tag, const item>
+	{
+		typedef std::iterator<std::forward_iterator_tag, item>	base_type;
+		typedef typename base_type::pointer						pointer;
+		typedef typename base_type::reference					reference;
+		// NOTE(review): base_type above is declared with item while the base class list uses const item, so reference is a non-const item& -- confirm this is intended
+		const_iterator(item_row* data, item_value* ptr);
+		
+		reference operator*()								{ return m_current; }
+		pointer operator->()								{ return &m_current; }
+		
+		const_iterator& operator++();
+		const_iterator operator++(int)						{ const_iterator result(*this); this->operator++(); return result; } 
+
+		bool operator==(const const_iterator& rhs) const	{ return m_ptr == rhs.m_ptr; } 
+		bool operator!=(const const_iterator& rhs) const	{ return m_ptr != rhs.m_ptr; } 
+		
+	  private:
+
+		void fetch();
+
+	  	item_row*	m_data;
+		item_value*	m_ptr;			// position; the only member used when comparing iterators
+		item		m_current;		// the item returned by operator* and operator->
+	};
+	
+	// checks for an initialized row:
+	operator bool() const									{ return m_data != nullptr; }
+	
+	bool empty() const;
+	const_iterator begin() const;
+	const_iterator end() const;
+
+// TODO: implement real const version?
+	// all four operator[] overloads build an item_reference from the tag pointer and m_data; the const ones only differ in returning a const object
+	const detail::item_reference operator[](const char* item_tag) const
+	{
+		return detail::item_reference{item_tag, m_data};
+	}
+
+	detail::item_reference operator[](const char* item_tag)
+	{
+		return detail::item_reference{item_tag, m_data};
+	}
+
+	const detail::item_reference operator[](const string& item_tag) const
+	{
+		return detail::item_reference{item_tag.c_str(), m_data};	// stores the pointer only: the reference must not outlive item_tag
+	}
+
+	detail::item_reference operator[](const string& item_tag)
+	{
+		return detail::item_reference{item_tag.c_str(), m_data};	// stores the pointer only: the reference must not outlive item_tag
+	}
+
+	template<typename... C>
+	auto get(C... columns) -> detail::get_row_result<C...>
+	{
+		return detail::get_row_result<C...>(*this, columns...);		// the result keeps a reference to this row
+	}
+	
+	bool operator==(const row& rhs) const
+	{
+		return m_data == rhs.m_data;		// identity: equal when both refer to the same item_row
+	}
+
+	item_row* data() const							{ return m_data; }
+
+	void swap(row& rhs)
+	{
+		std::swap(m_data, rhs.m_data);
+	}
+	
+  private:
+
+	void assign(const string& name, const string& value, bool emplacing);
+	void assign(const item& i, bool emplacing);
+
+	item_row*	m_data;
+};
+
+// swap for rows is defined below
+
+// --------------------------------------------------------------------
+// some more templates to be able to do querying
+
+namespace detail
+{
+
+// Interface shared by all query conditions: test() decides whether a row of
+// a category matches, str() renders the condition as text.
+struct condition_impl
+{
+	virtual ~condition_impl() = default;
+
+	virtual bool test(const category& c, const row& r) const = 0;
+
+	virtual std::string str() const = 0;
+};
+
+}
+
+// condition owns a heap allocated detail::condition_impl and deletes it in
+// its destructor. It can be moved but not copied; a moved-from condition
+// holds no implementation any more.
+struct condition
+{
+	// takes ownership of impl
+	condition(detail::condition_impl* impl) : m_impl(impl) {}
+
+	condition(const condition&) = delete;
+	condition& operator=(const condition&) = delete;
+
+	// moving only exchanges pointers and therefore cannot throw
+	condition(condition&& rhs) noexcept
+		: m_impl(rhs.m_impl)
+	{
+		rhs.m_impl = nullptr;
+	}
+	
+	// the previous implementation ends up in rhs and is deleted with it
+	condition& operator=(condition&& rhs) noexcept
+	{
+		std::swap(m_impl, rhs.m_impl);
+		return *this;
+	}
+
+	~condition()
+	{
+		delete m_impl;
+	}
+	
+	// evaluate the condition for row r of category c
+	bool operator()(const category& c, const row& r) const
+	{
+		assert(m_impl);
+		return m_impl->test(c, r);
+	}
+	
+	// textual form of the condition; empty for a condition that was moved
+	// from (this used to dereference a null pointer)
+	std::string str() const
+	{
+		return m_impl != nullptr ? m_impl->str() : std::string();
+	}
+
+	detail::condition_impl*	m_impl;
+};
+
+namespace detail
+{
+
+// Matches rows in which the item with the given tag compares equal to a
+// fixed value.
+template<typename T>
+struct key_is_condition_impl : public condition_impl
+{
+	typedef T value_type;
+
+	key_is_condition_impl(const string& item_tag, const value_type& value)
+		: m_item_tag(item_tag)
+		, m_value(value)
+	{
+	}
+
+	bool test(const category& /*c*/, const row& r) const override
+	{
+		const int order = r[m_item_tag].template compare<value_type>(m_value);
+		return order == 0;
+	}
+
+	std::string str() const override
+	{
+		std::string text(m_item_tag);
+		text += " == ";
+		text += boost::lexical_cast<std::string>(m_value);
+		return text;
+	}
+
+	string m_item_tag;
+	value_type m_value;
+};
+
+// Matches rows in which the item with the given tag does not compare equal
+// to a fixed value.
+template<typename T>
+struct key_is_not_condition_impl : public condition_impl
+{
+	typedef T value_type;
+
+	key_is_not_condition_impl(const string& item_tag, const value_type& value)
+		: m_item_tag(item_tag)
+		, m_value(value)
+	{
+	}
+
+	bool test(const category& /*c*/, const row& r) const override
+	{
+		const int order = r[m_item_tag].template compare<value_type>(m_value);
+		return order != 0;
+	}
+
+	std::string str() const override
+	{
+		std::string text(m_item_tag);
+		text += " != ";
+		text += boost::lexical_cast<std::string>(m_value);
+		return text;
+	}
+
+	string m_item_tag;
+	value_type m_value;
+};
+
+// Delegates the decision to a stored callable; the tag is only kept for the
+// textual form.
+template<typename COMP>
+struct key_compare_condition_impl : public condition_impl
+{
+	key_compare_condition_impl(const string& item_tag, COMP&& comp)
+		: m_item_tag(item_tag)
+		, m_comp(std::move(comp))
+	{
+	}
+
+	bool test(const category& c, const row& r) const override
+	{
+		return m_comp(c, r);
+	}
+
+	// the value compared against is not known here, so it is not shown
+	std::string str() const override
+	{
+		std::string text(m_item_tag);
+		text += " compare ";
+		return text;
+	}
+
+	string m_item_tag;
+	COMP m_comp;
+};
+
+// Matches rows in which the complete text of the item with the given tag
+// matches a regular expression.
+struct key_matches_condition_impl : public condition_impl
+{
+	key_matches_condition_impl(const string& item_tag, const std::regex& rx)
+		: m_item_tag(item_tag)
+		, m_rx(rx)
+	{
+	}
+
+	bool test(const category& /*c*/, const row& r) const override
+	{
+		const string text = r[m_item_tag].as<string>();
+		return std::regex_match(text, m_rx);
+	}
+
+	// the expression itself cannot be printed, a placeholder is used
+	std::string str() const override
+	{
+		return m_item_tag + " ~= <rx>";
+	}
+
+	string m_item_tag;
+	std::regex m_rx;
+};
+
+// Matches rows in which any field equals a fixed value; test() is defined
+// after class category since it needs the list of fields.
+template<typename T>
+struct any_is_condition_impl : public condition_impl
+{
+	typedef T value_type;
+
+	any_is_condition_impl(const value_type& value)
+		: m_value(value)
+	{
+	}
+
+	bool test(const category& c, const row& r) const override;
+
+	std::string str() const override
+	{
+		std::string text("any == ");
+		text += boost::lexical_cast<std::string>(m_value);
+		return text;
+	}
+
+	value_type m_value;
+};
+
+// Matches rows in which any field matches a regular expression; test() is
+// defined after class category since it needs the list of fields.
+struct any_matches_condition_impl : public condition_impl
+{
+	any_matches_condition_impl(const std::regex& rx)
+		: m_rx(rx)
+	{
+	}
+
+	bool test(const category& c, const row& r) const override;
+
+	std::string str() const override
+	{
+		return std::string("any ~= <rx>");
+	}
+
+	std::regex m_rx;
+};
+
+// Conjunction of two conditions. The implementations of both operands are
+// taken over (the operands are left without one) and deleted by this object.
+struct and_condition_impl : public condition_impl
+{
+	and_condition_impl(condition&& a, condition&& b)
+		: m_a(nullptr), m_b(nullptr)
+	{
+		std::swap(m_a, a.m_impl);
+		std::swap(m_b, b.m_impl);
+	}
+	
+	// this object owns m_a and m_b through raw pointers, a memberwise copy
+	// would lead to both being deleted twice
+	and_condition_impl(const and_condition_impl&) = delete;
+	and_condition_impl& operator=(const and_condition_impl&) = delete;
+	
+	~and_condition_impl()
+	{
+		delete m_a;
+		delete m_b;
+	}
+	
+	// true when both operands match; the second is only tested if the first matches
+	bool test(const category& c, const row& r) const override
+	{
+		return m_a->test(c, r) and m_b->test(c, r);
+	}
+
+	std::string str() const override
+	{
+		return "(" + m_a->str() + ") and (" + m_b->str() + ")";
+	}
+		
+	condition_impl* m_a;
+	condition_impl* m_b;
+};
+
+// Disjunction of two conditions. The implementations of both operands are
+// taken over (the operands are left without one) and deleted by this object.
+struct or_condition_impl : public condition_impl
+{
+	or_condition_impl(condition&& a, condition&& b)
+		: m_a(nullptr), m_b(nullptr)
+	{
+		std::swap(m_a, a.m_impl);
+		std::swap(m_b, b.m_impl);
+	}
+	
+	// this object owns m_a and m_b through raw pointers, a memberwise copy
+	// would lead to both being deleted twice
+	or_condition_impl(const or_condition_impl&) = delete;
+	or_condition_impl& operator=(const or_condition_impl&) = delete;
+	
+	~or_condition_impl()
+	{
+		delete m_a;
+		delete m_b;
+	}
+	
+	// true when either operand matches; the second is only tested if the first does not match
+	bool test(const category& c, const row& r) const override
+	{
+		return m_a->test(c, r) or m_b->test(c, r);
+	}
+		
+	std::string str() const override
+	{
+		return "(" + m_a->str() + ") or (" + m_b->str() + ")";
+	}
+		
+	condition_impl* m_a;
+	condition_impl* m_b;
+};
+
+}
+
+// a && b: a new condition that takes over the implementations of a and b
+inline condition operator&&(condition&& a, condition&& b)
+{
+	detail::condition_impl* both = new detail::and_condition_impl(std::move(a), std::move(b));
+	return condition(both);
+}
+
+// a || b: a new condition that takes over the implementations of a and b
+inline condition operator||(condition&& a, condition&& b)
+{
+	detail::condition_impl* either = new detail::or_condition_impl(std::move(a), std::move(b));
+	return condition(either);
+}
+	
+// key names an item (column) and is the left hand side of query
+// expressions such as key("label_asym_id") == "A". Each operator returns a
+// condition that can be passed to category::find and friends.
+//
+// A key is normally a temporary that is gone by the time the condition is
+// evaluated, so a condition must never refer back to the key that created
+// it: every operator stores its own copy of the tag.
+struct key
+{
+	key(const string& item_tag) : m_item_tag(item_tag) {}
+	key(const char* item_tag) : m_item_tag(item_tag) {}
+	
+	// equality, compared by item_reference::compare for type T
+	template<typename T>
+	condition operator==(const T& v) const
+	{
+		return condition(new detail::key_is_condition_impl<T>(m_item_tag, v));
+	}
+
+	// C strings are compared as std::string, a null pointer counts as ""
+	condition operator==(const char* v) const
+	{
+		string value(v ? v : "");
+		return condition(new detail::key_is_condition_impl<std::string>(m_item_tag, value));
+	}
+	
+	template<typename T>
+	condition operator!=(const T& v) const
+	{
+		return condition(new detail::key_is_not_condition_impl<T>(m_item_tag, v));
+	}
+
+	condition operator!=(const char* v) const
+	{
+		string value(v ? v : "");
+		return condition(new detail::key_is_not_condition_impl<std::string>(m_item_tag, value));
+	}
+
+	// The relational operators convert the stored value to T and compare it
+	// with v. The lambdas capture a copy of the tag; capturing this instead
+	// would leave the condition with a dangling pointer as soon as the key
+	// is destroyed.
+	template<typename T>
+	condition operator>(const T& v) const
+	{
+		string tag = m_item_tag;
+		auto comp = [tag, v](const category&, const row& r) -> bool { return r[tag].as<T>() > v; };
+		return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
+	}
+
+	template<typename T>
+	condition operator>=(const T& v) const
+	{
+		string tag = m_item_tag;
+		auto comp = [tag, v](const category&, const row& r) -> bool { return r[tag].as<T>() >= v; };
+		return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
+	}
+
+	template<typename T>
+	condition operator<(const T& v) const
+	{
+		string tag = m_item_tag;
+		auto comp = [tag, v](const category&, const row& r) -> bool { return r[tag].as<T>() < v; };
+		return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
+	}
+
+	template<typename T>
+	condition operator<=(const T& v) const
+	{
+		string tag = m_item_tag;
+		auto comp = [tag, v](const category&, const row& r) -> bool { return r[tag].as<T>() <= v; };
+		return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
+	}
+	
+	string m_item_tag;
+};
+
+// key == regex: a condition that matches the item's text against the
+// regular expression instead of comparing for equality
+template<>
+inline condition key::operator==(const std::regex& rx) const
+{
+	detail::condition_impl* matcher = new detail::key_matches_condition_impl(m_item_tag, rx);
+	return condition(matcher);
+}
+
+// any is the counterpart of key for queries that are not tied to one item:
+// any() == value builds a condition on all fields of a row.
+struct any
+{
+	template<typename T>
+	condition operator==(const T& v) const
+	{
+		typedef detail::any_is_condition_impl<T> impl_type;
+		return condition(new impl_type(v));
+	}
+};
+
+// any == regex: a condition that matches the text of the fields against the
+// regular expression instead of comparing for equality
+template<>
+inline condition any::operator==(const std::regex& rx) const
+{
+	detail::condition_impl* matcher = new detail::any_matches_condition_impl(rx);
+	return condition(matcher);
+}
+
+// --------------------------------------------------------------------
+// class rowset is used to return find results. Use it to re-order the results
+// or to group them 
+
+class rowset : public vector<row>		// is-a vector of rows: all vector operations are available on a result
+{
+  public:
+	rowset(category& cat);
+	// order by a single item: forwards to the initializer_list overload below
+	rowset& orderBy(const string& item)
+		{ return orderBy({ item }); }
+	// order by several items; returns a rowset& so calls can be chained
+	rowset& orderBy(std::initializer_list<string> items);
+
+  private:
+	category&	m_cat;		// held by reference: the category must outlive the rowset
+};
+
+// --------------------------------------------------------------------
+// class category acts as an STL container for row objects 
+
+class category
+{
+  public:
+	friend class datablock;
+	friend class row;
+	friend struct detail::item_reference;
+
+	category(datablock& db, const string& name, validator* validator);
+	category(const category&) = delete;
+	category& operator=(const category&) = delete;
+	~category();
+
+	const string name() const						{ return m_name; }
+	// NOTE(review): presumably the named item of the first row -- confirm in the implementation
+	const detail::item_reference get_first_item(const char* item_name) const;
+
+	struct iterator : public std::iterator<std::forward_iterator_tag, row>
+	{
+		friend class category;
+		
+		typedef std::iterator<std::forward_iterator_tag, row>	base_type;
+		typedef typename base_type::pointer						pointer;
+		typedef typename base_type::reference					reference;
+		
+		iterator(item_row* data) : m_current(data) {}
+		
+		reference operator*()						{ return m_current; }	// refers to the row stored inside the iterator
+		pointer operator->()						{ return &m_current; }
+		
+		iterator& operator++();
+		iterator operator++(int)					{ iterator result(*this); this->operator++(); return result; } 
+
+		bool operator==(const iterator& rhs) const	{ return m_current == rhs.m_current; } 
+		bool operator!=(const iterator& rhs) const	{ return not (m_current == rhs.m_current); } 
+		
+	  private:
+		row		m_current;
+	};
+	
+	iterator begin();
+	iterator end();
+
+	bool empty() const;
+	size_t size() const;
+	
+	void clear();
+	
+	row front()										{ return row(m_head); }	// row wrapping m_head
+	row back()										{ return row(m_tail); }	// row wrapping m_tail
+	// queries; conditions are passed as rvalues, e.g. built with key(...) == ...
+	row operator[](condition&& cond);
+	rowset find(condition&& cond);
+	bool exists(condition&& cond);
+	// order by a single item: forwards to the initializer_list overload below
+	rowset orderBy(const string& item)
+		{ return orderBy({ item }); }
+	
+	rowset orderBy(std::initializer_list<string> items);
+	// the first two emplace overloads forward to the iterator range overload
+	std::tuple<row,bool> emplace(item value)		{ return emplace({ value }); }
+
+	std::tuple<row,bool> emplace(std::initializer_list<item> values)
+		{ return emplace(values.begin(), values.end()); }
+
+	std::tuple<row,bool> emplace(row r);
+	
+	template<class Iter>
+	std::tuple<row,bool> emplace(Iter b, Iter e);
+
+	void erase(condition&& cond);
+	void erase(row r);
+	void erase(iterator ri);
+
+	void validate();
+
+	const validator& get_validator() const;
+	const validate_category* get_cat_validator() const		{ return m_cat_validator; }
+	
+	void set_validator(validator* v);
+
+	iset fields() const;
+	iset mandatory_fields() const;
+	iset key_fields() const;
+	
+	void drop(const string& field);
+
+	void get_tag_order(vector<string>& tags) const;		// result is delivered through the tags out-parameter
+
+	// return index for known column, or the next available column index
+	size_t get_column_index(const string& name) const;
+	const string& get_column_name(size_t column_index) const;
+
+	void reorderByIndex();
+
+  private:
+	// writing is private; class datablock is a friend and can therefore call these
+	void write(std::ostream& os);
+	void write(std::ostream& os, const vector<string>& order);
+	void write(std::ostream& os, const vector<int>& order, bool includeEmptyColumns);
+
+	size_t add_column(const string& name);
+	
+	datablock&			m_db;
+	string				m_name;
+	validator*			m_validator;
+	const validate_category*	m_cat_validator = nullptr;
+	vector<item_column>	m_columns;
+	item_row*			m_head;			// wrapped by front()
+	item_row*			m_tail;			// wrapped by back()
+	class cat_index*	m_index;
+};
+
+// --------------------------------------------------------------------
+
+class file
+{
+  public:
+	friend class parser;
+	friend class validator;
+
+	file();
+	file(std::istream& is, bool validate = false);
+	file(file&& rhs);
+	file(const file& rhs) = delete;				// movable only: copying is disabled
+	file& operator=(const file& rhs) = delete;
+	
+	~file();
+
+	void load(std::istream& is);
+	void save(std::ostream& os);
+
+	void save(std::ostream& os, const vector<string>& order)	{ write(os, order); }	// same as write(os, order)
+	void write(std::ostream& os, const vector<string>& order);
+
+	void load_dictionary();						// load the default dictionary, that is mmcif_ddl in this case
+	void load_dictionary(const char* dict);		// load one of the compiled in dictionaries 
+	void load_dictionary(std::istream& is);		// load dictionary from input stream
+
+	void validate();
+	
+	datablock& first_datablock()			{ return *m_head; }		// dereferences m_head without a check
+	void append(datablock* e);
+	
+	datablock& operator[](const string& name);
+
+	struct iterator : public std::iterator<std::forward_iterator_tag, datablock>
+	{
+		typedef std::iterator<std::forward_iterator_tag, datablock>	base_type;
+		typedef typename base_type::pointer							pointer;
+		typedef typename base_type::reference						reference;
+		
+		iterator(datablock* db) : m_current(db) {}
+		
+		reference operator*()						{ return *m_current; }
+		pointer operator->()						{ return m_current; }
+		
+		iterator& operator++();
+		iterator operator++(int)					{ iterator result(*this); this->operator++(); return result; } 
+
+		bool operator==(const iterator& rhs) const	{ return m_current == rhs.m_current; } 
+		bool operator!=(const iterator& rhs) const	{ return not (m_current == rhs.m_current); } 
+		
+	  private:
+		datablock*		m_current;		// the datablock the iterator points at
+	};
+	
+	iterator begin() const;
+	iterator end() const;
+	
+	const validator& get_validator() const;
+	void get_tag_order(vector<string>& tags) const;		// result is delivered through the tags out-parameter
+	
+  private:
+
+	void set_validator(validator* v);
+
+	datablock*	m_head;			// the datablock returned by first_datablock()
+	validator*	m_validator;
+};
+
+// --------------------------------------------------------------------
+// some postponed inlines
+
+namespace detail
+{
+
+// True as soon as one field of the row, converted to value_type, equals the
+// stored value. Fields whose text cannot be converted are skipped.
+template<typename T>
+inline
+bool any_is_condition_impl<T>::test(const category& c, const row& r) const
+{
+	const auto field_names = c.fields();
+
+	for (const auto& field: field_names)
+	{
+		try
+		{
+			if (r[field].as<value_type>() == m_value)
+				return true;
+		}
+		catch (...)
+		{
+			// this field does not hold a value_type, so it cannot be a match
+		}
+	}
+
+	return false;
+}
+
+// True as soon as the complete text of one field of the row matches the
+// stored regular expression. A field that raises an exception is skipped.
+inline bool any_matches_condition_impl::test(const category& c, const row& r) const
+{
+	const auto field_names = c.fields();
+
+	for (const auto& field: field_names)
+	{
+		try
+		{
+			const string text = r[field].as<string>();
+			if (std::regex_match(text, m_rx))
+				return true;
+		}
+		catch (...)
+		{
+			// treat a failing field as not matching and carry on
+		}
+	}
+
+	return false;
+}
+	
+}
+
+}
+
+namespace std
+{
+
+// std::swap for rows: exchanges the data the two rows refer to by way of
+// row's own swap member
+template<>
+inline void swap(cif::row& a, cif::row& b)
+{
+	b.swap(a);
+}
+
+}
+
--- a/include/cif++/cif-parser.h
+++ b/include/cif++/cif-parser.h
+// CIF parser
+
+#include "libcif/cif++.h"
+
+#include <stack>
+
+namespace cif
+{
+
+// --------------------------------------------------------------------
+// exception type of the parser, a std::runtime_error constructed from a line number and a message
+class cif_parser_error : public std::runtime_error
+{
+  public:
+	cif_parser_error(uint32 line_nr, const std::string& message);	// defined out of line; how line_nr ends up in what() is not visible here
+};
+
+// --------------------------------------------------------------------
+
+extern const uint32 kMaxLineLength;
+
+extern const uint8 kCharTraitsTable[128];
+// bit flags stored in kCharTraitsTable; each is_* helper below tests one of them
+enum CharTraitsMask: uint8 {
+	kOrdinaryMask = 1 << 0,		// tested by is_ordinary
+	kNonBlankMask = 1 << 1,		// tested by is_non_blank
+	kTextLeadMask = 1 << 2,		// tested by is_text_lead
+	kAnyPrintMask = 1 << 3		// tested by is_any_print
+};
+
+// True for the comment character '#' and for everything std::isspace
+// classifies as white space.
+inline bool is_white(int ch)
+{
+	if (ch == '#')
+		return true;
+
+	return std::isspace(ch) != 0;
+}
+
+// True when ch lies in the range 0x20..0x7f and its entry in
+// kCharTraitsTable (indexed by ch - 0x20) has the kOrdinaryMask bit set.
+inline bool is_ordinary(int ch)
+{
+	if (ch < 0x20 or ch > 0x7f)
+		return false;
+
+	return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
+}
+
+// True when ch lies in the range 0x21..0x7f (the space character itself
+// is excluded) and its entry in kCharTraitsTable has the kNonBlankMask
+// bit set.
+inline bool is_non_blank(int ch)
+{
+	if (ch <= 0x20 or ch > 0x7f)
+		return false;
+
+	return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
+}
+
+// True when ch lies in the range 0x20..0x7f and its entry in
+// kCharTraitsTable has the kTextLeadMask bit set.
+inline bool is_text_lead(int ch)
+{
+	if (ch < 0x20 or ch > 0x7f)
+		return false;
+
+	return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
+}
+
+// True for a tab, and for any ch in the range 0x20..0x7f whose entry in
+// kCharTraitsTable has the kAnyPrintMask bit set.
+inline bool is_any_print(int ch)
+{
+	if (ch == '\t')
+		return true;
+
+	if (ch < 0x20 or ch > 0x7f)
+		return false;
+
+	return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
+}
+
+// True when the zero terminated string s starts with an 'ordinary'
+// character and all following characters are 'non blank'. An empty
+// string yields false since the terminator is not an ordinary character.
+inline bool is_unquoted_string(const char* s)
+{
+	if (not is_ordinary(*s))
+		return false;
+
+	for (const char* p = s + 1; *p != 0; ++p)
+	{
+		if (not is_non_blank(*p))
+			return false;
+	}
+
+	return true;
+}
+
+// --------------------------------------------------------------------
+
+std::tuple<std::string,std::string> split_tag_name(const std::string& tag);
+
+// --------------------------------------------------------------------
+// sac parser, analogous to a SAX parser (simple api for xml).
+//
+// Abstract base class: it is constructed on an input stream and declares
+// four pure virtual produce_* methods that a derived class has to
+// implement. The stream is held by reference (m_data), it is not owned
+// and must outlive the parser.
+
+class sac_parser
+{
+  public:
+	sac_parser(std::istream& is);
+	virtual ~sac_parser() {}
+
+	// The kinds of token returned by get_next_token()
+	enum CIFToken
+	{
+		eCIFTokenUnknown,
+		
+		eCIFTokenEOF,
+	
+		eCIFTokenDATA,
+		eCIFTokenLOOP,
+		eCIFTokenGLOBAL,
+		eCIFTokenSAVE,
+		eCIFTokenSTOP,
+		eCIFTokenTag,
+		eCIFTokenValue,
+	};
+
+	// presumably printable names indexed by CIFToken, defined in the
+	// implementation file -- TODO confirm
+	static const char* kTokenName[];
+
+	// The type of the value of a token, see m_token_type
+	enum CIFValueType
+	{
+		eCIFValueInt,
+		eCIFValueFloat,
+		eCIFValueNumeric,
+		eCIFValueString,
+		eCIFValueTextField,
+		eCIFValueInapplicable,
+		eCIFValueUnknown
+	};
+
+	// presumably printable names indexed by CIFValueType -- TODO confirm
+	static const char* kValueName[];
+	
+	// low level scanner interface
+	int get_next_char();
+
+	void retract();
+	void restart();
+	
+	CIFToken get_next_token();
+	void match(CIFToken token);
+
+	// parsing entry points
+	void parse_file();
+	void parse_global();
+	void parse_data_block();
+
+	// virtual, dict_parser provides its own version
+	virtual void parse_save_frame();
+	
+	void parse_dictionary();
+	
+	void error(const std::string& msg);
+	
+	// production methods, these are pure virtual here
+	
+	virtual void produce_datablock(const std::string& name) = 0;
+	virtual void produce_category(const std::string& name) = 0;
+	virtual void produce_row() = 0;
+	// all string parameters are spelled std::string, the same way the
+	// overriding parser::produce_item declares them
+	virtual void produce_item(const std::string& category, const std::string& item, const std::string& value) = 0;
+
+  protected:
+
+	// States of the tokenizer. The numeric states start at a fixed value;
+	// presumably sub states are derived by adding an offset -- TODO confirm
+	enum State
+	{
+		eStateStart,
+		eStateWhite,
+		eStateComment,
+		eStateQuestionMark,
+		eStateDot,
+		eStateQuotedString,
+		eStateQuotedStringQuote,
+		eStateUnquotedString,
+		eStateTag,
+		eStateTextField,
+		eStateFloat = 100,
+		eStateInt = 110,
+//		eStateNumericSuffix = 200,
+		eStateValue = 300
+	};
+
+	std::istream&			m_data;			// the input, not owned
+
+	// parser state
+	bool					m_validate;
+	uint32					m_line_nr;
+	bool					m_bol;
+	int						m_state, m_start;
+	CIFToken				m_lookahead;
+	std::string				m_token_value;
+	CIFValueType			m_token_type;
+	std::stack<int>			m_buffer;		// presumably the characters pushed back by retract() -- TODO confirm
+};
+
+// --------------------------------------------------------------------
+
+// Concrete sac_parser implementing the four produce_* methods.
+// It keeps a reference to the cif::file passed to the constructor
+// (m_file, not owned) plus a datablock pointer, a category iterator and
+// a row; presumably these track where parsed items are stored -- the
+// implementation is not visible in this header.
+class parser : public sac_parser
+{
+  public:
+	parser(std::istream& is, file& f);
+
+	// 'override' makes the compiler verify that these signatures keep
+	// matching the pure virtuals declared in sac_parser
+	void produce_datablock(const std::string& name) override;
+	void produce_category(const std::string& name) override;
+	void produce_row() override;
+	void produce_item(const std::string& category, const std::string& item, const std::string& value) override;
+
+  protected:
+	file&					m_file;
+	datablock*				m_data_block;
+	datablock::iterator		m_cat;
+	row						m_row;
+};
+
+// --------------------------------------------------------------------
+
+// Parser for dictionary files. It is constructed on a validator and an
+// input stream; presumably load_dictionary() fills that validator with
+// what is read from the stream -- the implementation is not visible here.
+class dict_parser : public parser
+{
+  public:
+
+	dict_parser(validator& validator, std::istream& is);
+	~dict_parser();
+	
+	void load_dictionary();
+	
+  private:
+
+	// replaces sac_parser::parse_save_frame, 'override' lets the compiler
+	// check that the signatures keep matching
+	void parse_save_frame() override;
+	
+	bool collect_item_types();
+	void link_items();
+
+	validator&						m_validator;
+	// NOTE(review): this member hides parser::m_file, which is a file&.
+	// Presumably the base class reference is bound to this object -- confirm
+	// in the constructor.
+	file							m_file;
+	// NOTE(review): raw pointer, presumably owned and released in the
+	// destructor -- confirm. No copy operations are declared here.
+	struct dict_parser_data_impl*	m_impl;
+	bool							m_collected_item_types = false;
+};
+
+}
--- a/include/cif++/cif-utils.h
+++ b/include/cif++/cif-utils.h
+// cif parsing library
+
+#pragma once
+
+#include <vector>
+#include <set>
+
+#include "libcif/config.h"
+
+namespace cif
+{
+
+// some basic utilities: Since we're using ASCII input only, we define for optimisation
+// our own case conversion routines.
+
+bool iequals(const std::string& a, const std::string& b);
+int icompare(const std::string& a, const std::string& b);
+
+bool iequals(const char* a, const char* b);
+int icompare(const char* a, const char* b);
+
+void to_lower(std::string& s);
+std::string to_lower_copy(const std::string& s);
+
+// To make life easier, we also define iless and iset, both based on icompare
+
+// Ordering functor for strings: a comes before b when icompare(a, b)
+// returns a negative value.
+struct iless
+{
+	bool operator()(const std::string& a, const std::string& b) const
+	{
+		const int order = icompare(a, b);
+		return order < 0;
+	}
+};
+
+typedef std::set<std::string, iless>	iset;
+
+// --------------------------------------------------------------------
+// This really makes a difference, having our own tolower routines
+
+extern const uint8 kCharToLowerMap[256];
+
+// Table driven tolower: ch is taken as an unsigned byte and used as
+// index into kCharToLowerMap.
+inline char tolower(char ch)
+{
+	const uint8 index = static_cast<uint8>(ch);
+	return static_cast<char>(kCharToLowerMap[index]);
+}
+
+// --------------------------------------------------------------------
+
+std::tuple<std::string,std::string> split_tag_name(const std::string& tag);
+
+// --------------------------------------------------------------------
+//	custom wordwrapping routine
+
+std::vector<std::string> word_wrap(const std::string& text, unsigned int width);
+
+}
--- a/include/cif++/cif-validator.h
+++ b/include/cif++/cif-validator.h
+// cif parsing library
+
+#include "libcif/cif++.h"
+
+#include <boost/filesystem/path.hpp>
+
+// the std regex of gcc is crashing....
+#include <boost/regex.hpp>
+#include <set>
+
+namespace cif
+{
+	
+struct validate_category;
+
+// --------------------------------------------------------------------
+
+// Exception carrying a validation message. The message is stored in the
+// public member m_msg and returned by what().
+class validation_error : public std::exception
+{
+  public:
+	validation_error(const std::string& msg) : m_msg(msg) {}
+
+	// 'override' makes the compiler check this really replaces
+	// std::exception::what
+	const char* what() const noexcept override	{ return m_msg.c_str(); }
+
+	std::string m_msg;
+};
+
+// --------------------------------------------------------------------
+
+// The primitive types a validate_type can have (see
+// validate_type::m_primitive_type).
+// NOTE(review): presumably these correspond to the 'char', 'uchar' and
+// 'numb' primitive codes of the DDL -- confirm against
+// map_to_primitive_type.
+enum DDL_PrimitiveType
+{
+	ptChar, ptUChar, ptNumb
+};
+
+DDL_PrimitiveType map_to_primitive_type(const std::string& s);
+
+// Description of a type: a name, a primitive type and a regular
+// expression. Objects are ordered case insensitively on name (via
+// icompare), which is what std::set<validate_type> in validator uses.
+struct validate_type
+{
+	std::string				m_name;
+	DDL_PrimitiveType		m_primitive_type;
+	boost::regex			m_rx;
+
+	// order on name using icompare
+	bool operator<(const validate_type& rhs) const
+	{
+		return icompare(m_name, rhs.m_name) < 0;
+	}
+
+	// compare values based on type	
+//	int compare(const std::string& a, const std::string& b) const
+//	{
+//		return compare(a.c_str(), b.c_str());
+//	}
+	
+	// defined out of line; presumably returns <0, 0 or >0 like icompare
+	// -- TODO confirm
+	int compare(const char* a, const char* b) const;
+};
+
+// Validation information for a single item (tag). Objects are ordered and
+// compared case insensitively on m_tag. The pointers stored here refer to
+// objects that are not owned by this struct.
+struct validate_item
+{
+	std::string				m_tag;
+	bool					m_mandatory;
+	const validate_type*	m_type;
+	cif::iset				m_enums;		// presumably the allowed values, if restricted -- TODO confirm
+	validate_item*			m_parent = nullptr;
+	std::set<validate_item*>
+							m_children;
+	validate_category*		m_category = nullptr;
+	std::set<validate_item*>
+							m_foreign_keys;
+	
+	// defined out of line
+	void set_parent(validate_item* parent);
+
+	// order on tag using icompare
+	bool operator<(const validate_item& rhs) const
+	{
+		return icompare(m_tag, rhs.m_tag) < 0;
+	}
+
+	// equal when the tags are equal according to iequals
+	bool operator==(const validate_item& rhs) const
+	{
+		return iequals(m_tag, rhs.m_tag);
+	}
+
+	// defined out of line; presumably validates value and reports a
+	// failure by throwing -- TODO confirm
+	void operator()(std::string value) const;
+};
+
+// Validation information for a category: its name, keys, groups, the
+// mandatory fields and the validators for its items. Objects are ordered
+// case insensitively on m_name (via icompare).
+struct validate_category
+{
+	std::string					m_name;
+	// std::string is spelled out so this header does not depend on a
+	// using declaration made elsewhere
+	std::vector<std::string>	m_keys;
+	cif::iset					m_groups;
+	cif::iset					m_mandatory_fields;
+	std::set<validate_item>		m_item_validators;
+
+	// order on name using icompare
+	bool operator<(const validate_category& rhs) const
+	{
+		return icompare(m_name, rhs.m_name) < 0;
+	}
+
+	// both are defined out of line
+	void add_item_validator(validate_item&& v);
+	
+	const validate_item* get_validator_for_item(std::string tag) const;
+	
+	// read only access to the set of item validators
+	const std::set<validate_item>& item_validators() const
+	{
+		return m_item_validators;
+	}
+};
+
+// --------------------------------------------------------------------
+
+// A validator holds a set of type validators and a set of category
+// validators together with the name and version of a dictionary.
+// Objects can be moved but not copied. dict_parser is a friend and
+// therefore has access to the private members.
+class validator
+{
+  public:
+	friend class dict_parser;
+
+	validator();
+	~validator();
+
+	validator(const validator& rhs) = delete;
+	validator& operator=(const validator& rhs) = delete;
+	
+	validator(validator&& rhs);
+	validator& operator=(validator&& rhs);
+	
+	// the lookup functions return a pointer; presumably nullptr when
+	// nothing was found -- TODO confirm in the implementation
+	void add_type_validator(validate_type&& v);
+	const validate_type* get_validator_for_type(std::string type_code) const;
+
+	void add_category_validator(validate_category&& v);
+	const validate_category* get_validator_for_category(std::string category) const;
+
+	// presumably the behaviour depends on m_strict -- TODO confirm
+	void report_error(const std::string& msg);
+	
+	// getter and setter for the dictionary name
+	std::string dict_name() const					{ return m_name; }
+	void dict_name(const std::string& name)			{ m_name = name; }
+
+	// getter and setter for the dictionary version
+	std::string dict_version() const				{ return m_version; }
+	void dict_version(const std::string& version)	{ m_version = version; }
+
+  private:
+
+	// name is fully qualified here:
+	validate_item* get_validator_for_item(std::string name) const;
+
+	std::string					m_name;
+	std::string					m_version;
+	bool						m_strict = false;
+//	std::set<uint32>			m_sub_categories;
+	std::set<validate_type>		m_type_validators;
+	std::set<validate_category>	m_category_validators;
+};
+
+}
--- a/include/cif++/cif2pdb.h
+++ b/include/cif++/cif2pdb.h
+#pragma once
+
+#include "cif++.h"
+
+void WritePDBFile(std::ostream& pdbFile, cif::file& cifFile);
--- a/include/cif++/compound.h
+++ b/include/cif++/compound.h
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#pragma once
+
+#include <set>
+#include <tuple>
+#include <vector>
+#include <map>
+
+#include "libcif/atom_type.h"
+
+namespace libcif
+{
+
+// --------------------------------------------------------------------
+// The chemical composition of the structure in an mmCIF file is
+// defined in the class composition. A composition consists of
+// entities. Each entity can be either a polymer, a non-polymer,
+// a macrolide or a water molecule.
+// Entities themselves are made up of compounds, and compounds
+// contain comp_atom records for each atom.
+
+class composition;
+class entity;
+class compound;
+struct comp_atom;
+
+// --------------------------------------------------------------------
+// struct containing information about an atom in a chemical compound
+// This information comes from the CCP4 monomer library. 
+
+struct comp_atom
+{
+	std::string	id;					// the atom id, see compound::get_atom_by_id
+	atom_type	type_symbol;		// the element, as atom_type
+	std::string	type_energy;		// presumably the energy type from the monomer library -- TODO confirm
+	float		partial_charge;
+};
+
+// --------------------------------------------------------------------
+// a class that contains information about a chemical compound.
+// This information is derived from the ccp4 monomer library by default.
+// To create compounds, you'd best use the factory method.
+
+class compound
+{
+  public:
+
+	// The atoms and bonds are moved into the new object. The bonds map
+	// has a tuple of two strings as key (presumably the ids of the two
+	// bonded atoms -- TODO confirm) and a float as value.
+	compound(const std::string& id, const std::string& name,
+		const std::string& group, std::vector<comp_atom>&& atoms,
+		std::map<std::tuple<std::string,std::string>,float>&& bonds)
+		: m_id(id), m_name(name), m_group(group)
+		, m_atoms(std::move(atoms)), m_bonds(std::move(bonds))
+	{
+	}
+
+	~compound();
+
+	// factory method, create a compound based on the three letter code
+	// (for amino acids) or the one-letter code (for bases) or the
+	// code as it is known in the CCP4 monomer library.
+	// NOTE(review): who owns the returned pointer cannot be told from
+	// this header -- confirm before deleting it.
+
+	static const compound* create(const std::string& id);
+
+	// this second factory method can create a compound even if it is not
+	// recorded in the library. It will take the values from the CCP4 lib
+	// unless the value passed to this function is not empty.
+	static const compound* create(const std::string& id, const std::string& name,
+		const std::string& type, const std::string& formula);
+	
+	// add an additional path to the monomer library.
+	static void add_monomer_library_path(const std::string& dir);
+
+	// accessors, note that these return copies
+	std::string id() const					{ return m_id; }
+	std::string	name() const				{ return m_name; }
+	std::string	type() const;
+//	std::string group() const				{ return m_group; }
+	std::vector<comp_atom> atoms() const	{ return m_atoms; }
+	
+	comp_atom get_atom_by_id(const std::string& atom_id) const;
+	
+	bool atoms_bonded(const std::string& atom_id_1, const std::string& atom_id_2) const;
+	float atom_bond_value(const std::string& atom_id_1, const std::string& atom_id_2) const;
+
+	// the following are all defined out of line
+	std::string formula() const;
+	float formula_weight() const;
+	int charge() const;
+	bool is_water() const;
+
+  private:
+//	entity&					m_entity;
+	std::string				m_id;
+	std::string				m_name;
+	std::string				m_group;
+	std::vector<comp_atom>	m_atoms;
+	std::map<std::tuple<std::string,std::string>,float>	m_bonds;
+};
+
+// --------------------------------------------------------------------
+// an entity. This is a base class for polymer_entity and non_poly_entity
+// The latter can be either a regular non-polymer (residue), a macrolide or
+// water.
+// The class is abstract, formula_weight() has to be implemented by the
+// derived classes.
+
+class entity
+{
+  public:
+	entity(const std::string& id, const std::string& type, const std::string& description);
+	virtual ~entity();
+
+	// accessors, defined out of line
+	std::string id() const;
+	std::string	type() const;
+	std::string description() const;
+
+	virtual float formula_weight() const = 0;
+	
+  private:
+	std::string		m_id;
+	std::string		m_type;
+	std::string		m_description;
+};
+
+// --------------------------------------------------------------------
+// A polymer entity
+//
+// The sequence is stored as a singly linked list of monomer objects
+// starting at m_seq. The nested iterator walks this list by following
+// monomer::m_next, the end of the list is marked by a null pointer.
+
+class polymer_entity : public entity
+{
+  public:
+	polymer_entity(const std::string& id, const std::string& description);
+	~polymer_entity();
+	
+	std::string		seq_one_letter_code(bool cannonical) const;
+	std::string		pdbx_strand_id() const;
+
+	// implements entity::formula_weight, 'override' lets the compiler
+	// check that the signatures keep matching
+	float			formula_weight() const override;
+	
+	// One position in the sequence.
+	// NOTE(review): the members have no initialisers and no constructor is
+	// declared here; presumably polymer_entity (a friend) fills them in
+	// -- confirm.
+	class monomer
+	{
+	  public:
+		friend class polymer_entity;
+		
+		size_t				num() const;					// sequence number
+		bool				hetero() const;					// whether this position contains alternate compounds
+		const compound&		comp(size_t alt_nr) const;		// the chemical compound of this monomer
+		
+	  private:
+		monomer*	m_next;
+		monomer*	m_alt;
+		size_t		m_num;
+		compound*	m_comp;
+	};
+	
+	// Forward iterator over the monomers. A default constructed iterator
+	// holds a null pointer and serves as the end iterator. Incrementing
+	// or dereferencing the end iterator is not checked.
+	class iterator : public std::iterator<std::forward_iterator_tag, const monomer>
+	{
+	  public:
+		typedef std::iterator<std::forward_iterator_tag, const monomer>	base_type;
+		typedef base_type::reference									reference;
+		typedef base_type::pointer										pointer;
+		
+		iterator(monomer* monomer = nullptr)
+			: m_cursor(monomer) {}
+
+		iterator(const iterator& rhs)
+			: m_cursor(rhs.m_cursor)
+		{
+		}
+
+		iterator& operator=(const iterator& rhs)
+		{
+			m_cursor = rhs.m_cursor;
+			return *this;
+		}
+		
+		reference	operator*()			{ return *m_cursor; }
+		pointer		operator->()		{ return m_cursor; }
+		
+		iterator&	operator++()		{ m_cursor = m_cursor->m_next; return *this; }
+		iterator	operator++(int)
+		{
+			iterator tmp(*this);
+			operator++();
+			return tmp;
+		}
+		
+		bool		operator==(const iterator& rhs) const		{ return m_cursor == rhs.m_cursor; }
+		bool		operator!=(const iterator& rhs) const		{ return m_cursor != rhs.m_cursor; }
+
+	  private:
+		monomer*	m_cursor;
+	};
+	
+	iterator begin() const		{ return iterator(m_seq); }
+	iterator end() const		{ return iterator(); }
+	
+	const monomer& operator[](size_t index) const;
+
+  private:
+	// NOTE(review): a reference to an entity inside a class that already
+	// derives from entity; what it is bound to cannot be told from this
+	// header -- confirm in the constructor.
+	entity&		m_entity;
+	monomer*	m_seq;			// head of the list of monomers
+};
+
+// --------------------------------------------------------------------
+// non_poly entity
+//
+// An entity that refers to a single compound.
+
+class non_poly_entity : public entity
+{
+  public:
+	non_poly_entity(const std::string& id, const std::string& type, const std::string& description);
+	~non_poly_entity();
+	
+	compound&		comp() const;
+
+	// implements entity::formula_weight, 'override' lets the compiler
+	// check that the signatures keep matching
+	float			formula_weight() const override;
+
+  private:
+	// NOTE(review): raw pointer, ownership cannot be told from this
+	// header -- confirm in the implementation.
+	compound*	m_compound;
+};
+
+}
--- a/include/cif++/config.h
+++ b/include/cif++/config.h
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#pragma once
+
+#include <string>
+
+#define HAVE_CPP0X_TEMPLATE_ALIASES 1
+#define HAVE_CPP0X_VARIADIC_TEMPLATES 1
+#define HAVE_CPP0X_INITIALIZER_LISTS 1
+
+#if defined(_MSC_VER)
+
+// These are Microsoft Visual C++ special settings
+// the iso646 file contains the C++ keywords that are
+// otherwise not recognized.
+#include <ciso646>
+#define snprintf _snprintf
+
+// Disable some warnings
+#pragma warning (disable : 4996)
+#pragma warning (disable : 4355)
+#endif
+
+#include <boost/version.hpp>
+#include <boost/cstdint.hpp>
+
+// Short names for the fixed width integer types
+using int8		= int8_t;
+using uint8		= uint8_t;
+using int16		= int16_t;
+using uint16	= uint16_t;
+using int32		= int32_t;
+using uint32	= uint32_t;
+using int64		= int64_t;
+using uint64	= uint64_t;
--- a/include/cif++/pdb2cif-remark3.h
+++ b/include/cif++/pdb2cif-remark3.h
+#pragma once
+
+#include "pdb2cif.h"
+
+// --------------------------------------------------------------------
+
+struct TemplateLine;
+
+// Base class for parsers of REMARK 3 records. The only public entry point
+// is the static Parse(); the constructor is protected, so objects can
+// only be created by derived classes or by the class itself.
+class Remark3Parser
+{
+  public:
+	virtual ~Remark3Parser() {}
+
+	// Parses the records starting at r for experimental method expMethod
+	// with db as datablock. The meaning of the return value is not visible
+	// here; presumably it tells whether parsing succeeded -- TODO confirm.
+	static bool Parse(const std::string& expMethod, PDBRecord* r, cif::datablock& db);
+
+	virtual std::string Program();
+	virtual std::string Version();
+
+  protected:
+
+	Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::datablock& db,
+			const TemplateLine templatelines[], uint32 templateLineCount, std::regex program_version);
+
+	// presumably returns a score for how well the template matched
+	// -- TODO confirm
+	virtual float Parse();
+	std::string NextLine();
+
+	bool Match(const char* expr, int nextState);
+	void StoreCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
+	void StoreRefineLsRestr(const char* type, std::initializer_list<const char*> values);
+	void UpdateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
+
+	// hook for derived classes, does nothing by default
+	virtual void Fixup() {}
+
+	std::string		m_name;
+	std::string		m_expMethod;
+	PDBRecord*		m_rec;
+	// NOTE(review): a datablock by value while the constructor receives a
+	// reference; presumably this is a private copy and results are
+	// transferred by the static Parse() -- confirm.
+	cif::datablock	m_db;
+	std::string		m_line;
+	std::smatch		m_m;
+	uint32			m_state;
+
+	const TemplateLine*	m_template;
+	uint32				m_templateCount;
+	std::regex			m_program_version;
+};
+
+
--- a/include/cif++/pdb2cif.h
+++ b/include/cif++/pdb2cif.h
+#pragma once
+
+#include "cif++.h"
+
+// --------------------------------------------------------------------
+
+// A single record of a PDB file. Records form a singly linked list
+// through m_next. The value is stored in the trailing array m_value.
+struct PDBRecord
+{
+	PDBRecord*	m_next;
+	uint32		m_line_nr;
+	char		m_name[11];
+	size_t		m_vlen;
+	// NOTE(review): a zero length array is a compiler extension.
+	// Presumably the operator new taking v_len allocates the extra room
+	// for the value -- confirm in the implementation.
+	char		m_value[0];
+
+	PDBRecord(uint32 line_nr, const std::string& name, const std::string& value);
+	~PDBRecord();
+	
+	// class specific allocation functions, defined out of line
+	void* operator new(size_t);
+	void* operator new(size_t size, size_t v_len);
+	
+	void operator delete(void* p);
+
+	// presumably compares name with m_name -- TODO confirm
+	bool is(const char* name) const;
+	
+	// Accessors for (a range of) columns of the value, returned as char,
+	// string, int or string respectively. Whether columns are counted from
+	// zero or one is not visible here -- TODO confirm.
+	char v_c(size_t column);
+	std::string v_s(size_t column_first, size_t column_last = std::numeric_limits<size_t>::max());
+	int v_i(int column_first, int column_last);
+	std::string v_f(size_t column_first, size_t column_last);
+};
+
+// --------------------------------------------------------------------
+
+void ReadPDBFile(std::istream& pdbFile, cif::file& cifFile);
--- a/include/cif++/point.h
+++ b/include/cif++/point.h
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#pragma once
+
+#include <libcif/config.h>
+
+#include <boost/filesystem/operations.hpp>
+#include <boost/math/quaternion.hpp>
+
+#include "clipper/core/coords.h"
+
+namespace libcif
+{
+
+typedef boost::math::quaternion<float>	quaternion;
+
+const long double
+	kPI = 3.141592653589793238462643383279502884L;
+
+// --------------------------------------------------------------------
+
+//	point, a location with x, y and z coordinates as float.
+//	This one is derived from a tuple<float,float,float> so
+//	you can do things like:
+//
+//	float x, y, z;
+//	tie(x, y, z) = atom.loc();
+//
+//	A point can be constructed from, assigned from and converted to a
+//	clipper::Coord_orth.
+
+struct point : public std::tuple<float,float,float>
+{
+	typedef std::tuple<float,float,float>	base_type;
+	
+	point()								: base_type(0.f, 0.f, 0.f) {}
+	point(float x, float y, float z)	: base_type(x, y, z) {}
+	point(const clipper::Coord_orth& pt): base_type(pt[0], pt[1], pt[2]) {}
+	
+	point& operator=(const clipper::Coord_orth& rhs)
+	{
+		x(rhs[0]);
+		y(rhs[1]);
+		z(rhs[2]);
+		return *this;
+	}
+	
+	// For each coordinate: a reference, a getter and a setter
+	float& x()				{ return std::get<0>(*this); }
+	float x() const			{ return std::get<0>(*this); }
+	void x(float x)			{ std::get<0>(*this) = x; }
+
+	float& y()				{ return std::get<1>(*this); }
+	float y() const			{ return std::get<1>(*this); }
+	void y(float y)			{ std::get<1>(*this) = y; }
+
+	float& z()				{ return std::get<2>(*this); }
+	float z() const			{ return std::get<2>(*this); }
+	void z(float z)			{ std::get<2>(*this) = z; }
+	
+	// component wise addition
+	point& operator+=(const point& rhs)
+	{
+		x() += rhs.x();
+		y() += rhs.y();
+		z() += rhs.z();
+		return *this;
+	}
+	
+	// component wise subtraction
+	point& operator-=(const point& rhs)
+	{
+		x() -= rhs.x();
+		y() -= rhs.y();
+		z() -= rhs.z();
+		return *this;
+	}
+
+	// multiply all coordinates by rhs
+	point& operator*=(float rhs)
+	{
+		x() *= rhs;
+		y() *= rhs;
+		z() *= rhs;
+		return *this;
+	}
+	
+	// divide all coordinates by rhs. There is no check for rhs being zero.
+	point& operator/=(float rhs)
+	{
+		x() /= rhs;
+		y() /= rhs;
+		z() /= rhs;
+		return *this;
+	}
+
+	// Scale the point to unit length and return the length it had before.
+	// A point of length zero is left untouched and zero is returned.
+	float normalize()
+	{
+		auto length = x() * x() + y() * y() + z() * z();
+		if (length > 0)
+		{
+			length = std::sqrt(length);
+			operator/=(length);
+		}
+		return length;
+	}
+	
+	// Rotate the point using quaternion q by calculating q * p * conj(q),
+	// p being the quaternion (0, x, y, z).
+	void rotate(const boost::math::quaternion<float>& q)
+	{
+		boost::math::quaternion<float> p(0, x(), y(), z());
+		
+		p = q * p * boost::math::conj(q);
+	
+		x() = p.R_component_2();
+		y() = p.R_component_3();
+		z() = p.R_component_4();
+	}
+	
+	operator clipper::Coord_orth() const
+	{
+		return clipper::Coord_orth(x(), y(), z());
+	}
+};
+
+
+// Write pt to os in the form (x,y,z)
+inline std::ostream& operator<<(std::ostream& os, const point& pt)
+{
+	return os << '(' << pt.x() << ',' << pt.y() << ',' << pt.z() << ')';
+}
+
+// component wise sum of two points
+inline point operator+(const point& lhs, const point& rhs)
+{
+	point sum(lhs);
+	sum += rhs;
+	return sum;
+}
+
+// component wise difference of two points
+inline point operator-(const point& lhs, const point& rhs)
+{
+	point difference(lhs);
+	difference -= rhs;
+	return difference;
+}
+
+// the point with all coordinates negated
+inline point operator-(const point& pt)
+{
+	const float nx = -pt.x();
+	const float ny = -pt.y();
+	const float nz = -pt.z();
+
+	return point(nx, ny, nz);
+}
+
+// the point with all coordinates multiplied by f
+inline point operator*(const point& pt, float f)
+{
+	point scaled(pt);
+	scaled *= f;
+	return scaled;
+}
+
+// the point with all coordinates divided by f, f is not checked for zero
+inline point operator/(const point& pt, float f)
+{
+	const float qx = pt.x() / f;
+	const float qy = pt.y() / f;
+	const float qz = pt.z() / f;
+
+	return point(qx, qy, qz);
+}
+
+// --------------------------------------------------------------------
+// several standard 3d operations
+
+// The square of the distance between a and b. The calculation is done
+// in float, the result is returned as double.
+inline double DistanceSquared(const point& a, const point& b)
+{
+	const float dx = a.x() - b.x();
+	const float dy = a.y() - b.y();
+	const float dz = a.z() - b.z();
+
+	return dx * dx + dy * dy + dz * dz;
+}
+
+// The distance between a and b: the square root of the sum of the
+// squared coordinate differences, which is calculated in float.
+inline double Distance(const point& a, const point& b)
+{
+	const float dx = a.x() - b.x();
+	const float dy = a.y() - b.y();
+	const float dz = a.z() - b.z();
+
+	return sqrt(dx * dx + dy * dy + dz * dz);
+}
+
+// The dot (inner) product of a and b
+inline float DotProduct(const point& a, const point& b)
+{
+	const float ax = a.x(), ay = a.y(), az = a.z();
+	const float bx = b.x(), by = b.y(), bz = b.z();
+
+	return ax * bx + ay * by + az * bz;
+}
+
+// The cross (vector) product a x b
+inline point CrossProduct(const point& a, const point& b)
+{
+	const float cx = a.y() * b.z() - b.y() * a.z();
+	const float cy = a.z() * b.x() - b.z() * a.x();
+	const float cz = a.x() * b.y() - b.x() * a.y();
+
+	return point(cx, cy, cz);
+}
+
+float DihedralAngle(const point& p1, const point& p2, const point& p3, const point& p4);
+float CosinusAngle(const point& p1, const point& p2, const point& p3, const point& p4);
+
+// --------------------------------------------------------------------
+// We use quaternions to do rotations in 3d space
+
+quaternion Normalize(quaternion q);
+
+//std::tuple<double,point> QuaternionToAngleAxis(quaternion q);
+point Centroid(std::vector<point>& points);
+point CenterPoints(std::vector<point>& points);
+quaternion AlignPoints(const std::vector<point>& a, const std::vector<point>& b);
+double RMSd(const std::vector<point>& a, const std::vector<point>& b);
+
+// --------------------------------------------------------------------
+// Helper class to generate evenly divided points on a sphere
+// we use a fibonacci sphere to calculate even distribution of the dots
+//
+// An object contains P = 2 * N + 1 points, all at distance one from the
+// origin. weight() returns 4 * pi / P.
+
+template<int N>
+class spherical_dots
+{
+  public:
+	enum { P = 2 * N + 1 };
+	typedef typename std::array<point,P>	array_type;
+	typedef typename array_type::const_iterator	iterator;
+
+	// access to a single, shared instance
+	static spherical_dots& instance()
+	{
+		static spherical_dots s_instance;
+		return s_instance;
+	}
+	
+	size_t size() const							{ return m_points.size(); }
+	const point operator[](uint32 inIx) const	{ return m_points[inIx]; }
+	iterator begin() const						{ return m_points.begin(); }
+	iterator end() const						{ return m_points.end(); }
+
+	double weight() const						{ return m_weight; }
+
+	spherical_dots()
+	{
+		using namespace std;
+		
+		const double kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
+		
+		m_weight = (4 * kPI) / P;
+		
+		// i runs from -N up to and including N, the dot for i is
+		// stored at index i + N
+		for (int32 i = -N; i <= N; ++i)
+		{
+			point& dot = m_points[i + N];
+			
+			const double lat = std::asin((2.0 * i) / P);
+			const double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
+			
+			dot.x(sin(lon) * cos(lat));
+			dot.y(cos(lon) * cos(lat));
+			dot.z(           sin(lat));
+		}
+	}
+
+  private:
+
+	array_type				m_points;
+	double					m_weight;
+};
+
+typedef spherical_dots<50> spherical_dots_50;
+
+
+}
--- a/include/cif++/structure.h
+++ b/include/cif++/structure.h
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#pragma once
+
+#include <boost/filesystem/operations.hpp>
+#include <boost/math/quaternion.hpp>
+
+#include <boost/any.hpp>
+
+#include "libcif/atom_type.h"
+#include "libcif/point.h"
+#include "libcif/compound.h"
+
+/*
+	To modify a structure, you will have to use actions.
+	
+	The currently supported actions are:
+	
+//	- Move atom to new location
+	- Remove atom
+//	- Add new atom that was formerly missing
+//	- Add alternate residue
+	- 
+	
+	Other important design principles:
+	
+	- all objects here are references to the actual data. Not models of
+	  the data itself. That means that if you copy an atom, you copy the
+	  reference to an atom in the structure. You're not creating a new
+	  atom. This may sound obvious, but it is not if you are used to
+	  copy semantics in the C++ world.
+	
+	
+*/
+
+// forward declaration of the datablock class in cif++, only a reference
+// to it is used in this header (see file::data)
+namespace cif
+{
+	class datablock;
+}
+
+
+namespace libcif
+{
+
+class atom;
+class residue;
+class monomer;
+class polymer;
+class structure;
+class file;
+
+// --------------------------------------------------------------------
+// We do not want to introduce a dependency on cif++ here, we might want
+// to change the backend storage in the future.
+// So, in order to access the data we use properties based on boost::any
+// Eventually this should be moved to std::variant, but that's only when
+// c++17 is acceptable.
+
+// A named value, the value is stored as a boost::any
+struct property
+{
+	property() {}
+	property(const std::string& n, const boost::any& v)
+		: name(n)
+		, value(v)
+	{
+	}
+	
+	std::string name;
+	boost::any value;
+};
+
+typedef std::vector<property>	property_list;
+
+// --------------------------------------------------------------------
+
+// An atom is a handle around a pointer to an atom_impl. As described at
+// the top of this file, copying an atom copies the reference, not the
+// atom itself.
+// NOTE(review): how the lifetime of the atom_impl is managed (e.g.
+// reference counting) is not visible in this header -- confirm.
+class atom
+{
+  public:
+//	atom(const structure& s, const std::string& id);
+	atom(struct atom_impl* impl);
+	atom(const file& f, const std::string& id);
+	atom(const atom& rhs);
+
+	~atom();
+	
+	atom& operator=(const atom& rhs);
+
+	std::string id() const;
+	atom_type type() const;
+
+	point location() const;
+
+	const compound& comp() const;
+	const entity& ent() const;
+	bool is_water() const;
+	int charge() const;
+	
+	// getter and setter for a property identified by name, the value is
+	// passed as boost::any
+	boost::any property(const std::string& name) const;
+	void property(const std::string& name, const boost::any& value);
+	
+	// specifications
+	// presumably the label_* and auth_* functions return the values of the
+	// atom_site items with the same name -- TODO confirm
+	std::string label_atom_id() const;
+	std::string label_comp_id() const;
+	std::string label_asym_id() const;
+	int label_seq_id() const;
+	std::string label_alt_id() const;
+	
+	std::string auth_atom_id() const;
+	std::string auth_comp_id() const;
+	std::string auth_asym_id() const;
+	int auth_seq_id() const;
+	std::string pdbx_auth_ins_code() const;
+	std::string auth_alt_id() const;
+	
+	bool operator==(const atom& rhs) const;
+
+	const file& get_file() const;
+
+  private:
+ 	struct atom_impl*			m_impl;
+};
+
+typedef std::vector<atom> atom_view;
+
+// --------------------------------------------------------------------
+
+// A residue refers to the compound it is an instance of. The compound is
+// stored by reference and thus has to outlive the residue.
+class residue : public std::enable_shared_from_this<residue>
+{
+  public:
+	residue(const compound& cmp) : m_compound(cmp) {}
+
+	// this class has a virtual member function and is therefore meant to
+	// be derived from, the virtual destructor makes deleting a derived
+	// object via a pointer to residue well defined
+	virtual ~residue() = default;
+
+	const compound&		comp() const		{ return m_compound; }
+	virtual atom_view	atoms();
+
+  private:
+	const compound&		m_compound;
+};
+
+//// --------------------------------------------------------------------
+//// a monomer models a single residue in a protein chain 
+//
+//class monomer : public residue
+//{
+//  public:
+//	monomer(polymer& polymer, size_t seq_id, const std::string& comp_id,
+//		const std::string& alt_id);
+//
+//	int num() const								{ return m_num; }
+////	polymer& get_polymer();
+//
+////	std::vector<monomer_ptr> alternates();
+//
+//  private:
+//	polymer_ptr	m_polymer;
+//	int			m_num;
+//};
+//
+//// --------------------------------------------------------------------
+//
+//class polymer : public std::enable_shared_from_this<polymer>
+//{
+//  public:
+//	polymer(const polymer_entity& pe, const std::string& asym_id);
+//	
+//	struct iterator : public std::iterator<std::random_access_iterator_tag, monomer>
+//	{
+//		typedef std::iterator<std::bidirectional_iterator_tag, monomer>	base_type;
+//		typedef base_type::reference									reference;
+//		typedef base_type::pointer										pointer;
+//		
+//		iterator(polymer& list, uint32 index);
+//		iterator(iterator&& rhs);
+//		iterator(const iterator& rhs);
+//		iterator& operator=(const iterator& rhs);
+//		iterator& operator=(iterator&& rhs);
+//		
+//		reference	operator*();
+//		pointer		operator->();
+//		
+//		iterator&	operator++();
+//		iterator	operator++(int);
+//		
+//		iterator&	operator--();
+//		iterator	operator--(int);
+//
+//		bool		operator==(const iterator& rhs) const;
+//		bool		operator!=(const iterator& rhs) const;
+//	};
+//
+//	iterator begin();
+//	iterator end();
+//
+//  private:
+//	polymer_entity				m_entity;
+//	std::string					m_asym_id;
+//	std::vector<residue_ptr>	m_monomers;
+//};
+
+// --------------------------------------------------------------------
+// file is a reference to the data stored in e.g. the cif file.
+// This object is not copyable.
+// The actual data is kept in a file_impl, which is accessed through the
+// pointer m_impl.
+
+class file : public std::enable_shared_from_this<file>
+{
+  public:
+	file();
+	file(boost::filesystem::path p);
+	~file();
+
+	file(const file&) = delete;
+	file& operator=(const file&) = delete;
+
+	void load(boost::filesystem::path p);
+	void save(boost::filesystem::path p);
+	
+	// NOTE(review): who owns the returned structure cannot be told from
+	// this header -- confirm before deleting it.
+	structure* model(size_t nr = 1);
+
+	// gives non const access to the implementation, even for a const file
+	struct file_impl& impl() const						{ return *m_impl; }
+
+	std::vector<const entity*> entities();
+
+	cif::datablock& data();
+
+  private:
+
+	struct file_impl*	m_impl;
+};
+
+// --------------------------------------------------------------------
+
+// A structure is constructed on a file and a model number (1 by default).
+// It gives access to atoms and maps between the various ways of
+// identifying a residue. The data is kept in a structure_impl accessed
+// through the pointer m_impl.
+class structure
+{
+  public:
+	structure(file& p, uint32 model_nr = 1);
+	structure(const structure&);
+	structure& operator=(const structure&);
+	~structure();
+
+	file& get_file() const;
+
+	atom_view atoms() const;
+	atom_view waters() const;
+
+	// What happens when no atom is found is not visible in this header
+	// -- TODO confirm in the implementation.
+	atom get_atom_by_id(std::string id) const;
+	atom get_atom_by_location(point pt, float max_distance) const;
+	
+	atom get_atom_for_label(const std::string& atom_id, const std::string& asym_id,
+		const std::string& comp_id, int seq_id, const std::string& alt_id = "");
+	
+	atom get_atom_for_auth(const std::string& atom_id, const std::string& asym_id,
+		const std::string& comp_id, int seq_id, const std::string& alt_id = "",
+		const std::string& pdbx_auth_ins_code = "");
+	
+	// map between auth and label locations
+	
+	// presumably returns asym_id, seq_id and comp_id -- TODO confirm
+	std::tuple<std::string,int,std::string> MapAuthToLabel(const std::string& asym_id,
+		const std::string& seq_id, const std::string& comp_id, const std::string& ins_code = "");
+	
+	// presumably returns asym_id, seq_id, comp_id and ins_code -- TODO confirm
+	std::tuple<std::string,std::string,std::string,std::string> MapLabelToAuth(
+		const std::string& asym_id, int seq_id, const std::string& comp_id);
+
+	// returns chain, seqnr
+	std::tuple<std::string,std::string> MapLabelToAuth(
+		const std::string& asym_id, int seq_id);
+
+	// returns chain,seqnr,comp,iCode
+	std::tuple<std::string,int,std::string,std::string> MapLabelToPDB(
+		const std::string& asym_id, int seq_id, const std::string& comp_id);
+
+	// the reverse of MapLabelToPDB, going by the names
+	std::tuple<std::string,int,std::string,std::string> MapPDBToLabel(
+		const std::string& asym_id, int seq_id, const std::string& comp_id, const std::string& iCode);
+	
+	// Actions
+	void remove_atom(atom& a);
+
+  private:
+	friend class action;
+
+	struct structure_impl*	m_impl;
+};
+
+}
--- a/rsrc/dictionaries/mmcif_ddl.dic
+++ b/rsrc/dictionaries/mmcif_ddl.dic
--- a/rsrc/dictionaries/mmcif_pdbx.dic
+++ b/rsrc/dictionaries/mmcif_pdbx.dic
--- a/src/atom_type.cpp
+++ b/src/atom_type.cpp
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#include "libcif/atom_type.h"
+#include "libcif/cif++.h"
+
+using namespace std;
+
+namespace libcif
+{
+
+// Marker for "value not available" in the table below (a NaN).
+const float kNA = nan("1");
+
+// Table of the known elements. Each row lists, in order: the atom_type, the
+// element name, its symbol, its weight, a flag telling whether it is a metal
+// and seven numeric values (presumably radii of different kinds - confirm
+// against the definition of atom_type_info), with kNA where no value is known.
+//
+// NOTE: the rows are NOT in atomic-number order. Hf..Og directly follow La,
+// and Ce..Lu and Th..Lr come last, so e.g. Hf (72 in the atom_type enum) sits
+// at index 58. A row's position is therefore not equal to its atom_type value.
+const atom_type_info kKnownAtoms[] =
+{
+	{ Nn,	"Unknown",			"Nn",	0,		false, {	kNA,	kNA,	kNA,	kNA,	kNA,	kNA,	kNA } },
+	{ H,	"Hydrogen",			"H",	1.008,	false, {	53,		25,		37,		32,		kNA,	kNA,	120 } },
+	{ He,	"Helium",			"He",	4.0026,	false, {	31,		kNA,	32,		46,		kNA,	kNA,	140 } },
+	{ Li,	"Lithium",			"Li",	6.94,	true,  {	167,	145,	134,	133,	124,	kNA,	182 } },
+	{ Be,	"Beryllium",		"Be",	9.0122,	true,  {	112,	105,	90,		102,	90,		85,		kNA } },
+	{ B,	"Boron",			"B",	10.81,	true,  {	87,		85,		82,		85,		78,		73,		kNA } },
+	{ C,	"Carbon",			"C",	12.011,	false, {	67,		70,		77,		75,		67,		60,		170 } },
+	{ N,	"Nitrogen",			"N",	14.007,	false, {	56,		65,		75,		71,		60,		54,		155 } },
+	{ O,	"Oxygen",			"O",	15.999,	false, {	48,		60,		73,		63,		57,		53,		152 } },
+	{ F,	"Fluorine",			"F",	18.998,	false, {	42,		50,		71,		64,		59,		53,		147 } },
+	{ Ne,	"Neon",				"Ne",	20.180,	false, {	38,		kNA,	69,		67,		96,		kNA,	154 } },
+	{ Na,	"Sodium",			"Na",	22.990,	true,  {	190,	180,	154,	155,	160,	kNA,	227 } },
+	{ Mg,	"Magnesium",		"Mg",	24.305,	true,  {	145,	150,	130,	139,	132,	127,	173 } },
+	{ Al,	"Aluminium",		"Al",	26.982,	true,  {	118,	125,	118,	126,	113,	111,	kNA } },
+	{ Si,	"Silicon",			"Si",	28.085,	true,  {	111,	110,	111,	116,	107,	102,	210 } },
+	{ P,	"Phosphorus",		"P",	30.974,	false, {	98,		100,	106,	111,	102,	94,		180 } },
+	{ S,	"Sulfur",			"S",	32.06,	false, {	88,		100,	102,	103,	94,		95,		180 } },
+	{ Cl,	"Chlorine",			"Cl",	35.45,	false, {	79,		100,	99,		99,		95,		93,		175 } },
+	{ Ar,	"Argon",			"Ar",	39.948,	false, {	71,		kNA,	97,		96,		107,	96,		188 } },
+	{ K,	"Potassium",		"K",	39.098,	true,  {	243,	220,	196,	196,	193,	kNA,	275 } },
+	{ Ca,	"Calcium",			"Ca",	40.078,	true,  {	194,	180,	174,	171,	147,	133,	kNA } },
+	{ Sc,	"Scandium",			"Sc",	44.956,	true,  {	184,	160,	144,	148,	116,	114,	kNA } },
+	{ Ti,	"Titanium",			"Ti",	47.867,	true,  {	176,	140,	136,	136,	117,	108,	kNA } },
+	{ V,	"Vanadium",			"V",	50.942,	true,  {	171,	135,	125,	134,	112,	106,	kNA } },
+	{ Cr,	"Chromium",			"Cr",	51.996,	true,  {	166,	140,	127,	122,	111,	103,	kNA } },
+	{ Mn,	"Manganese",		"Mn",	54.938,	true,  {	161,	140,	139,	119,	105,	103,	kNA } },
+	{ Fe,	"Iron",				"Fe",	55.845,	true,  {	156,	140,	125,	116,	109,	102,	kNA } },
+	{ Co,	"Cobalt",			"Co",	58.933,	true,  {	152,	135,	126,	111,	103,	96,		kNA } },
+	{ Ni,	"Nickel",			"Ni",	58.693,	true,  {	149,	135,	121,	110,	101,	101,	163 } },
+	{ Cu,	"Copper",			"Cu",	63.546,	true,  {	145,	135,	138,	112,	115,	120,	140 } },
+	{ Zn,	"Zinc",				"Zn",	65.38,	true,  {	142,	135,	131,	118,	120,	kNA,	139 } },
+	{ Ga,	"Gallium",			"Ga",	69.723,	true,  {	136,	130,	126,	124,	117,	121,	187 } },
+	{ Ge,	"Germanium",		"Ge",	72.630,	true,  {	125,	125,	122,	121,	111,	114,	kNA } },
+	{ As,	"Arsenic",			"As",	74.922,	true,  {	114,	115,	119,	121,	114,	106,	185 } },
+	{ Se,	"Selenium",			"Se",	78.971,	false, {	103,	115,	116,	116,	107,	107,	190 } },
+	{ Br,	"Bromine",			"Br",	79.904,	false, {	94,		115,	114,	114,	109,	110,	185 } },
+	{ Kr,	"Krypton",			"Kr",	83.798,	false, {	88,		kNA,	110,	117,	121,	108,	202 } },
+	{ Rb,	"Rubidium",			"Rb",	85.468,	true,  {	265,	235,	211,	210,	202,	kNA,	kNA } },
+	{ Sr,	"Strontium",		"Sr",	87.62,	true,  {	219,	200,	192,	185,	157,	139,	kNA } },
+	{ Y,	"Yttrium",			"Y",	88.906,	true,  {	212,	180,	162,	163,	130,	124,	kNA } },
+	{ Zr,	"Zirconium",		"Zr",	91.224,	true,  {	206,	155,	148,	154,	127,	121,	kNA } },
+	{ Nb,	"Niobium",			"Nb",	92.906,	true,  {	198,	145,	137,	147,	125,	116,	kNA } },
+	{ Mo,	"Molybdenum",		"Mo",	95.95,	true,  {	190,	145,	145,	138,	121,	113,	kNA } },
+	{ Tc,	"Technetium",		"Tc",	98,		true,  {	183,	135,	156,	128,	120,	110,	kNA } },
+	{ Ru,	"Ruthenium",		"Ru",	101.07,	true,  {	178,	130,	126,	125,	114,	103,	kNA } },
+	{ Rh,	"Rhodium",			"Rh",	102.91,	true,  {	173,	135,	135,	125,	110,	106,	kNA } },
+	{ Pd,	"Palladium",		"Pd",	106.42,	true,  {	169,	140,	131,	120,	117,	112,	163 } },
+	{ Ag,	"Silver",			"Ag",	107.87,	true,  {	165,	160,	153,	128,	139,	137,	172 } },
+	{ Cd,	"Cadmium",			"Cd",	112.41,	true,  {	161,	155,	148,	136,	144,	kNA,	158 } },
+	{ In,	"Indium",			"In",	114.82,	true,  {	156,	155,	144,	142,	136,	146,	193 } },
+	{ Sn,	"Tin",				"Sn",	118.71,	true,  {	145,	145,	141,	140,	130,	132,	217 } },
+	{ Sb,	"Antimony",			"Sb",	121.76,	false, {	133,	145,	138,	140,	133,	127,	kNA } },
+	{ Te,	"Tellurium",		"Te",	127.60,	false, {	123,	140,	135,	136,	128,	121,	206 } },
+	{ I,	"Iodine",			"I",	126.90,	false, {	115,	140,	133,	133,	129,	125,	198 } },
+	{ Xe,	"Xenon",			"Xe",	131.29,	false, {	108,	kNA,	130,	131,	135,	122,	216 } },
+	{ Cs,	"Caesium",			"Cs",	132.91,	true,  {	298,	260,	225,	232,	209,	kNA,	kNA } },
+	{ Ba,	"Barium",			"Ba",	137.33,	true,  {	253,	215,	198,	196,	161,	149,	kNA } },
+	{ La,	"Lanthanum",		"La",	138.91,	true,  {	kNA,	195,	169,	180,	139,	139,	kNA } },
+	{ Hf,	"Hafnium",			"Hf",	178.49,	true,  {	208,	155,	150,	152,	128,	122,	kNA } },
+	{ Ta,	"Tantalum",			"Ta",	180.95,	true,  {	200,	145,	138,	146,	126,	119,	kNA } },
+	{ W,	"Tungsten",			"W",	183.84,	true,  {	193,	135,	146,	137,	120,	115,	kNA } },
+	{ Re,	"Rhenium",			"Re",	186.21,	true,  {	188,	135,	159,	131,	119,	110,	kNA } },
+	{ Os,	"Osmium",			"Os",	190.23,	true,  {	185,	130,	128,	129,	116,	109,	kNA } },
+	{ Ir,	"Iridium",			"Ir",	192.22,	true,  {	180,	135,	137,	122,	115,	107,	kNA } },
+	{ Pt,	"Platinum",			"Pt",	195.08,	true,  {	177,	135,	128,	123,	112,	110,	175 } },
+	{ Au,	"Gold",				"Au",	196.97,	true,  {	174,	135,	144,	124,	121,	123,	166 } },
+	{ Hg,	"Mercury",			"Hg",	200.59,	true,  {	171,	150,	149,	133,	142,	kNA,	155 } },
+	{ Tl,	"Thallium",			"Tl",	204.38,	true,  {	156,	190,	148,	144,	142,	150,	196 } },
+	{ Pb,	"Lead",				"Pb",	207.2,	true,  {	154,	180,	147,	144,	135,	137,	202 } },
+	{ Bi,	"Bismuth",			"Bi",	208.98,	true,  {	143,	160,	146,	151,	141,	135,	kNA } },
+	{ Po,	"Polonium",			"Po",	209,	true,  {	135,	190,	kNA,	145,	135,	129,	kNA } },
+	{ At,	"Astatine",			"At",	210,	false, {	127,	kNA,	kNA,	147,	138,	138,	kNA } },
+	{ Rn,	"Radon",			"Rn",	222,	false, {	120,	kNA,	145,	142,	145,	133,	kNA } },
+	{ Fr,	"Francium",			"Fr",	223,	true,  {	kNA,	kNA,	kNA,	223,	218,	kNA,	kNA } },
+	{ Ra,	"Radium",			"Ra",	226,	true,  {	kNA,	215,	kNA,	201,	173,	159,	kNA } },
+	{ Ac,	"Actinium",			"Ac",	227,	true,  {	kNA,	195,	kNA,	186,	153,	140,	kNA } },
+	{ Rf,	"Rutherfordium",	"Rf",	267,	true,  {	kNA,	kNA,	kNA,	157,	140,	131,	kNA } },
+	{ Db,	"Dubnium",			"Db",	268,	true,  {	kNA,	kNA,	kNA,	149,	136,	126,	kNA } },
+	{ Sg,	"Seaborgium",		"Sg",	269,	true,  {	kNA,	kNA,	kNA,	143,	128,	121,	kNA } },
+	{ Bh,	"Bohrium",			"Bh",	270,	true,  {	kNA,	kNA,	kNA,	141,	128,	119,	kNA } },
+	{ Hs,	"Hassium",			"Hs",	277,	true,  {	kNA,	kNA,	kNA,	134,	125,	118,	kNA } },
+	{ Mt,	"Meitnerium",		"Mt",	278,	true,  {	kNA,	kNA,	kNA,	129,	125,	113,	kNA } },
+	{ Ds,	"Darmstadtium",		"Ds",	281,	true,  {	kNA,	kNA,	kNA,	128,	116,	112,	kNA } },
+	{ Rg,	"Roentgenium",		"Rg",	282,	true,  {	kNA,	kNA,	kNA,	121,	116,	118,	kNA } },
+	{ Cn,	"Copernicium",		"Cn",	285,	true,  {	kNA,	kNA,	kNA,	122,	137,	130,	kNA } },
+	{ Nh,	"Nihonium",			"Nh",	286,	true,  {	kNA,	kNA,	kNA,	136,	kNA,	kNA,	kNA } },
+	{ Fl,	"Flerovium",		"Fl",	289,	true,  {	kNA,	kNA,	kNA,	143,	kNA,	kNA,	kNA } },
+	{ Mc,	"Moscovium",		"Mc",	290,	true,  {	kNA,	kNA,	kNA,	162,	kNA,	kNA,	kNA } },
+	{ Lv,	"Livermorium",		"Lv",	293,	true,  {	kNA,	kNA,	kNA,	175,	kNA,	kNA,	kNA } },
+	{ Ts,	"Tennessine",		"Ts",	294,	true,  {	kNA,	kNA,	kNA,	165,	kNA,	kNA,	kNA } },
+	{ Og,	"Oganesson",		"Og",	294,	true,  {	kNA,	kNA,	kNA,	157,	kNA,	kNA,	kNA } },
+	{ Ce,	"Cerium",			"Ce",	140.12,	true,  {	kNA,	185,	kNA,	163,	137,	131,	kNA } },
+	{ Pr,	"Praseodymium",		"Pr",	140.91,	true,  {	247,	185,	kNA,	176,	138,	128,	kNA } },
+	{ Nd,	"Neodymium",		"Nd",	144.24,	true,  {	206,	185,	kNA,	174,	137,	kNA,	kNA } },
+	{ Pm,	"Promethium",		"Pm",	145,	true,  {	205,	185,	kNA,	173,	135,	kNA,	kNA } },
+	{ Sm,	"Samarium",			"Sm",	150.36,	true,  {	238,	185,	kNA,	172,	134,	kNA,	kNA } },
+	{ Eu,	"Europium",			"Eu",	151.96,	true,  {	231,	185,	kNA,	168,	134,	kNA,	kNA } },
+	{ Gd,	"Gadolinium",		"Gd",	157.25,	true,  {	233,	180,	kNA,	169,	135,	132,	kNA } },
+	{ Tb,	"Terbium",			"Tb",	158.93,	true,  {	225,	175,	kNA,	168,	135,	kNA,	kNA } },
+	{ Dy,	"Dysprosium",		"Dy",	162.50,	true,  {	228,	175,	kNA,	167,	133,	kNA,	kNA } },
+	{ Ho,	"Holmium",			"Ho",	164.93,	true,  {	226,	175,	kNA,	166,	133,	kNA,	kNA } },
+	{ Er,	"Erbium",			"Er",	167.26,	true,  {	226,	175,	kNA,	165,	133,	kNA,	kNA } },
+	{ Tm,	"Thulium",			"Tm",	168.93,	true,  {	222,	175,	kNA,	164,	131,	kNA,	kNA } },
+	{ Yb,	"Ytterbium",		"Yb",	173.05,	true,  {	222,	175,	kNA,	170,	129,	kNA,	kNA } },
+	{ Lu,	"Lutetium",			"Lu",	174.97,	true,  {	217,	175,	160,	162,	131,	131,	kNA } },
+	{ Th,	"Thorium",			"Th",	232.04,	true,  {	kNA,	180,	kNA,	175,	143,	136,	kNA } },
+	{ Pa,	"Protactinium",		"Pa",	231.04,	true,  {	kNA,	180,	kNA,	169,	138,	129,	kNA } },
+	{ U,	"Uranium",			"U",	238.03,	true,  {	kNA,	175,	kNA,	170,	134,	118,	186 } },
+	{ Np,	"Neptunium",		"Np",	237,	true,  {	kNA,	175,	kNA,	171,	136,	116,	kNA } },
+	{ Pu,	"Plutonium",		"Pu",	244,	true,  {	kNA,	175,	kNA,	172,	135,	kNA,	kNA } },
+	{ Am,	"Americium",		"Am",	243,	true,  {	kNA,	175,	kNA,	166,	135,	kNA,	kNA } },
+	{ Cm,	"Curium",			"Cm",	247,	true,  {	kNA,	kNA,	kNA,	166,	136,	kNA,	kNA } },
+	{ Bk,	"Berkelium",		"Bk",	247,	true,  {	kNA,	kNA,	kNA,	168,	139,	kNA,	kNA } },
+	{ Cf,	"Californium",		"Cf",	251,	true,  {	kNA,	kNA,	kNA,	168,	140,	kNA,	kNA } },
+	{ Es,	"Einsteinium",		"Es",	252,	true,  {	kNA,	kNA,	kNA,	165,	140,	kNA,	kNA } },
+	{ Fm,	"Fermium",			"Fm",	257,	true,  {	kNA,	kNA,	kNA,	167,	kNA,	kNA,	kNA } },
+	{ Md,	"Mendelevium",		"Md",	258,	true,  {	kNA,	kNA,	kNA,	173,	139,	kNA,	kNA } },
+	{ No,	"Nobelium",			"No",	259,	true,  {	kNA,	kNA,	kNA,	176,	kNA,	kNA,	kNA } },
+	{ Lr,	"Lawrencium",		"Lr",	266,	true,  {	kNA,	kNA,	kNA,	161,	141,	kNA,	kNA } }
+};
+
+// Number of rows in kKnownAtoms.
+uint32 kKnownAtomsCount = sizeof(kKnownAtoms) / sizeof(atom_type_info);
+
+// --------------------------------------------------------------------
+// atom_type_traits
+
+// Look up the element whose symbol equals `symbol` (compared with
+// cif::iequals) and remember its table entry.
+// Throws invalid_argument when no entry of kKnownAtoms matches.
+atom_type_traits::atom_type_traits(const string& symbol)
+	: m_info(nullptr)
+{
+	for (auto& candidate: kKnownAtoms)
+	{
+		if (not cif::iequals(candidate.symbol, symbol))
+			continue;
+
+		m_info = &candidate;
+		break;
+	}
+
+	if (m_info == nullptr)
+		throw invalid_argument("Not a known element: " + symbol);
+}
+
+// Look up the table entry for element `t`.
+//
+// kKnownAtoms is not laid out in atomic-number order (Hf..Og directly follow
+// La, Ce..Lu and Th..Lr come last), so `t` must not be used as an index into
+// it: kKnownAtoms[Hf] would be the row for Rn. The entry is therefore located
+// by comparing its type field.
+// NOTE(review): assumes the first member of atom_type_info is named `type`.
+//
+// Throws invalid_argument for Nn (unknown) and for any value that has no row
+// in the table.
+atom_type_traits::atom_type_traits(atom_type t)
+	: m_info(nullptr)
+{
+	if (t >= H)
+	{
+		for (auto& i: kKnownAtoms)
+		{
+			if (i.type == t)
+			{
+				m_info = &i;
+				break;
+			}
+		}
+	}
+
+	if (m_info == nullptr)
+		throw invalid_argument("atom_type out of range");
+}
+
+// Returns true when `symbol` equals (per cif::iequals) the symbol of any row
+// in kKnownAtoms, false otherwise.
+bool atom_type_traits::is_element(const string& symbol)
+{
+	for (auto& entry: kKnownAtoms)
+	{
+		if (cif::iequals(entry.symbol, symbol))
+			return true;
+	}
+
+	return false;
+}
+
+// Returns the metal flag of the first row in kKnownAtoms whose symbol equals
+// `symbol` (per cif::iequals); false when no row matches.
+bool atom_type_traits::is_metal(const std::string& symbol)
+{
+	for (auto& entry: kKnownAtoms)
+	{
+		if (cif::iequals(entry.symbol, symbol))
+			return entry.metal;
+	}
+
+	return false;
+}
+	
+}
--- a/src/cif++.cpp
+++ b/src/cif++.cpp
+// cif parsing library
+
+#include <cassert>
+#include <cstdlib>
+#include <new>
+
+#include <stack>
+#include <tuple>
+#include <regex>
+#include <set>
+#include <unordered_map>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem/operations.hpp>
+#include <boost/filesystem/fstream.hpp>
+
+#if defined(USE_RSRC)
+#include "mrsrc.h"
+#endif
+
+#include "cif++.h"
+#include "cif-parser.h"
+#include "cif-validator.h"
+#include "cif-utils.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+namespace fs = boost::filesystem;
+
+extern int VERBOSE;
+
+namespace cif
+{
+
+// Shared sentinel returned by the c_str() accessors below when no value is
+// found; item_reference::empty() compares pointers against it.
+static const char* kEmptyResult = "";
+	
+// --------------------------------------------------------------------
+// most internal data structures are stored as linked lists
+// item values are stored in a simple struct. They should be const anyway
+
+// One value of a row: a node in a singly linked list, tagged with the index
+// of the column it belongs to. The text is stored directly behind the struct
+// in the extra bytes that the custom operator new allocates.
+struct item_value
+{
+	item_value*				m_next;			// next value of the same row, or nullptr
+	uint32					m_column_index;	// index of the column this value belongs to
+	char					m_text[0];		// start of the NUL-terminated text (zero-length array: a compiler extension)
+	
+	item_value(const char* v, uint32 column_index);
+	~item_value();
+	
+	// data_size is the number of text characters to reserve room for, so
+	// instances must be created as: new(strlen(text)) item_value(text, ix)
+	void* operator new(size_t size, size_t data_size);
+	void operator delete(void* p);
+};
+
+// --------------------------------------------------------------------
+
+// Copies `value` into the storage behind the struct. The caller must have
+// allocated the object with room for at least strlen(value) + 1 characters.
+item_value::item_value(const char* value, uint32 column_index)
+	: m_next(nullptr), m_column_index(column_index)
+{
+	strcpy(m_text, value);
+}
+
+// Destroys the rest of the list as well. Each successor is unlinked before it
+// is deleted, so its own destructor finds an empty tail and the chain is torn
+// down iteratively instead of recursively. The self-reference test is a
+// safeguard against a node that points at itself.
+item_value::~item_value()
+{
+	for (item_value* successor = m_next; successor != nullptr and successor != this; successor = m_next)
+	{
+		m_next = successor->m_next;
+		successor->m_next = nullptr;
+		delete successor;
+	}
+}
+
+// Allocates room for the struct itself (`size`) followed by `data_size`
+// characters of text and the terminating NUL.
+// Throws bad_alloc when the allocation fails: an allocation function that is
+// not declared noexcept must not return a null pointer, and the constructor
+// would write its text through it.
+void* item_value::operator new(size_t size, size_t data_size)
+{
+	void* p = malloc(size + data_size + 1);
+	if (p == nullptr)
+		throw bad_alloc();
+	return p;
+}
+
+// Counterpart of the custom operator new: the memory came from malloc, so it
+// is released with free.
+void item_value::operator delete(void* p)
+{
+	free(p);
+}
+
+// --------------------------------------------------------------------
+
+// item_column contains info about a column or field in a category
+
+// Description of one column of a category: its name and the validator that
+// applies to its values (nullptr when there is none).
+struct item_column
+{
+	string					m_name;		// store lower-case, for optimization
+	const validate_item*	m_validator;
+};
+
+// item_row contains the actual values for a row in a category
+
+// One row of a category: a node in the category's singly linked list of rows
+// that owns a linked list of item_value nodes.
+struct item_row
+{
+	~item_row();
+
+	// Remove the value stored for column_ix, if any.
+	void drop(uint32 column_ix);
+
+	// Text stored for column_ix, or kEmptyResult when there is none.
+	const char* c_str(uint32 column_ix) const;
+	
+	// Render the row as "{name:value, name:value}", used in error messages.
+	string str() const
+	{
+		stringstream out;
+		const char* separator = "";
+
+		out << '{';
+		for (auto iv = m_values; iv != nullptr; iv = iv->m_next)
+		{
+			out << separator
+				<< m_category->get_column_name(iv->m_column_index)
+				<< ':'
+				<< iv->m_text;
+			separator = ", ";
+		}
+		out << '}';
+
+		return out.str();
+	}
+	
+	item_row*				m_next;			// next row of the category, or nullptr
+	category*				m_category;		// category this row belongs to
+	item_value*				m_values;		// head of the list of values, or nullptr
+};
+
+// Writes a row as "category[value,value,...]".
+ostream& operator<<(ostream& os, const item_row& r)
+{
+	os << r.m_category->name() << '[';
+
+	auto iv = r.m_values;
+	while (iv != nullptr)
+	{
+		os << iv->m_text;
+
+		iv = iv->m_next;
+		if (iv != nullptr)
+			os << ',';
+	}
+
+	os << ']';
+	return os;
+}
+
+// --------------------------------------------------------------------
+
+// Destroys all following rows and this row's values. Successors are unlinked
+// one at a time before being deleted, so the list is torn down iteratively
+// rather than through nested destructor calls.
+item_row::~item_row()
+{
+	for (item_row* successor = m_next; successor != nullptr and successor != this; successor = m_next)
+	{
+		m_next = successor->m_next;
+		successor->m_next = nullptr;
+		delete successor;
+	}
+
+	delete m_values;
+}
+
+// Removes the value stored for column `column_ix` from this row, if present.
+// Does nothing when the row has no values or no value for that column.
+void item_row::drop(uint32 column_ix)
+{
+	if (m_values != nullptr and m_values->m_column_index == column_ix)
+	{
+		// the value to drop is the head of the list
+		auto v = m_values;
+		m_values = m_values->m_next;
+		v->m_next = nullptr;	// unlink first: the destructor deletes whatever m_next points to
+		delete v;
+	}
+	else if (m_values != nullptr)	// guard: an empty row must not be dereferenced
+	{
+		for (auto v = m_values; v->m_next != nullptr; v = v->m_next)
+		{
+			if (v->m_next->m_column_index == column_ix)
+			{
+				auto vn = v->m_next;
+				v->m_next = vn->m_next;
+				vn->m_next = nullptr;	// unlink before delete, see above
+				delete vn;
+
+				break;
+			}
+		}
+	}
+
+#if DEBUG
+	// check that the list contains no one- or two-node cycles
+	for (auto iv = m_values; iv != nullptr; iv = iv->m_next)
+		assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next));
+	
+#endif
+}
+
+// Returns the text of the first value stored for column `column_ix`, or
+// kEmptyResult when the row has no value for that column.
+const char* item_row::c_str(uint32 column_ix) const
+{
+	for (auto iv = m_values; iv != nullptr; iv = iv->m_next)
+	{
+		if (iv->m_column_index == column_ix)
+			return iv->m_text;
+	}
+
+	return kEmptyResult;
+}
+
+// --------------------------------------------------------------------
+
+namespace detail
+{
+
+// Specialisation of the assignment operator for string values: stores `value`
+// in the item named m_name of the referenced row by calling row::assign.
+// NOTE(review): the meaning of the third argument (false) is defined by
+// row::assign, which is not visible here.
+template<>
+item_reference& item_reference::operator=(const string& value)
+{
+	row(m_row).assign(m_name, value, false);
+	return *this;
+}
+
+// Returns the text of the referenced item. kEmptyResult is returned when
+// there is no row, when the row holds no value for the item, or when the
+// stored text is exactly "." (a lone dot is treated as empty).
+// m_row->m_category is assumed to be non-null.
+const char*
+item_reference::c_str() const
+{
+	if (m_row == nullptr)
+		return kEmptyResult;
+
+	const auto cix = m_row->m_category->get_column_index(m_name);
+
+	for (auto iv = m_row->m_values; iv != nullptr; iv = iv->m_next)
+	{
+		if (iv->m_column_index != cix)
+			continue;
+
+		// only the first value for this column is considered
+		const bool is_single_dot = iv->m_text[0] == '.' and iv->m_text[1] == 0;
+		return is_single_dot ? kEmptyResult : iv->m_text;
+	}
+
+	return kEmptyResult;
+}
+
+// True when c_str() yields the kEmptyResult sentinel. This is a pointer
+// comparison, not a string comparison: a value that is stored with
+// zero-length text is not reported as empty.
+bool item_reference::empty() const
+{
+	return c_str() == kEmptyResult;
+}
+
+}
+
+// --------------------------------------------------------------------
+// datablock implementation
+
+// Creates an empty datablock called `name`, without validator and without
+// a successor.
+datablock::datablock(const string& name)
+	: m_name(name), m_validator(nullptr), m_next(nullptr)
+{
+}
+
+// Deletes the next datablock too, which in turn deletes its successor:
+// destroying the first block releases the whole chain (recursively).
+datablock::~datablock()
+{
+	delete m_next;
+}
+
+// Splits `tag` into a category and an item name and returns, as a string, what
+// get_first_item yields for that item in the first category whose name
+// matches (per iequals). Returns an empty string when no category matches.
+string datablock::first_item(const string& tag) const
+{
+	string cat_name, item_name;
+	std::tie(cat_name, item_name) = split_tag_name(tag);
+
+	for (auto& cat: m_categories)
+	{
+		if (iequals(cat.name(), cat_name))
+			return cat.get_first_item(item_name.c_str()).as<string>();
+	}
+
+	return string();
+}
+
+// Returns an iterator to the category called `name` (compared with iequals),
+// appending a new category when none exists yet. The bool of the result tells
+// whether a new category was created.
+auto datablock::emplace(const string& name) -> tuple<iterator,bool>
+{
+	const auto has_name = [&name](const category& cat) -> bool
+		{ return iequals(cat.name(), name); };
+
+	iterator pos = find_if(begin(), end(), has_name);
+
+	const bool isNew = pos == end();
+	if (isNew)
+		pos = m_categories.emplace(end(), *this, name, m_validator);
+
+	return make_tuple(pos, isNew);
+}
+
+// Returns the category called `name`, creating it when it does not exist yet.
+category& datablock::operator[](const string& name)
+{
+	// std:: is required here: an unqualified get would find datablock::get
+	return *std::get<0>(emplace(name));
+}
+
+// Returns a pointer to the first category whose name equals `name` (per
+// iequals), or nullptr when there is none. Never creates a category.
+category* datablock::get(const string& name)
+{
+	for (auto& cat: *this)
+	{
+		if (iequals(cat.name(), name))
+			return &cat;
+	}
+
+	return nullptr;
+}
+
+// Validates every category of this datablock.
+// Throws runtime_error when no validator has been set.
+void datablock::validate()
+{
+	if (m_validator == nullptr)
+		throw runtime_error("validator not specified");
+
+	for (auto& cat: *this)
+		cat.validate();
+}
+
+// Stores `v` as the validator of this datablock and passes it on to every
+// category it contains. `v` may be nullptr.
+void datablock::set_validator(validator* v)
+{
+	m_validator = v;
+
+	for (auto& cat: *this)
+		cat.set_validator(v);
+}
+
+// Lets every category, in order, contribute its tags to `tags` by calling
+// category::get_tag_order on it.
+void datablock::get_tag_order(vector<string>& tags) const
+{
+	for (auto& cat: *this)
+		cat.get_tag_order(tags);
+}
+
+// Writes the datablock to `os`: the "data_" header, then the first category
+// named "entry" (followed by a freshly built audit_conform record when a
+// validator is set), then every category not named "entry" or "audit_conform".
+void datablock::write(ostream& os)
+{
+	os << "data_" << m_name << endl
+	   << "# " << endl;
+
+	// mmcif support, sort of: 'entry' goes first and, if it exists _AND_ we
+	// have a validator, it is followed by the audit_conform record.
+	const auto entry_cat = find_if(begin(), end(), [](const category& cat) -> bool
+		{ return cat.name() == "entry"; });
+
+	if (entry_cat != end())
+	{
+		entry_cat->write(os);
+
+		if (m_validator != nullptr)
+		{
+			category audit_conform(*this, "audit_conform", nullptr);
+			audit_conform.emplace({
+				{ "dict_name", m_validator->dict_name() },
+				{ "dict_version", m_validator->dict_version() }
+			});
+			audit_conform.write(os);
+		}
+	}
+
+	for (auto& cat: m_categories)
+	{
+		const bool handled_above = cat.name() == "entry" or cat.name() == "audit_conform";
+		if (not handled_above)
+			cat.write(os);
+	}
+}
+
+// Writes the datablock to `os`, using `order` (a list of tags) to decide the
+// order of the categories and of the items within each category.
+//
+// Categories are written in order of first appearance in `order`; categories
+// that are not mentioned at all follow afterwards in their stored order.
+// Category names are compared case-insensitively throughout, so tags that
+// spell the same category with different case all contribute their items.
+// Unlike write(ostream&), this overload does not synthesise an audit_conform
+// record.
+void datablock::write(ostream& os, const vector<string>& order)
+{
+	os << "data_" << m_name << endl
+	   << "# " << endl;
+	
+	// the distinct category names, in order of first appearance
+	vector<string> catOrder;
+	for (auto& o: order)
+	{
+		string cat, item;
+		std::tie(cat, item) = split_tag_name(o);
+		if (find_if(catOrder.rbegin(), catOrder.rend(), [&cat](const string& s) -> bool { return iequals(cat, s); }) == catOrder.rend())
+			catOrder.push_back(cat);
+	}
+
+	for (auto& c: catOrder)
+	{
+		auto cat = get(c);
+		if (cat == nullptr)
+			continue;
+		
+		// the items requested for this category, in the requested order
+		vector<string> items;
+		for (auto& o: order)
+		{
+			string cat_name, item;
+			std::tie(cat_name, item) = split_tag_name(o);
+			
+			// case-insensitive, to agree with the de-duplication above and with get()
+			if (iequals(cat_name, c))
+				items.push_back(item);
+		}
+		
+		cat->write(os, items);
+	}
+	
+	// for any category we missed in the catOrder
+	for (auto& cat: m_categories)
+	{
+		if (find_if(catOrder.begin(), catOrder.end(), [&](const string& s) -> bool { return iequals(cat.name(), s); }) != catOrder.end())
+			continue;
+		
+		cat.write(os);
+	}
+}
+
+// --------------------------------------------------------------------
+//
+//	class to compare two rows based on their keys.
+
+// Three-way comparison of two rows on a list of items. For every item the
+// comparator stores the column index and the compare function of the item's
+// type validator; rows are compared item by item until a difference is found.
+class row_comparator
+{
+  public:
+
+	// Compare on the key items of the category, as listed by its validator.
+	// The category must have a category validator.
+	row_comparator(category* cat)
+		: row_comparator(cat, cat->get_cat_validator()->m_keys.begin(), cat->get_cat_validator()->m_keys.end())
+	{
+	}
+		
+	// Compare on the item names in the range [b, e).
+	template<typename KeyIter>
+	row_comparator(category* cat, KeyIter b, KeyIter e);
+	
+	// Returns the first non-zero result of the per-item compare functions, or
+	// 0 when all items compare equal. NOTE: this is a three-way result, not a
+	// less-than predicate.
+	int operator()(const item_row* a, const item_row* b) const;
+
+	int operator()(const row& a, const row& b) const
+	{
+		return operator()(a.m_data, b.m_data);
+	}
+	
+  private:
+	typedef function<int(const char*,const char*)>	compare_func;
+
+	// column index and the compare function to use for that column
+	typedef tuple<size_t,compare_func>	key_comp;
+
+	vector<key_comp>	m_comp;
+};
+
+// Builds the list of (column index, compare function) pairs for the item
+// names in [b, e). Throws runtime_error when the dictionary has no item
+// validator, or no type validator, for one of the items.
+template<typename KeyIter>
+row_comparator::row_comparator(category* cat, KeyIter b, KeyIter e)
+{
+	using namespace placeholders;
+
+	auto cat_validator = cat->get_cat_validator();
+
+	for (; b != e; ++b)
+	{
+		string key = *b;
+
+		size_t column = cat->get_column_index(key);
+
+		auto item_validator = cat_validator->get_validator_for_item(key);
+		if (item_validator == nullptr)
+			throw runtime_error("Incomplete dictionary, no item validator for key " + key);
+
+		auto type_validator = item_validator->m_type;
+		if (type_validator == nullptr)
+			throw runtime_error("Incomplete dictionary, no type validator for item " + key);
+
+		// values of this column are compared by the type validator's compare()
+		m_comp.emplace_back(column, bind(&validate_type::compare, type_validator, _1, _2));
+	}
+}
+
+// Compares rows `a` and `b` item by item and returns the first non-zero
+// result of the per-item compare functions, or 0 when all items are equal.
+// Both pointers must be non-null.
+int row_comparator::operator()(const item_row* a, const item_row* b) const
+{
+	assert(a);
+	assert(b);
+
+	int d = 0;
+	for (auto& c: m_comp)
+	{
+		// Refer to the stored function instead of copying it: this runs for
+		// every key of every comparison while sorting and indexing.
+		const size_t k = std::get<0>(c);
+		const compare_func& f = std::get<1>(c);
+		
+		const char* ka = a->c_str(k);
+		const char* kb = b->c_str(k);
+		
+		d = f(ka, kb);
+
+		if (d != 0)
+			break;
+	}
+	
+	return d;
+}
+
+// --------------------------------------------------------------------
+//
+//	class to keep an index on the keys of a category. This is a red/black
+//	tree implementation.
+
+// Index on the key items of a category, kept as a red/black tree of entries
+// that point to the rows. The rotations and fix-ups look like Sedgewick's
+// left-leaning red-black tree scheme. The index does not own the rows.
+class cat_index
+{
+  public:
+	cat_index(category* cat);
+	~cat_index();
+	
+	// Returns the indexed row whose keys compare equal to those of k, or nullptr.
+	item_row* find(item_row* k) const;
+
+	// Adds row r; throws runtime_error when a row with equal keys is already indexed.
+	void insert(item_row* r);
+	// Removes the entry whose keys compare equal to those of r.
+	void erase(item_row* r);
+
+	// batch create
+	void reconstruct();
+	
+	// reorder the item_row's and returns new head and tail
+	// (both nullptr when the index is empty)
+	tuple<item_row*,item_row*> reorder()
+	{
+		tuple<item_row*,item_row*> result = make_tuple(nullptr, nullptr);
+		
+		if (m_root != nullptr)
+		{
+			entry* head = findMin(m_root);
+			entry* tail = reorder(m_root);
+			
+			// terminate the relinked list at its last row
+			tail->m_row->m_next = nullptr;
+			
+			result = make_tuple(head->m_row, tail->m_row);
+		}
+			
+		return result;
+	}
+
+	// Number of entries in the tree.
+	size_t size() const;
+	// Checks the tree invariants with assert.
+	void validate() const;
+	
+  private:
+
+	// Tree node. New nodes start out red. Deleting a node deletes both its
+	// subtrees, so a node must be detached from children that should survive.
+	struct entry
+	{
+		entry(item_row* r)
+			: m_row(r), m_left(nullptr), m_right(nullptr), m_red(true) {}
+		
+		~entry()
+		{
+			delete m_left;
+			delete m_right;
+		}
+
+		item_row*		m_row;
+		entry*			m_left;
+		entry*			m_right;
+		bool			m_red;
+	};
+
+	// Recursive workers for the public insert/erase; both return the new root
+	// of the subtree they were given.
+	entry* insert(entry* h, item_row* v);
+	entry* erase(entry* h, item_row* k);
+
+	void validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const;
+
+	// Make the right child of h the root of this subtree; the new root takes
+	// over h's colour and h becomes red. h->m_right must not be null.
+	entry* rotateLeft(entry* h)
+	{
+		entry* x = h->m_right;
+		h->m_right = x->m_left;
+		x->m_left = h;
+		x->m_red = h->m_red;
+		h->m_red = true;
+		return x;
+	}
+	
+	// Mirror image of rotateLeft. h->m_left must not be null.
+	entry* rotateRight(entry* h)
+	{
+		entry* x = h->m_left;
+		h->m_left = x->m_right;
+		x->m_right = h;
+		x->m_red = h->m_red;
+		h->m_red = true;
+		return x;
+	}
+	
+	// Invert the colour of h and of each child it has.
+	void flipColour(entry* h)
+	{
+		h->m_red = not h->m_red;
+		
+		if (h->m_left != nullptr)
+			h->m_left->m_red = not h->m_left->m_red;
+	
+		if (h->m_right != nullptr)
+			h->m_right->m_red = not h->m_right->m_red;
+	}
+	
+	// A missing node counts as black.
+	bool isRed(entry* h) const
+	{
+		return h != nullptr and h->m_red;
+	}
+	
+	// Used while erasing, before descending to the left: flips the colours
+	// around h and, when the right child has a red left child, rotates that
+	// red link over to the left side.
+	entry* moveRedLeft(entry* h)
+	{
+		flipColour(h);
+		
+		if (h->m_right != nullptr and isRed(h->m_right->m_left))
+		{
+			h->m_right = rotateRight(h->m_right);
+			h = rotateLeft(h);
+			flipColour(h);
+		}
+		
+		return h;
+	}
+	
+	// Counterpart of moveRedLeft, used before descending to the right.
+	entry* moveRedRight(entry* h)
+	{
+		flipColour(h);
+		
+		if (h->m_left != nullptr and isRed(h->m_left->m_left))
+		{
+			h = rotateRight(h);
+			flipColour(h);
+		}
+		
+		return h;
+	}
+	
+	// Restore the colour invariants at h on the way back up after an erase.
+	entry* fixUp(entry* h)
+	{
+		if (isRed(h->m_right))
+			h = rotateLeft(h);
+		
+		if (isRed(h->m_left) and isRed(h->m_left->m_left))
+			h = rotateRight(h);
+		
+		if (isRed(h->m_left) and isRed(h->m_right))
+			flipColour(h);
+		
+		return h;
+	}
+	
+	// Leftmost entry of the subtree rooted at h. h must not be null.
+	entry* findMin(entry* h)
+	{
+		while (h->m_left != nullptr)
+			h = h->m_left;
+
+		return h;
+	}
+	
+	// Remove the leftmost entry of the subtree rooted at h and return the new
+	// subtree root. The removed entry is deleted; since ~entry deletes its
+	// children, this relies on that entry having no right child.
+	entry* eraseMin(entry* h)
+	{
+		if (h->m_left == nullptr)
+		{
+			delete h;
+			h = nullptr;
+		}
+		else
+		{
+			if (not isRed(h->m_left) and not isRed(h->m_left->m_left))
+				h = moveRedLeft(h);
+			
+			h->m_left = eraseMin(h->m_left);
+			
+			h = fixUp(h);
+		}
+		
+		return h;
+	}
+	
+	// Fix m_next fields for rows in order of this index
+	// Returns the last (rightmost) entry of the subtree rooted at e; the
+	// m_next of that entry's row is left for the caller to set.
+	entry* reorder(entry* e)
+	{
+		auto result = e;
+		
+		if (e->m_left != nullptr)
+		{
+			// the last row of the left subtree is followed by e's row
+			auto l = reorder(e->m_left);
+			l->m_row->m_next = e->m_row;
+		}
+		
+		if (e->m_right != nullptr)
+		{
+			// e's row is followed by the first row of the right subtree
+			auto mr = findMin(e->m_right);
+			e->m_row->m_next = mr->m_row;
+			
+			result = reorder(e->m_right);
+		}
+		
+		return result;
+	}
+	
+	category&			m_cat;		// the indexed category
+	row_comparator		m_comp;		// compares rows on the category's keys
+	entry*				m_root;		// root of the tree, nullptr when empty
+};
+
+// Creates an empty index for `cat`, comparing rows on the category's keys.
+// `cat` must not be null and needs a category validator (see row_comparator).
+cat_index::cat_index(category* cat)
+	: m_cat(*cat), m_comp(cat), m_root(nullptr)
+{
+}
+
+// Deletes all entries (each entry deletes its own subtrees); the rows that
+// the entries point to are left untouched.
+cat_index::~cat_index()
+{
+	delete m_root;
+}
+
+// Searches the tree for an entry whose row compares equal to `k` on the keys
+// and returns that row, or nullptr when there is no such entry.
+item_row* cat_index::find(item_row* k) const
+{
+	for (const entry* node = m_root; node != nullptr; )
+	{
+		const int d = m_comp(k, node->m_row);
+		if (d == 0)
+			return node->m_row;
+
+		node = d < 0 ? node->m_left : node->m_right;
+	}
+
+	return nullptr;
+}
+
+// Adds row `k` to the index and colours the (possibly new) root black.
+// Throws runtime_error when a row with equal keys is already present.
+void cat_index::insert(item_row* k)
+{
+	m_root = insert(m_root, k);
+	m_root->m_red = false;
+}
+
+// Inserts row `v` into the subtree rooted at `h` and returns the new root of
+// that subtree. Throws runtime_error when a row with equal keys exists.
+cat_index::entry* cat_index::insert(entry* h, item_row* v)
+{
+	// empty spot found: the new (red) entry goes here
+	if (h == nullptr)
+		return new entry(v);
+	
+	int d = m_comp(v, h->m_row);
+	if (d < 0)		h->m_left = insert(h->m_left, v);
+	else if (d > 0)	h->m_right = insert(h->m_right, v);
+	else
+		throw runtime_error("Duplicate key violation, cat: " + m_cat.name() + " values: " + v->str());
+
+	// rebalance on the way back up
+
+	// a red link on the right only: rotate it to the left
+	if (isRed(h->m_right) and not isRed(h->m_left))
+		h = rotateLeft(h);
+
+	// two red links in a row on the left: rotate right
+	if (isRed(h->m_left) and isRed(h->m_left->m_left))
+		h = rotateRight(h);
+	
+	// both children red: flip the colours of h and its children
+	if (isRed(h->m_left) and isRed(h->m_right))	
+		flipColour(h);
+	
+	return h;
+}
+
+// Removes the entry whose row compares equal to `k` on the keys and colours
+// the remaining root black. Does nothing when the index is empty; the
+// recursive worker dereferences the node it is given and must not be called
+// with a null root.
+void cat_index::erase(item_row* k)
+{
+	if (m_root == nullptr)
+		return;
+
+	m_root = erase(m_root, k);
+	if (m_root != nullptr)
+		m_root->m_red = false;
+}
+
+// Removes the entry matching `k` from the subtree rooted at `h` and returns
+// the new root of that subtree (nullptr when the subtree becomes empty).
+// `h` must not be null.
+cat_index::entry* cat_index::erase(entry* h, item_row* k)
+{
+	if (m_comp(k, h->m_row) < 0)
+	{
+		// the entry, if present, is in the left subtree
+		if (h->m_left != nullptr)
+		{
+			if (not isRed(h->m_left) and not isRed(h->m_left->m_left))
+				h = moveRedLeft(h);
+
+			h->m_left = erase(h->m_left, k);
+		}
+	}
+	else
+	{
+		if (isRed(h->m_left))
+			h = rotateRight(h);
+			
+		// match without a right child: the entry itself can go.
+		// NOTE(review): ~entry also deletes m_left, so this relies on h having
+		// no left child at this point - confirm.
+		if (m_comp(k, h->m_row) == 0 and h->m_right == nullptr)
+		{
+			delete h;
+			return nullptr;
+		}
+		
+		if (h->m_right != nullptr)
+		{
+			if (not isRed(h->m_right) and not isRed(h->m_right->m_left))
+				h = moveRedRight(h);
+			
+			if (m_comp(k, h->m_row) == 0)
+			{
+				// match with a right subtree: take over the row of the smallest
+				// entry on the right and remove that entry instead
+				h->m_row = findMin(h->m_right)->m_row;
+				h->m_right = eraseMin(h->m_right);
+			}
+			else
+				h->m_right = erase(h->m_right, k);
+		}
+	}
+	
+	return fixUp(h);
+}
+
+// Rebuilds the index from scratch: drops all entries and inserts every row of
+// the category again, one by one. Throws runtime_error (from insert) when two
+// rows have equal keys.
+void cat_index::reconstruct()
+{
+	delete m_root;
+	m_root = nullptr;
+	
+	for (auto r: m_cat)
+		insert(r.m_data);
+
+// Possible optimisation, not implemented: collect the rows in a vector,
+// stable_sort them on their keys (most of the time the data is ordered
+// already, and quicksort is notorious for using excessive recursion), create
+// one entry per row and then repeatedly link neighbouring entries into
+// (left, parent, right) triples until a single root remains. What is still
+// missing in that approach is a way to set the red/black flags correctly.
+}
+
+// Counts the entries in the tree, using an explicit stack instead of
+// recursion.
+size_t cat_index::size() const
+{
+	size_t count = 0;
+
+	stack<entry*> pending;
+	if (m_root != nullptr)
+		pending.push(m_root);
+
+	while (not pending.empty())
+	{
+		entry* node = pending.top();
+		pending.pop();
+
+		++count;
+
+		if (node->m_left != nullptr)
+			pending.push(node->m_left);
+		if (node->m_right != nullptr)
+			pending.push(node->m_right);
+	}
+
+	return count;
+}
+
+// Checks the tree invariants with assert (so nothing is checked when asserts
+// are compiled out): the root is black and every path from the root down to a
+// missing child passes the same number of black entries.
+void cat_index::validate() const
+{
+	if (m_root != nullptr)
+	{
+		uint32 minBlack = numeric_limits<uint32>::max();
+		uint32 maxBlack = 0;
+		
+		assert(not m_root->m_red);
+		
+		validate(m_root, false, 0, minBlack, maxBlack);
+		assert(minBlack == maxBlack);
+	}
+}
+
+// Recursive part of validate(): checks the subtree rooted at `h` with assert.
+//
+//	isParentRed	colour of h's parent
+//	blackDepth	number of black entries on the path from the root to h's parent
+//	minBlack,
+//	maxBlack	updated with the black depth of every missing child reached
+//
+// Checked are: no red entry has a red parent, no entry has a red right
+// child, the rows are ordered according to m_comp, and (through
+// minBlack/maxBlack, compared by the caller) all paths have equal black depth.
+void cat_index::validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const
+{
+	if (h->m_red)
+		assert(not isParentRed);
+	else
+		++blackDepth;
+	
+	if (isParentRed)
+		assert(not h->m_red);
+	
+	if (h->m_left != nullptr and h->m_right != nullptr)
+	{
+		if (isRed(h->m_left))
+			assert(not isRed(h->m_right));
+		if (isRed(h->m_right))
+			assert(not isRed(h->m_left));
+	}
+	
+	if (h->m_left != nullptr)
+	{
+		assert(m_comp(h->m_left->m_row, h->m_row) < 0);
+		validate(h->m_left, h->m_red, blackDepth, minBlack, maxBlack);
+	}
+	else
+	{
+		if (minBlack > blackDepth)
+			minBlack = blackDepth;
+		if (maxBlack < blackDepth)
+			maxBlack = blackDepth;
+	}
+	
+	if (h->m_right != nullptr)
+	{
+		assert(m_comp(h->m_right->m_row, h->m_row) > 0);
+
+		// Red links may only lean left. This used to be enforced by accident:
+		// the pointer h->m_right was passed as the isParentRed flag, which
+		// made that flag true for every right child.
+		assert(not isRed(h->m_right));
+
+		validate(h->m_right, h->m_red, blackDepth, minBlack, maxBlack);
+	}
+	else
+	{
+		if (minBlack > blackDepth)
+			minBlack = blackDepth;
+		if (maxBlack < blackDepth)
+			maxBlack = blackDepth;
+	}
+}
+
+// --------------------------------------------------------------------
+
+// Creates an empty set of rows belonging to category `cat`.
+rowset::rowset(category& cat)
+	: m_cat(cat)
+{
+}
+
+// Sorts the rows in this set on the given items, in the order listed, keeping
+// the relative order of rows that compare equal. Returns *this.
+// Throws runtime_error (from row_comparator) when the dictionary lacks a
+// validator for one of the items.
+rowset& rowset::orderBy(initializer_list<string> items)
+{
+	row_comparator c(&m_cat, items.begin(), items.end());
+	
+	// row_comparator yields a three-way result (<0, 0, >0). stable_sort needs
+	// a strict "less than" predicate; passing the comparator itself would
+	// treat every pair of differing rows as "less" in both directions.
+	stable_sort(begin(), end(), [&c](const row& a, const row& b) -> bool { return c(a, b) < 0; });
+	
+	return *this;
+}
+
+// --------------------------------------------------------------------
+
+// Creates an empty category called `name` in datablock `db`.
+//
+// When `validator` is given and knows this category, the key and mandatory
+// columns are added up front and an index on the keys is created.
+// Throws validation_error when `name` is empty.
+category::category(datablock& db, const string& name, validator* validator)
+	: m_db(db), m_name(name), m_validator(validator)
+	, m_head(nullptr), m_tail(nullptr), m_index(nullptr)
+{
+	// Not covered by the initialiser list above. Without this, add_column()
+	// would read an indeterminate pointer for a category that is created
+	// without a validator.
+	m_cat_validator = nullptr;
+
+	if (m_name.empty())
+		throw validation_error("invalid empty name for category");
+	
+	if (m_validator != nullptr)
+	{
+		m_cat_validator = m_validator->get_validator_for_category(m_name);
+		if (m_cat_validator != nullptr)
+		{
+			// make sure all required columns are added
+			
+			for (auto& k: m_cat_validator->m_keys)
+				add_column(k);
+
+			for (auto& k: m_cat_validator->m_mandatory_fields)
+				add_column(k);
+			
+			m_index = new cat_index(this);
+		}
+	}
+}
+
+// Release the rows and the index owned by this category.
+// NOTE(review): only the head row is deleted here; presumably item_row's
+// destructor releases the rest of the chain — confirm.
+category::~category()
+{
+	delete m_head;
+	delete m_index;
+}
+
+// Attach dictionary v (may be nullptr) to this category. Any existing index is
+// discarded; when the dictionary knows this category a fresh index is built
+// from the rows already present.
+void category::set_validator(validator* v)
+{
+	m_validator = v;
+
+	// the old index belongs to the previous dictionary
+	delete m_index;
+	m_index = nullptr;
+
+	if (m_validator == nullptr)
+	{
+		m_cat_validator = nullptr;
+		return;
+	}
+
+	m_cat_validator = m_validator->get_validator_for_category(m_name);
+	if (m_cat_validator == nullptr)
+		return;
+
+	m_index = new cat_index(this);
+	m_index->reconstruct();
+#if DEBUG
+	assert(m_index->size() == size());
+	m_index->validate();
+#endif
+}
+
+// Return the index of the column called name (compared with iequals), or
+// m_columns.size() when there is no such column.
+size_t category::get_column_index(const string& name) const
+{
+	size_t ix = 0;
+
+	while (ix < m_columns.size() and not iequals(name, m_columns[ix].m_name))
+		++ix;
+
+	return ix;
+}
+
+// Return the name of the column at column_ix. Uses vector::at, so an index
+// that is out of range throws std::out_of_range.
+const string& category::get_column_name(size_t column_ix) const
+{
+	const auto& column = m_columns.at(column_ix);
+	return column.m_name;
+}
+
+// Make sure a column called name exists and return its index. A new column is
+// appended at the end. When a category validator is present and does not know
+// the item, an error is reported through the validator before the column is
+// added.
+size_t category::add_column(const string& name)
+{
+	const size_t ix = get_column_index(name);
+
+	if (ix != m_columns.size())
+		return ix;		// the column is already there
+
+	const validate_item* iv = nullptr;
+
+	if (m_cat_validator != nullptr)
+	{
+		iv = m_cat_validator->get_validator_for_item(name);
+		if (iv == nullptr)
+			m_validator->report_error("tag " + name + " not allowed in category " + m_name);
+	}
+
+	m_columns.push_back({name, iv});
+
+	// ix equals the old size, which is the index of the column just added
+	return ix;
+}
+
+// Let the index, if there is one, reorder the rows; it hands back the new
+// head and tail of the row list.
+void category::reorderByIndex()
+{
+	if (m_index == nullptr)
+		return;
+
+	std::tie(m_head, m_tail) = m_index->reorder();
+}
+
+// Count the rows by walking the linked list; this is linear in the number of rows.
+size_t category::size() const
+{
+	size_t n = 0;
+	auto cur = m_head;
+
+	while (cur != nullptr)
+	{
+		++n;
+		cur = cur->m_next;
+	}
+
+	return n;
+}
+
+// A category counts as empty when it has no rows, or when its first row
+// holds no values.
+bool category::empty() const
+{
+	return not (m_head != nullptr and m_head->m_values != nullptr);
+}
+
+// Remove the column called field (compared with iequals): every row is asked
+// to drop that column index and the column is then erased from m_columns.
+// Does nothing when there is no such column.
+void category::drop(const string& field)
+{
+	for (auto ci = m_columns.begin(); ci != m_columns.end(); ++ci)
+	{
+		if (not iequals(ci->m_name, field))
+			continue;
+
+		uint32 column_ix = ci - m_columns.begin();
+
+		for (auto r = m_head; r != nullptr; r = r->m_next)
+			r->drop(column_ix);
+
+		m_columns.erase(ci);
+		return;
+	}
+}
+
+// Return the first row for which cond holds, or a default constructed row
+// when no row matches.
+row category::operator[](condition&& cond)
+{
+	for (auto r: *this)
+	{
+		if (cond(*this, r))
+			return r;
+	}
+
+	return row();
+}
+
+// Collect every row for which cond holds, in list order.
+rowset category::find(condition&& cond)
+{
+	rowset matches(*this);
+
+	for (auto r: *this)
+	{
+		if (not cond(*this, r))
+			continue;
+
+		matches.push_back(r);
+	}
+
+	return matches;
+}
+
+// Tell whether at least one row satisfies cond; stops at the first match.
+bool category::exists(condition&& cond)
+{
+	for (auto r: *this)
+	{
+		if (cond(*this, r))
+			return true;
+	}
+
+	return false;
+}
+
+// Return all rows of this category as a rowset, ordered on the given items.
+// The category itself is left untouched.
+rowset category::orderBy(std::initializer_list<string> items)
+{
+	rowset all(*this);
+	all.insert(all.begin(), begin(), end());
+	all.orderBy(items);
+
+	return all;
+}
+
+// Remove all rows. When the category has an index it is replaced by a new,
+// empty one.
+void category::clear()
+{
+	delete m_head;
+	m_head = nullptr;
+	m_tail = nullptr;
+
+	if (m_index == nullptr)
+		return;
+
+	delete m_index;
+	m_index = new cat_index(this);
+}
+
+// Append a new row built from the items in [b, e).
+// Returns the row together with a flag: true when a row was added, false
+// when the index already holds a row with the same key, in which case that
+// existing row is returned and nothing is added.
+// Throws runtime_error when the dictionary marks one of the known columns as
+// mandatory and the items do not supply it.
+template<class Iter>
+tuple<row,bool> category::emplace(Iter b, Iter e)
+{
+	row inserted;
+	bool isNew = true;
+
+	if (m_cat_validator != nullptr and b != e)
+	{
+		// First, make sure all mandatory fields are supplied
+		for (auto& col: m_columns)
+		{
+			auto iv = m_cat_validator->get_validator_for_item(col.m_name);
+			if (iv == nullptr)
+				continue;
+
+			auto v = b;
+			while (v != e and not iequals(v->name(), col.m_name))
+				++v;
+
+			if (v == e and iv->m_mandatory)
+				throw runtime_error("missing mandatory field " + col.m_name + " for category " + m_name);
+		}
+
+		if (m_index != nullptr)
+		{
+			// build a throw-away row holding only the key fields and look it
+			// up, to find out whether this key is already present
+			unique_ptr<item_row> probe(new item_row{nullptr, this, nullptr});
+			row probeRow(probe.get());
+			auto keys = key_fields();
+
+			for (auto v = b; v != e; ++v)
+			{
+				if (keys.count(v->name()))
+					probeRow.assign(v->name(), v->value(), true);
+			}
+
+			auto existing = m_index->find(probe.get());
+			if (existing != nullptr)
+			{
+				if (VERBOSE > 1)
+					cerr << "Not inserting new record in " << m_name << " (duplicate key)" << endl;
+
+				inserted = row(existing);
+				isNew = false;
+			}
+		}
+	}
+
+	if (isNew)
+	{
+		auto nr = new item_row{nullptr, this, nullptr};
+
+		// link the new row at the end of the list before filling it
+		if (m_head == nullptr)
+		{
+			assert(m_tail == nullptr);
+			m_head = nr;
+			m_tail = nr;
+		}
+		else
+		{
+			assert(m_tail != nullptr);
+			assert(m_head != nullptr);
+			m_tail->m_next = nr;
+			m_tail = nr;
+		}
+
+		row r(nr);
+
+		for (auto v = b; v != e; ++v)
+			r.assign(*v, true);
+
+		inserted = r;
+
+		// only now are the key values known, so the index is updated last
+		if (m_index != nullptr)
+			m_index->insert(nr);
+	}
+
+	return make_tuple(inserted, isNew);
+}
+
+// Add a copy of row r: its items are handed to the iterator overload.
+tuple<row,bool> category::emplace(row r)
+{
+	auto first = r.begin();
+	auto last = r.end();
+	return emplace(first, last);
+}
+
+// Erase every row that satisfies cond. The matching rows are collected first
+// so that the row list is not modified while it is being walked.
+void category::erase(condition&& cond)
+{
+	rowset remove = find(std::move(cond));
+
+	for (auto r: remove)
+		erase(r);
+}
+
+// Erase the row that iterator p refers to.
+void category::erase(iterator p)
+{
+	row target = *p;
+	erase(target);
+}
+
+// Erase row r from this category.
+// For every key column whose item validator lists children, the rows in the
+// child categories that carry the same value are erased first. The row is
+// then removed from the index, unlinked from the row list and deleted.
+// Throws runtime_error("erase") when the category has no rows at all.
+void category::erase(row r)
+{
+	iset keys;
+	if (m_cat_validator)
+		keys = iset(m_cat_validator->m_keys.begin(), m_cat_validator->m_keys.end());
+
+	for (auto& col: m_columns)
+	{
+		auto iv = col.m_validator;
+		if (iv == nullptr or iv->m_children.empty())
+			continue;
+
+		// only key columns cascade to child categories
+		if (not keys.count(col.m_name))
+			continue;
+
+		const char* value = r[col.m_name].c_str();
+
+		for (auto child: iv->m_children)
+		{
+			if (child->m_category == nullptr)
+				continue;
+
+			auto child_cat = m_db.get(child->m_category->m_name);
+			if (child_cat == nullptr)
+				continue;
+
+			auto rows = child_cat->find(key(child->m_tag) == value);
+			for (auto& cr: rows)
+				child_cat->erase(cr);
+		}
+	}
+
+	if (m_head == nullptr)
+		throw runtime_error("erase");
+
+	if (m_index != nullptr)
+		m_index->erase(r.m_data);
+
+	if (r == m_head)
+	{
+		m_head = m_head->m_next;
+
+		// that was the only row: the tail must not keep pointing at it
+		if (m_head == nullptr)
+			m_tail = nullptr;
+
+		r.m_data->m_next = nullptr;
+		delete r.m_data;
+	}
+	else
+	{
+		for (auto pi = m_head; pi != nullptr; pi = pi->m_next)
+		{
+			if (pi->m_next == r.m_data)
+			{
+				pi->m_next = r.m_data->m_next;
+
+				// when the last row goes, its predecessor becomes the tail;
+				// otherwise the next emplace() would append to a deleted row
+				if (m_tail == r.m_data)
+					m_tail = pi;
+
+				// detach before deleting so the rest of the list stays intact
+				r.m_data->m_next = nullptr;
+				delete r.m_data;
+				break;
+			}
+		}
+	}
+}
+
+// Append the full tag ("_<category>.<column>") of every column to tags, in
+// column order.
+void category::get_tag_order(vector<string>& tags) const
+{
+	const string prefix = "_" + m_name + ".";
+
+	for (auto& c: m_columns)
+		tags.push_back(prefix + c.m_name);
+}
+
+// Return a reference to the item called item_name in the first row of this
+// category. The reference is built from m_head as is, also when there are no rows.
+const detail::item_reference category::get_first_item(const char* item_name) const
+{
+	return detail::item_reference{ item_name, m_head };
+}
+
+// Iterator positioned at the first row of the list.
+category::iterator category::begin()
+{
+	iterator first(m_head);
+	return first;
+}
+
+// Past-the-end iterator; it is represented by a null row pointer.
+category::iterator category::end()
+{
+	iterator last(nullptr);
+	return last;
+}
+
+// Validate the content of this category against the dictionary.
+// Throws runtime_error when no validator was set; other problems are reported
+// through the validator. Validation is skipped for an empty category and
+// stops early when the dictionary does not define this category.
+void category::validate()
+{
+	if (m_validator == nullptr)
+		throw runtime_error("no validator specified");
+
+	if (empty())
+	{
+		if (VERBOSE > 2)
+			cerr << "Skipping validation of empty category " << m_name << endl;
+		return;
+	}
+
+	if (m_cat_validator == nullptr)
+	{
+		m_validator->report_error("undefined category " + m_name);
+		return;
+	}
+
+	// pass 1: attach an item validator to every column, and find out which
+	// mandatory fields have no column at all
+	auto missing = m_cat_validator->m_mandatory_fields;
+
+	for (auto& col: m_columns)
+	{
+		auto iv = m_cat_validator->get_validator_for_item(col.m_name);
+		if (iv == nullptr)
+			m_validator->report_error("Field " + col.m_name + " is not valid in category " + m_name);
+
+		col.m_validator = iv;
+		missing.erase(col.m_name);
+	}
+
+	if (not missing.empty())
+		m_validator->report_error("In category " + m_name + " the following mandatory fields are missing: " + ba::join(missing, ", "));
+
+	// check index?
+	if (m_index)
+	{
+#if not defined(NDEBUG)
+		m_index->validate();
+		for (auto r: *this)
+		{
+			if (m_index->find(r.m_data) != r.m_data)
+				m_validator->report_error("Key not found in index for category " + m_name);
+		}
+#endif
+	}
+
+	// pass 2: validate all values, row by row and column by column
+	for (auto ri = m_head; ri != nullptr; ri = ri->m_next)
+	{
+		for (size_t cix = 0; cix < m_columns.size(); ++cix)
+		{
+			auto iv = m_columns[cix].m_validator;
+
+			if (iv == nullptr)
+			{
+				m_validator->report_error("invalid field " + m_columns[cix].m_name + " for category " + m_name);
+				continue;
+			}
+
+			bool seen = false;
+
+			for (auto vi = ri->m_values; vi != nullptr; vi = vi->m_next)
+			{
+				if (vi->m_column_index != cix)
+					continue;
+
+				seen = true;
+				(*iv)(vi->m_text);
+			}
+
+			if (not seen and iv->m_mandatory)
+				m_validator->report_error("missing mandatory field " + m_columns[cix].m_name + " for category " + m_name);
+		}
+	}
+}
+
+// Return the dictionary attached to this category; throws runtime_error
+// when none has been set.
+const validator& category::get_validator() const
+{
+	if (m_validator != nullptr)
+		return *m_validator;
+
+	throw runtime_error("no validator defined yet");
+}
+
+// Return the tags of all items the dictionary defines for this category.
+// Throws runtime_error when no validator was set. When the dictionary does
+// not define this category the error is reported and, if report_error
+// returns, the result is an empty set.
+iset category::fields() const
+{
+	if (m_validator == nullptr)
+		throw runtime_error("No validator specified");
+
+	iset result;
+
+	if (m_cat_validator == nullptr)
+	{
+		m_validator->report_error("undefined category");
+		return result;		// nothing to enumerate; do not dereference the null validator
+	}
+
+	for (auto& iv: m_cat_validator->m_item_validators)
+		result.insert(iv.m_tag);
+
+	return result;
+}
+
+// Return the mandatory fields the dictionary lists for this category.
+// Throws runtime_error when no validator was set. When the dictionary does
+// not define this category the error is reported and, if report_error
+// returns, the result is an empty set.
+iset category::mandatory_fields() const
+{
+	if (m_validator == nullptr)
+		throw runtime_error("No validator specified");
+
+	if (m_cat_validator == nullptr)
+	{
+		m_validator->report_error("undefined category");
+		return iset();		// do not dereference the null validator
+	}
+
+	return m_cat_validator->m_mandatory_fields;
+}
+
+// Return the key fields the dictionary lists for this category.
+// Throws runtime_error when no validator was set. When the dictionary does
+// not define this category the error is reported and, if report_error
+// returns, the result is an empty set.
+iset category::key_fields() const
+{
+	if (m_validator == nullptr)
+		throw runtime_error("No validator specified");
+
+	if (m_cat_validator == nullptr)
+	{
+		m_validator->report_error("undefined category");
+		return iset();		// do not dereference the null validator
+	}
+
+	return iset{ m_cat_validator->m_keys.begin(), m_cat_validator->m_keys.end() };
+}
+
+// Advance to the next row in the list. The iterator must not be at end():
+// the current row's data pointer is dereferenced.
+auto category::iterator::operator++() -> iterator&
+{
+	auto next = m_current.data()->m_next;
+	m_current = row(next);
+	return *this;
+}
+
+namespace detail
+{
+
+// Write a single value to os in CIF syntax and return the new line offset.
+//   value   the text to write (by value, the text field form modifies it)
+//   offset  number of characters already written on the current line
+//   width   column width to pad to; 0 forces the text field form
+// A value that contains a newline, or is 132 characters or longer, is written
+// as a ';' delimited text field on lines of its own and 0 is returned.
+// Otherwise the value is written bare or quoted, followed by at least one
+// space, and offset is advanced by the number of characters written. When
+// neither quote character can be used the text field form is used as well.
+size_t write_value(ostream& os, string value, size_t offset, size_t width)
+{
+	if (value.find('\n') != string::npos or width == 0 or value.length() >= 132)					// write as text field
+	{
+		// a ';' at the start of a line would end the text field early
+		ba::replace_all(value, "\n;", "\n\\;");
+
+		if (offset > 0)
+			os << endl;
+		os << ';' << value;
+		if (not ba::ends_with(value, "\n"))
+			os << endl;
+		os << ';' << endl;
+		offset = 0;
+	}
+	else if (is_unquoted_string(value.c_str()))
+	{
+		os << value;
+
+		if (value.length() < width)
+		{
+			os << string(width - value.length(), ' ');
+			offset += width;
+		}
+		else
+		{
+			os << ' ';
+			offset += value.length() + 1;
+		}
+	}
+	else
+	{
+		bool done = false;
+		for (char q: { '\'', '"'})
+		{
+			// The quote character can not be used when the value contains
+			// it followed by a blank, by the end of the value or by another
+			// quote of the same kind.
+			auto p = value.find(q);	// see if we can use the quote character
+			while (p != string::npos and is_non_blank(value[p + 1]) and value[p + 1] != q)
+				p = value.find(q, p + 1);
+
+			if (p != string::npos)
+				continue;
+
+			os << q << value << q;
+
+			if (value.length() + 2 < width)
+			{
+				os << string(width - value.length() - 2, ' ');
+				offset += width;
+			}
+			else
+			{
+				os << ' ';
+				// two quotes plus the separating space were written
+				offset += value.length() + 3;
+			}
+
+			done = true;
+			break;
+		}
+
+		if (not done)
+		{
+			if (offset > 0)
+				os << endl;
+			os << ';' << value << endl
+			   << ';' << endl;
+			offset = 0;
+		}
+	}
+
+	return offset;
+}
+
+}
+
+// Write this category to os in CIF syntax.
+//   order                column indices (into m_columns) in output order
+//   includeEmptyColumns  not consulted by this function
+// Nothing is written for an empty category. A category with more than one
+// row is written as a loop_ with padded columns, a single row as tag/value
+// pairs. A missing value is written as "?". The output ends with a "# " line.
+void category::write(ostream& os, const vector<int>& order, bool includeEmptyColumns)
+{
+	if (empty())
+		return;
+
+	// If the first row has a next, we need a loop_
+	bool need_loop = (m_head->m_next != nullptr);
+
+	if (need_loop)
+	{
+		os << "loop_" << endl;
+
+		// The widths are looked up by column index below, so the table is
+		// sized on the number of columns and not on the length of order.
+		// That keeps the lookups in range when order names only some columns.
+		vector<size_t> column_widths(m_columns.size(), 2);
+
+		for (auto cix: order)
+		{
+			auto& col = m_columns[cix];
+			os << '_' << m_name << '.' << col.m_name << ' ' << endl;
+		}
+
+		// find the width of each column; values that will be written as a
+		// text field (multi-line or too long) do not count
+		for (auto row = m_head; row != nullptr; row = row->m_next)
+		{
+			for (auto v = row->m_values; v != nullptr; v = v->m_next)
+			{
+				if (strchr(v->m_text, '\n') == nullptr)
+				{
+					size_t l = strlen(v->m_text);
+
+					if (not is_unquoted_string(v->m_text))
+						l += 2;		// room for the quotes
+
+					if (l >= 132)
+						continue;
+
+					if (column_widths[v->m_column_index] < l + 1)
+						column_widths[v->m_column_index] = l + 1;
+				}
+			}
+		}
+
+		for (auto row = m_head; row != nullptr; row = row->m_next)	// loop over rows
+		{
+			size_t offset = 0;
+
+			for (size_t cix: order)
+			{
+				size_t w = column_widths[cix];
+
+				string s;
+				for (auto iv = row->m_values; iv != nullptr; iv = iv->m_next)
+				{
+					if (iv->m_column_index == cix)
+					{
+						s = iv->m_text;
+						break;
+					}
+				}
+
+				if (s.empty())
+					s = "?";
+
+				size_t l = s.length();
+				if (not is_unquoted_string(s.c_str()))
+					l += 2;
+				if (l < w)
+					l = w;
+
+				// start a new line when this value would not fit anymore
+				if (offset + l >= 132 and offset > 0)
+				{
+					os << endl;
+					offset = 0;
+				}
+
+				offset = detail::write_value(os, s, offset, w);
+
+				if (offset >= 132)
+				{
+					os << endl;
+					offset = 0;
+				}
+			}
+
+			if (offset > 0)
+				os << endl;
+		}
+	}
+	else
+	{
+		// first find the indent level
+		size_t l = 0;
+
+		for (auto& col: m_columns)
+		{
+			string tag = '_' + m_name + '.' + col.m_name;
+
+			if (l < tag.length())
+				l = tag.length();
+		}
+
+		l += 3;
+
+		for (size_t cix: order)
+		{
+			auto& col = m_columns[cix];
+
+			// pad the tag so that all values start in the same column
+			os << '_' << m_name << '.' << col.m_name << string(l - col.m_name.length() - m_name.length() - 2, ' ');
+
+			string s;
+			for (auto iv = m_head->m_values; iv != nullptr; iv = iv->m_next)
+			{
+				if (iv->m_column_index == cix)
+				{
+					s = iv->m_text;
+					break;
+				}
+			}
+
+			if (s.empty())
+				s = "?";
+
+			size_t offset = l;
+			if (s.length() + l >= kMaxLineLength)
+			{
+				os << endl;
+				offset = 0;
+			}
+
+			// write_value returns 0 when it already ended the line itself
+			if (detail::write_value(os, s, offset, 1) != 0)
+				os << endl;
+		}
+	}
+
+	os << "# " << endl;
+}
+
+// Write this category with the columns in their stored order (0, 1, 2, ...).
+void category::write(ostream& os)
+{
+	vector<int> order;
+
+	for (size_t ix = 0; ix < m_columns.size(); ++ix)
+		order.push_back(static_cast<int>(ix));
+
+	write(os, order, false);
+}
+
+// Write this category with the named columns first, in the order given,
+// followed by all remaining columns in their stored order. Columns that do
+// not exist yet are added to the category.
+void category::write(ostream& os, const vector<string>& columns)
+{
+	vector<int> order;
+	order.reserve(m_columns.size() + columns.size());
+
+	// add_column returns the index of the (possibly new) column
+	for (auto& c: columns)
+		order.push_back(add_column(c));
+
+	for (size_t i = 0; i < m_columns.size(); ++i)
+	{
+		const bool listed = std::find(order.begin(), order.end(), i) != order.end();
+		if (not listed)
+			order.push_back(i);
+	}
+
+	write(os, order, true);
+}
+
+// --------------------------------------------------------------------
+
+// Copying a row copies the pointer only; both rows refer to the same data.
+row::row(const row& rhs) : m_data(rhs.m_data) {}
+
+// Make this row refer to the same data as rhs; no row data is copied.
+row& row::operator=(const row& rhs)
+{
+	auto data = rhs.m_data;
+	m_data = data;
+	return *this;
+}
+
+// Set the value of the item called name in this row.
+//   name       column name; the column is added to the category when needed
+//   value      new text; an empty value just removes the stored value
+//   emplacing  true while a row is being filled by category::emplace, in
+//              which case the index is left alone
+// Throws logic_error when the row has no data. The value is passed to the
+// column's item validator, when there is one, before anything is changed.
+// NOTE: updating child categories that depend on the old value is not done
+// here; an earlier draft of that logic was disabled.
+void row::assign(const string& name, const string& value, bool emplacing)
+{
+	if (m_data == nullptr)
+		throw logic_error("invalid row, no data");
+
+	auto cat = m_data->m_category;
+	auto cix = cat->add_column(name);
+	auto& col = cat->m_columns[cix];
+
+	// look up the text currently stored for this column, if any
+	const char* oldValue = nullptr;
+	for (auto iv = m_data->m_values; iv != nullptr; iv = iv->m_next)
+	{
+		assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next));
+
+		if (iv->m_column_index != cix)
+			continue;
+
+		oldValue = iv->m_text;
+		break;
+	}
+
+	if (oldValue != nullptr and value == oldValue)	// no need to update
+		return;
+
+	// check the value
+	if (col.m_validator)
+		(*col.m_validator)(value);
+
+	// If the field is part of the key for this category, remove the row from
+	// the index before updating, and put it back afterwards
+	bool reinsert = false;
+
+	if (not emplacing and cat->m_index != nullptr and cat->key_fields().count(name))
+	{
+		reinsert = cat->m_index->find(m_data) != nullptr;
+		if (reinsert)
+			cat->m_index->erase(m_data);
+	}
+
+	// first remove old value with cix: walk the links so the head and the
+	// other nodes are handled the same way
+	for (auto link = &m_data->m_values; *link != nullptr; link = &(*link)->m_next)
+	{
+		if ((*link)->m_column_index != cix)
+			continue;
+
+		auto old = *link;
+		*link = old->m_next;
+		old->m_next = nullptr;		// detach before deleting
+		delete old;
+		break;
+	}
+
+#if DEBUG
+	for (auto iv = m_data->m_values; iv != nullptr; iv = iv->m_next)
+		assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next));
+#endif
+
+	// then append the new value at the end of the list
+	if (not value.empty())
+	{
+		auto nv = new(value.length()) item_value(value.c_str(), cix);
+
+		auto tail = &m_data->m_values;
+		while (*tail != nullptr)
+			tail = &(*tail)->m_next;
+		*tail = nv;
+	}
+
+#if DEBUG
+	for (auto iv = m_data->m_values; iv != nullptr; iv = iv->m_next)
+		assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next));
+#endif
+
+	if (reinsert)
+		cat->m_index->insert(m_data);
+}
+
+// Convenience overload: assign the name and value carried by an item.
+void row::assign(const item& value, bool emplacing)
+{
+	const auto& itemName = value.name();
+	const auto& itemValue = value.value();
+	assign(itemName, itemValue, emplacing);
+}
+
+// A row is empty when it refers to no data, or its data holds no values.
+bool row::empty() const
+{
+	if (m_data == nullptr)
+		return true;
+
+	return m_data->m_values == nullptr;
+}
+
+// Iterator to the first item of this row. For a row without data this is the
+// same as end(), so iterating over such a row visits nothing rather than
+// dereferencing a null pointer.
+auto row::begin() const -> const_iterator
+{
+	return const_iterator(m_data, m_data != nullptr ? m_data->m_values : nullptr);
+}
+
+// Past-the-end iterator for the items of this row (a null value pointer).
+auto row::end() const -> const_iterator
+{
+	const_iterator last(m_data, nullptr);
+	return last;
+}
+
+// Construct an iterator over the values of row data, positioned at ptr.
+// When ptr is not null the item it points at is loaded right away.
+row::const_iterator::const_iterator(item_row* data, item_value* ptr)
+	: m_data(data), m_ptr(ptr)
+{
+	if (m_ptr == nullptr)
+		return;
+
+	fetch();
+}
+
+// Move to the next value and load it. Incrementing an iterator that is
+// already at the end does nothing.
+row::const_iterator& row::const_iterator::operator++()
+{
+	if (m_ptr == nullptr)
+		return *this;
+
+	m_ptr = m_ptr->m_next;
+
+	if (m_ptr != nullptr)
+		fetch();
+
+	return *this;
+}
+
+// Load the current value into m_current as an item: the column name is looked
+// up in the row's category, the text is taken from the value itself.
+// m_ptr must not be null.
+void row::const_iterator::fetch()
+{
+	const auto& columnName = m_data->m_category->get_column_name(m_ptr->m_column_index);
+	m_current = item(columnName, m_ptr->m_text);
+}
+
+// --------------------------------------------------------------------
+
+// An empty file: no datablocks and no dictionary.
+file::file() : m_head(nullptr), m_validator(nullptr) {}
+
+// Construct a file and load the content of is into it.
+// NOTE(review): the validate argument is not used here — confirm whether it
+// was meant to trigger validation after loading.
+file::file(istream& is, bool validate)
+	: file()
+{
+	load(is);
+}
+
+// Move constructor: take over the datablocks and the dictionary of rhs and
+// leave rhs empty.
+file::file(file&& rhs)
+	: m_head(rhs.m_head), m_validator(rhs.m_validator)
+{
+	rhs.m_head = nullptr;
+	rhs.m_validator = nullptr;
+}
+
+// The file owns both its datablocks and its dictionary.
+// NOTE(review): only the first datablock is deleted here; presumably the
+// datablock destructor releases the rest of the chain — confirm.
+file::~file()
+{
+	delete m_head;
+	delete m_validator;
+}
+
+// Append datablock e to the end of the list of datablocks, after giving it
+// the dictionary of this file. Throws validation_error when a datablock with
+// the same name (compared with iequals) is already in the list.
+void file::append(datablock* e)
+{
+	e->set_validator(m_validator);
+
+	if (m_head == nullptr)
+	{
+		m_head = e;
+		return;
+	}
+
+	// walk to the last datablock, checking the names on the way
+	auto last = m_head;
+	while (true)
+	{
+		if (iequals(last->name(), e->name()))
+			throw validation_error("datablock " + e->name() + " already defined in file");
+
+		if (last->m_next == nullptr)
+			break;
+
+		last = last->m_next;
+	}
+
+	last->m_next = e;
+}
+
+// Parse the content of is into this file. The dictionary is taken away while
+// parsing; when there was one it is put back afterwards and the file is
+// validated against it.
+// Exceptions from the parser are passed on to the caller.
+void file::load(istream& is)
+{
+	validator* saved = m_validator;
+	set_validator(nullptr);
+
+	try
+	{
+		parser p(is, *this);
+		p.parse_file();
+	}
+	catch (...)
+	{
+		// Take the dictionary back so that the destructor can release it,
+		// it would be lost otherwise. The datablocks parsed so far are left
+		// without a validator.
+		m_validator = saved;
+		throw;
+	}
+
+	if (saved != nullptr)
+	{
+		set_validator(saved);
+		validate();
+	}
+}
+
+// Write all datablocks to os, in list order.
+void file::save(ostream& os)
+{
+	for (datablock* d = m_head; d != nullptr; d = d->m_next)
+		d->write(os);
+}
+
+// Write all datablocks to os, in list order, passing order on to each of them.
+void file::write(ostream& os, const vector<string>& order)
+{
+	for (datablock* d = m_head; d != nullptr; d = d->m_next)
+		d->write(os, order);
+}
+
+// Return the datablock called name (compared with iequals); throws
+// runtime_error when there is no such datablock.
+datablock& file::operator[](const string& name)
+{
+	for (datablock* d = m_head; d != nullptr; d = d->m_next)
+	{
+		if (iequals(d->m_name, name))
+			return *d;
+	}
+
+	throw runtime_error("datablock " + name + " does not exist");
+}
+
+// Validate every datablock. When no dictionary has been set yet the default
+// one is loaded first.
+void file::validate()
+{
+	const bool haveDictionary = (m_validator != nullptr);
+
+	if (not haveDictionary)
+	{
+		if (VERBOSE)
+			cerr << "No dictionary loaded explicitly, loading default" << endl;
+
+		load_dictionary();
+	}
+
+	auto d = m_head;
+	while (d != nullptr)
+	{
+		d->validate();
+		d = d->m_next;
+	}
+}
+
+// Return the dictionary of this file; throws runtime_error when none has
+// been set.
+const validator& file::get_validator() const
+{
+	if (m_validator != nullptr)
+		return *m_validator;
+
+	throw runtime_error("no validator defined yet");
+}
+
+// Load the default dictionary, which is the one called "mmcif_ddl".
+void file::load_dictionary()
+{
+	const char* kDefaultDictionary = "mmcif_ddl";
+	load_dictionary(kDefaultDictionary);
+}
+
+// Load the dictionary called dict from "dictionaries/<dict>.dic". With
+// USE_RSRC defined the data comes from an mrsrc resource and invalid_argument
+// is thrown when that resource does not exist; otherwise the file is read
+// from disk and runtime_error is thrown when it does not exist.
+void file::load_dictionary(const char* dict)
+{
+	string location("dictionaries/");
+	location += dict;
+	location += ".dic";
+
+	fs::path dict_file = location;
+
+#if defined(USE_RSRC)
+	mrsrc::rsrc dict_data(dict_file.string());
+
+	if (not dict_data)
+		throw invalid_argument("no such dictionary");
+
+	// a minimal stream buffer that reads straight from the resource data
+	struct membuf : public streambuf
+	{
+		membuf(char* dict, size_t length)
+		{
+			this->setg(dict, dict, dict + length);
+		}
+	} buffer(const_cast<char*>(dict_data.data()), dict_data.size());
+
+	istream is(&buffer);
+#else
+	if (not fs::exists(dict_file))
+		throw runtime_error("Dictionary not found (" + dict_file.string() + ")");
+	fs::ifstream is(dict_file);
+#endif
+
+	load_dictionary(is);
+}
+
+// Parse a dictionary from is and make it the validator of this file. The new
+// validator is kept in a unique_ptr while parsing, so it is released again
+// when the parser throws.
+void file::load_dictionary(istream& is)
+{
+	unique_ptr<validator> dictionary(new validator());
+
+	dict_parser reader(*dictionary, is);
+	reader.load_dictionary();
+
+	// parsing succeeded, hand the validator over to the file
+	set_validator(dictionary.release());
+}
+
+// Make v (may be nullptr) the dictionary of this file and of all its
+// datablocks. The previous dictionary is not deleted here.
+void file::set_validator(validator* v)
+{
+	m_validator = v;
+
+	auto d = m_head;
+	while (d != nullptr)
+	{
+		d->set_validator(m_validator);
+		d = d->m_next;
+	}
+}
+
+// Let every datablock, in list order, append its tags to tags.
+void file::get_tag_order(vector<string>& tags) const
+{
+	auto d = m_head;
+	while (d != nullptr)
+	{
+		d->get_tag_order(tags);
+		d = d->m_next;
+	}
+}
+
+// Advance to the next datablock. The iterator must not be at end(): the
+// current datablock is dereferenced.
+auto file::iterator::operator++() -> iterator&
+{
+	auto next = m_current->m_next;
+	m_current = next;
+	return *this;
+}
+
+// Iterator positioned at the first datablock.
+auto file::begin() const -> iterator
+{
+	iterator first(m_head);
+	return first;
+}
+
+// Past-the-end iterator; it is represented by a null datablock pointer.
+auto file::end() const -> iterator
+{
+	iterator last(nullptr);
+	return last;
+}
+
+}
--- a/src/cif-parser.cpp
+++ b/src/cif-parser.cpp
+// cif parsing library
+
+#include <set>
+
+#include <boost/algorithm/string.hpp>
+
+#include "libcif/cif++.h"
+#include "libcif/cif-parser.h"
+#include "libcif/cif-validator.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+
+extern int VERBOSE;
+
+namespace cif
+{
+
+const uint32 kMaxLineLength = 132;	// line length limit that category::write compares against before breaking a line
+// Per character trait flags. Only 96 of the 128 entries are listed, the remaining ones are zero-initialised.
+const uint8 kCharTraitsTable[128] = {	// NOTE(review): rows are labelled 2..7 and columns 0..f, so presumably entry i describes character 0x20 + i — confirm against the code that reads this table
+	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
+		14,	15,	14,	14,	14,	15,	15,	14,	15,	15,	15,	15,	15,	15,	15,	15,	//	2
+		15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	10,	15,	15,	15,	15,	//	3
+		15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	//	4
+		15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	14,	15,	14,	15,	14,	//	5
+		15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	//	6
+		15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	15,	0,	//	7
+};
+
+// --------------------------------------------------------------------
+
+cif_parser_error::cif_parser_error(uint32 line_nr, const string& message)
+	: runtime_error("parse error at line " + to_string(line_nr) + ": " + message)
+{
+}
+
+// --------------------------------------------------------------------
+
+const char* sac_parser::kTokenName[] = {	// printable token names, indexed by CIFToken value; used in error and trace messages
+	"unknown",
+	"EOF",
+	"DATA",
+	"LOOP",
+	"GLOBAL",
+	"SAVE",
+	"STOP",
+	"Tag",
+	"Value"
+};
+
+const char* sac_parser::kValueName[] = {	// printable value type names, indexed by m_token_type in the trace output of get_next_token
+	"Int",
+	"Float",
+	"Numeric",
+	"String",
+	"TextField",
+	"Inapplicable",
+	"Unknown"
+};
+
+// --------------------------------------------------------------------
+
+// Set up a parser reading from is. Constructing the parser already reads
+// from the stream, since the first token is fetched as lookahead.
+sac_parser::sac_parser(std::istream& is)
+	: m_data(is)
+{
+	m_line_nr = 1;
+	m_bol = true;
+	m_validate = true;
+
+	// needs the state above, so this has to come last
+	m_lookahead = get_next_token();
+}
+
+void sac_parser::error(const string& msg)
+{
+	throw cif_parser_error(m_line_nr, msg);
+}
+
+// Return the next character: pushed back characters are handed out first,
+// after that the istream is read. A CR, optionally followed by an LF, is
+// returned as a single LF. Every character returned is also appended to the
+// current token text, and the line counter is kept up to date.
+int sac_parser::get_next_char()
+{
+	int result;
+
+	if (not m_buffer.empty())
+	{
+		result = m_buffer.top();
+		m_buffer.pop();
+	}
+	else
+		result = m_data.get();
+
+	// very simple CR/LF translation into LF
+	if (result == '\r')
+	{
+		// peek at what follows: an LF is swallowed, anything else is kept
+		// for the next call
+		int lookahead = m_data.get();
+		if (lookahead != '\n')
+			m_buffer.push(lookahead);
+		result = '\n';
+	}
+
+	m_token_value += static_cast<char>(result);
+
+	if (result == '\n')
+		++m_line_nr;
+
+	if (VERBOSE >= 6)
+	{
+		const bool printable = not iscntrl(result) and isprint(result);
+
+		cerr << "get_next_char => ";
+		if (printable)
+			cerr << char(result) << endl;
+		else
+			cerr << int(result) << endl;
+	}
+
+	return result;
+}
+
+// Push the character read last back: it is taken off the end of the token
+// text and put on the buffer, so that get_next_char returns it again. The
+// line counter is corrected when that character is a newline.
+void sac_parser::retract()
+{
+	assert(not m_token_value.empty());
+
+	const char last = m_token_value.back();
+	m_token_value.pop_back();
+
+	if (last == '\n')
+		--m_line_nr;
+
+	m_buffer.push(last);
+}
+
+// Give up on the current interpretation of the token: push back everything
+// read so far and start again with the next alternative. The alternatives are
+// tried in the order start, float, int, value; after that error() is called.
+void sac_parser::restart()
+{
+	while (not m_token_value.empty())
+		retract();
+
+	if (m_start == eStateStart)
+		m_start = eStateFloat;
+	else if (m_start == eStateFloat)
+		m_start = eStateInt;
+	else if (m_start == eStateInt)
+		m_start = eStateValue;
+	else
+		error("Invalid state in sac_parser");
+
+	m_state = m_start;
+	m_bol = false;
+}
+
+// Check that the lookahead token is t and fetch the next token. A mismatch
+// is reported through error().
+void sac_parser::match(sac_parser::CIFToken t)
+{
+	const bool asExpected = (m_lookahead == t);
+
+	if (not asExpected)
+		error(string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[m_lookahead]);
+
+	m_lookahead = get_next_token();
+}
+
+sac_parser::CIFToken sac_parser::get_next_token()	// scan the next token and return its kind; the text is left in m_token_value, the kind of value in m_token_type
+{
+	const auto kEOF = char_traits<char>::eof();
+	// result stays Unknown until a complete token has been recognised
+	CIFToken result = eCIFTokenUnknown;
+	int quoteChar = 0;
+	m_state = m_start = eStateStart;
+	m_bol = false;
+	
+	m_token_value.clear();
+	m_token_type = eCIFValueUnknown;
+	// one character per iteration; retract() pushes a character back, restart() rescans the token with the next alternative
+	while (result == eCIFTokenUnknown)
+	{
+		auto ch = get_next_char();
+		
+		switch (m_state)
+		{
+			case eStateStart:	// the first character decides what kind of token to try
+				if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (ch == '\n')
+				{
+					m_bol = true;
+					m_state = eStateWhite;
+				}
+				else if (ch == ' ' or ch == '\t')
+					m_state = eStateWhite;
+				else if (ch == '#')
+					m_state = eStateComment;
+				else if (ch == '.')
+					m_state = eStateDot;
+				else if (ch == '_')
+					m_state = eStateTag;
+				else if (ch == ';' and m_bol)	// a text field starts only with ';' at the beginning of a line
+					m_state = eStateTextField;
+				else if (ch == '\'' or ch == '"')
+				{
+					quoteChar = ch;
+					m_state = eStateQuotedString;
+				}
+				else if (ch == '?')
+					m_state = eStateQuestionMark;
+				else
+					restart();	// anything else: try it as a float, then an int, then a plain value
+				break;
+			
+			case eStateWhite:	// skip white space, remembering whether we end up at the start of a line
+				if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (not isspace(ch))
+				{
+					m_state = eStateStart;
+					retract();
+					m_token_value.clear();
+				}
+				else
+					m_bol = (ch == '\n');
+				break;
+			
+			case eStateComment:	// a comment runs to the end of the line and is discarded
+				if (ch == '\n')
+				{
+					m_state = eStateStart;
+					m_bol = true;
+					m_token_value.clear();
+				}
+				else if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (not is_any_print(ch))
+					error("invalid character in comment");
+				break;
+			
+			case eStateQuestionMark:	// a lone '?' is a value with empty text and type Unknown
+				if (is_non_blank(ch))
+					m_state = eStateValue;
+				else
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_value.clear();
+					m_token_type = eCIFValueUnknown;
+				}
+				break;
+
+			case eStateDot:	// a lone '.' is an Inapplicable value; '.' followed by a digit continues as a float
+				if (isdigit(ch))
+					m_state = eStateFloat + 2;
+				else if (isspace(ch))
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueInapplicable;
+				}
+				else
+					m_state = eStateValue;
+				break;
+
+			case eStateTextField:	// inside a line of a text field
+				if (ch == '\n')
+					m_state = eStateTextField + 1;
+				else if (ch == kEOF)
+					error("unterminated textfield");
+				else if (not is_any_print(ch))
+//					error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
+					cerr << "invalid character in text field '" << string({ static_cast<char>(ch) }) << "' (" << ch << ") line: " << m_line_nr << endl;
+				break;
+			
+			case eStateTextField + 1:	// at the start of a line inside a text field: a ';' here ends the field
+				if (is_text_lead(ch) or ch == ' ' or ch == '\t')
+					m_state = eStateTextField;
+				else if (ch == ';')
+				{
+					assert(m_token_value.length() >= 2);
+					m_token_value = m_token_value.substr(1, m_token_value.length() - 3);	// drop the leading ';' and the closing newline plus ';'
+					m_token_type = eCIFValueTextField;
+					result = eCIFTokenValue;
+				}
+				else if (ch == kEOF)
+					error("unterminated textfield");
+				else if (ch != '\n')
+					error("invalid character in text field");
+				break;
+			
+			case eStateQuotedString:	// inside a quoted string, looking for the quote character
+				if (ch == kEOF)
+					error("unterminated quoted string");
+				else if (ch == quoteChar)
+					m_state = eStateQuotedStringQuote;
+				else if (not is_any_print(ch))
+					error("invalid character in quoted string");
+				break;
+			
+			case eStateQuotedStringQuote:	// just saw the quote character: it closes the string only when white space follows
+				if (is_white(ch))
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueString;
+					
+					assert(m_token_value.length() >= 3);
+					m_token_value = m_token_value.substr(1, m_token_value.length() - 2);	// strip the quotes
+				}
+				else if (ch == quoteChar)
+					;
+				else if (is_any_print(ch))
+					m_state = eStateQuotedString;
+				else if (ch == kEOF)
+					error("unterminated quoted string");
+				else
+					error("invalid character in quoted string");
+				break;
+			
+			case eStateTag:	// a tag runs up to the first character that is not non-blank
+				if (not is_non_blank(ch))
+				{
+					retract();
+					result = eCIFTokenTag;
+				}
+				break;
+			
+			case eStateFloat:	// first alternative after restart(): a sign or a digit
+				if (ch == '+' or ch == '-')
+				{
+					m_state = eStateFloat + 1;
+				}
+				else if (isdigit(ch))
+					m_state = eStateFloat + 1;
+				else
+					restart();
+				break;
+			
+			case eStateFloat + 1:
+//				if (ch == '(')	// numeric???
+//					m_state = eStateNumericSuffix;
+//				else
+				if (ch == '.')
+					m_state = eStateFloat + 2;
+				else if (tolower(ch) == 'e')
+					m_state = eStateFloat + 3;
+				else if (is_white(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueInt;
+				}
+				else
+					restart();
+				break;
+			
+			// parsed '.'
+			case eStateFloat + 2:
+//				if (ch == '(')	// numeric???
+//					m_state = eStateNumericSuffix;
+//				else
+				if (tolower(ch) == 'e')
+					m_state = eStateFloat + 3;
+				else if (is_white(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueFloat;
+				}
+				else
+					restart();
+				break;
+			
+			// parsed 'e'
+			case eStateFloat + 3:
+				if (ch == '-' or ch == '+')
+					m_state = eStateFloat + 4;
+				else if (isdigit(ch))
+					m_state = eStateFloat + 5;
+				else
+					restart();
+				break;
+
+			case eStateFloat + 4:	// after the sign of the exponent: a digit has to follow
+				if (isdigit(ch))
+					m_state = eStateFloat + 5;
+				else
+					restart();
+				break;
+			
+			case eStateFloat + 5:	// after a digit of the exponent
+//				if (ch == '(')
+//					m_state = eStateNumericSuffix;
+//				else
+				if (is_white(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueFloat;
+				}
+				else
+					restart();
+				break;
+			
+			case eStateInt:	// second alternative after restart()
+				if (isdigit(ch) or ch == '+' or ch == '-')
+					m_state = eStateInt + 1;
+				else
+					restart();
+				break;
+			
+			case eStateInt + 1:
+				if (is_white(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueInt;
+				}
+				else
+					restart();
+				break;
+			
+//			case eStateNumericSuffix:
+//				if (isdigit(ch))
+//					m_state = eStateNumericSuffix + 1;
+//				else
+//					restart();
+//				break;
+//			
+//			case eStateNumericSuffix + 1:
+//				if (ch == ')')
+//				{
+//					result = eCIFTokenValue;
+//					m_token_type = eCIFValueNumeric;
+//				}
+//				else if (not isdigit(ch))
+//					restart();
+//				break;
+			
+			case eStateValue:	// last alternative: an unquoted string or a keyword
+				if (is_non_blank(ch))
+					m_state = eStateValue + 1;
+				else
+					error("invalid character at this position");
+				break;
+			
+			case eStateValue + 1:
+				if (ch == '_')		// first _, check for keywords
+				{
+					string s = to_lower_copy(m_token_value);
+					
+					if (s == "global_")
+						result = eCIFTokenGLOBAL;
+					else if (s == "stop_")
+						result = eCIFTokenSTOP;
+					else if (s == "loop_")
+						result = eCIFTokenLOOP;
+					else if (s == "data_" or s == "save_")	// these two are followed by a name, keep reading
+						m_state = eStateValue + 2;
+				}
+				else if (not is_non_blank(ch))
+				{
+					retract();
+					result = eCIFTokenValue;
+					m_token_type = eCIFValueString;
+				}
+				break;
+
+			case eStateValue + 2:	// reading the name that follows data_ or save_
+				if (not is_non_blank(ch))
+				{
+					retract();
+					
+					if (tolower(m_token_value[0]) == 'd')
+						result = eCIFTokenDATA;
+					else
+						result = eCIFTokenSAVE;
+					
+					m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5); 	// remove the five character "data_"/"save_" prefix, the name remains
+				}
+				break;
+			
+			default:
+				assert(false);
+				error("Invalid state in get_next_token");
+				break;
+		}
+	}
+
+	if (VERBOSE >= 5)	// trace the token that was found
+	{
+		cerr << kTokenName[result];
+		if (m_token_type != eCIFValueUnknown)
+			cerr << ' ' << kValueName[m_token_type];
+		if (result != eCIFTokenEOF)
+			cerr << " '" << m_token_value << '\'';
+		cerr << endl;
+	}
+	
+	return result;
+}
+
+// Top-level driver: keeps consuming global_ sections and data_ blocks until
+// the lookahead token is EOF. Any std::exception escaping the loop is
+// re-reported through error() with an "Error parsing file" prefix.
+void sac_parser::parse_file()
+{
+	try
+	{
+		for (;;)
+		{
+			if (m_lookahead == eCIFTokenEOF)
+				break;
+
+			if (m_lookahead == eCIFTokenGLOBAL)
+				parse_global();
+			else if (m_lookahead == eCIFTokenDATA)
+			{
+				// announce the block by name before consuming the data_ token
+				produce_datablock(m_token_value);
+
+				match(eCIFTokenDATA);
+				parse_data_block();
+			}
+			else
+				error("This file does not seem to be an mmCIF file");
+		}
+	}
+	catch (const exception& failure)
+	{
+		error(string("Error parsing file: '") + failure.what() + "'");
+	}
+}
+
+// Consume a global_ section: the keyword followed by any number of
+// tag/value pairs. The pairs are matched and otherwise ignored.
+void sac_parser::parse_global()
+{
+	match(eCIFTokenGLOBAL);
+
+	for (;;)
+	{
+		if (m_lookahead != eCIFTokenTag)
+			return;
+
+		match(eCIFTokenTag);
+		match(eCIFTokenValue);
+	}
+}
+
+// Parse the body of a data block: any sequence of loop_ constructs, single
+// tag/value pairs and save frames. For every category, row and item found
+// the corresponding produce_xxx callback is invoked.
+//
+// `cat` tracks the category of the most recent single tag/value pair so
+// that consecutive pairs of one category end up in the same row; it is
+// reset around each loop_.
+void sac_parser::parse_data_block()
+{
+	string cat;
+	
+	while (m_lookahead == eCIFTokenLOOP or m_lookahead == eCIFTokenTag or m_lookahead == eCIFTokenSAVE)
+	{
+		switch (m_lookahead)
+		{
+			case eCIFTokenLOOP:
+			{
+				cat.clear();	// should start a new category
+				
+				match(eCIFTokenLOOP);
+				
+				vector<string> tags;
+				
+				// header: all tags must belong to one and the same category
+				while (m_lookahead == eCIFTokenTag)
+				{
+					string cat_name, item_name;
+					std::tie(cat_name, item_name) = split_tag_name(m_token_value);
+					
+					if (cat.empty())
+					{
+						produce_category(cat_name);
+						cat = cat_name;
+					}
+					else if (not iequals(cat, cat_name))
+						error("inconsistent categories in loop_");
+					
+					tags.push_back(item_name);
+
+					match(eCIFTokenTag);
+				}
+				
+				// A loop_ without tags would make the value loop below spin
+				// forever: it would produce rows without consuming a token.
+				if (tags.empty())
+				{
+					error("loop_ without any tags");
+					break;	// leave the switch in case error() returns
+				}
+				
+				// body: one row per complete set of values
+				while (m_lookahead == eCIFTokenValue)
+				{
+					produce_row();
+					
+					for (const auto& tag: tags)
+					{
+						produce_item(cat, tag, m_token_value);
+						match(eCIFTokenValue);
+					}
+				}
+				
+				cat.clear();
+				break;
+			}
+		
+			case eCIFTokenTag:
+			{
+				string cat_name, item_name;
+				std::tie(cat_name, item_name) = split_tag_name(m_token_value);
+
+				// a change of category starts a new category with a single row
+				if (not iequals(cat, cat_name))
+				{
+					produce_category(cat_name);
+					cat = cat_name;
+					produce_row();
+				}
+
+				match(eCIFTokenTag);
+				
+				produce_item(cat, item_name, m_token_value);
+
+				match(eCIFTokenValue);
+				break;
+			}
+			
+			case eCIFTokenSAVE:
+				parse_save_frame();
+				break;
+			
+			default:
+				assert(false);
+				break;
+		}
+	}
+}
+
+// Base-class handling of a save_ token inside a data block: plain CIF files
+// are not expected to contain save frames, so this only reports an error.
+// dict_parser provides its own parse_save_frame for dictionaries.
+void sac_parser::parse_save_frame()
+{
+	error("A regular CIF file should not contain a save frame");
+}
+
+// --------------------------------------------------------------------
+
+// Construct a parser reading from `is` that stores what it parses in `f`.
+// No datablock is current until produce_datablock has been called.
+parser::parser(std::istream& is, file& f)
+	: sac_parser(is), m_file(f), m_data_block(nullptr)
+{
+}
+
+// Create a new datablock called `name`, make it the current datablock and
+// append it to the file.
+// NOTE(review): the datablock is allocated with new and handed to
+// m_file.append; presumably the file takes ownership -- confirm.
+void parser::produce_datablock(const string& name)
+{
+	m_data_block = new datablock(name);
+	m_file.append(m_data_block);
+}
+
+// Make the category called `name` in the current datablock the current
+// category (m_cat), using datablock::emplace to obtain it.
+// NOTE(review): m_data_block is dereferenced unchecked; this assumes
+// produce_datablock has been called before -- confirm.
+void parser::produce_category(const string& name)
+{
+	if (VERBOSE >= 4)
+		cerr << "producing category " << name << endl;
+
+	std::tie(m_cat, ignore) = m_data_block->emplace(name);
+}
+
+// Emplace an empty row in the current category and make the category's
+// last row the current row (m_row); presumably that is the row just added.
+void parser::produce_row()
+{
+	if (VERBOSE >= 4)
+		cerr << "producing row for category " << m_cat->name() << endl;
+
+	m_cat->emplace({});
+	m_row = m_cat->back();
+}
+
+// Store `value` under the name `item` in the current row.
+//
+// category	name of the category the item belongs to; it must match the
+//			current category (case insensitive), otherwise an error is
+//			reported
+// item		name of the item, without the category prefix
+// value	the value to store
+void parser::produce_item(const string& category, const string& item, const string& value)
+{
+	if (VERBOSE >= 4)
+		cerr << "producing _" << category << '.' << item << " -> " << value << endl;
+
+	if (not iequals(category, m_cat->name()))
+		error("inconsistent categories in loop_");
+
+	// Store the value that was passed in rather than reaching back into
+	// the tokenizer state; the callers in this file pass m_token_value.
+	m_row[item] = value;
+}
+
+// --------------------------------------------------------------------
+
+// Private scratch data of dict_parser. Validators are collected here while
+// save frames are parsed and moved into the validator by load_dictionary.
+struct dict_parser_data_impl
+{
+	// temporary values for constructing dictionaries
+	vector<validate_category>			m_category_validators;
+	// item validators, keyed on the name of the category they belong to
+	map<string,vector<validate_item>>	m_item_validators;
+};
+
+// Construct a dictionary parser reading from `is` that will fill `validator`.
+// NOTE(review): m_file is passed to the base class before dict_parser's own
+// members are constructed; that is fine as long as the base only stores the
+// reference -- confirm which m_file this names and that it is not used in
+// the base constructor.
+dict_parser::dict_parser(validator& validator, std::istream& is)
+	: parser(is, m_file), m_validator(validator), m_impl(new dict_parser_data_impl)
+{
+}
+
+// Release the scratch data allocated in the constructor.
+dict_parser::~dict_parser()
+{
+	delete m_impl;
+}
+
+// Parse one save frame of a dictionary and turn it into validators.
+//
+// The content of the frame is first collected in a temporary datablock.
+// A frame whose name does not start with an underscore describes a
+// category and yields a validate_category; any other frame describes one
+// or more items and yields/updates validate_item entries. The results are
+// kept in m_impl until load_dictionary stores them in the validator.
+void dict_parser::parse_save_frame()
+{
+	// the item types are needed to look up type validators, collect them once
+	if (not m_collected_item_types)
+		m_collected_item_types = collect_item_types();
+
+	string saveFrameName = m_token_value;
+
+	if (saveFrameName.empty())
+		error("Invalid save frame, should contain more than just 'save_' here");
+	
+	bool isCategorySaveFrame = m_token_value[0] != '_';
+	
+	datablock dict(m_token_value);
+	datablock::iterator cat = dict.end();
+
+	match(eCIFTokenSAVE);
+	while (m_lookahead == eCIFTokenLOOP or m_lookahead == eCIFTokenTag)
+	{
+		if (m_lookahead == eCIFTokenLOOP)
+		{
+			cat = dict.end();	// should start a new category
+				
+			match(eCIFTokenLOOP);
+			
+			vector<string> tags;
+			while (m_lookahead == eCIFTokenTag)
+			{
+				string cat_name, item_name;
+				std::tie(cat_name, item_name) = split_tag_name(m_token_value);
+					
+				if (cat == dict.end())
+					std::tie(cat, ignore) = dict.emplace(cat_name);
+				else if (not iequals(cat->name(), cat_name))
+					error("inconsistent categories in loop_");
+				
+				tags.push_back(item_name);
+				match(eCIFTokenTag);
+			}
+			
+			// Without tags there is no category to add rows to, and the
+			// value loop would never consume a token.
+			if (tags.empty())
+				error("loop_ without any tags in save frame");
+			
+			// cat is only valid when at least one tag was seen; testing it
+			// here keeps the loop safe even if error() were to return
+			while (cat != dict.end() and m_lookahead == eCIFTokenValue)
+			{
+				cat->emplace({});
+				auto row = cat->back();
+				
+				for (const auto& tag: tags)
+				{
+					row[tag] = m_token_value;
+					match(eCIFTokenValue);
+				}
+			}
+			
+			cat = dict.end();
+		}
+		else
+		{
+			string cat_name, item_name;
+			std::tie(cat_name, item_name) = split_tag_name(m_token_value);
+
+			if (cat == dict.end() or not iequals(cat->name(), cat_name))
+				std::tie(cat, ignore) = dict.emplace(cat_name);
+
+			match(eCIFTokenTag);
+			
+			// single tag/value pairs of one category share a single row
+			if (cat->empty())
+				cat->emplace({});
+			cat->back()[item_name] = m_token_value;
+			
+			match(eCIFTokenValue);
+		}
+	}
+
+	match(eCIFTokenSAVE);
+	
+	if (isCategorySaveFrame)
+	{
+		string category = dict.first_item("_category.id");
+
+		// keys are stored as item names, without the category prefix
+		vector<string> keys;
+		for (auto k: dict["category_key"])
+			keys.push_back(get<1>(split_tag_name(k["name"].as<string>())));
+		
+		iset groups;
+		for (auto g: dict["category_group"])
+			groups.insert(g["id"].as<string>());
+			
+		m_impl->m_category_validators.push_back(validate_category{category, keys, groups});
+	}
+	else
+	{
+		// if the type code is missing, this must be a pointer, just skip it
+		string type_code = dict.first_item("_item_type.code");
+
+		const validate_type* tv = nullptr;
+		if (not (type_code.empty() or type_code == "?"))
+			tv = m_validator.get_validator_for_type(type_code);
+
+		iset ess;
+		for (auto e: dict["item_enumeration"])
+			ess.insert(e["value"].as<string>());
+		
+		// collect the dict from our data_block and construct validators
+		for (auto i: dict["item"])
+		{
+			string tag_name, category, mandatory;
+			
+			cif::tie(tag_name, category, mandatory) = i.get("name", "category_id", "mandatory_code");
+			
+			string cat_name, item_name;
+			std::tie(cat_name, item_name) = split_tag_name(tag_name);
+			
+			if (cat_name.empty() or item_name.empty())
+				error("Invalid tag name in _item.name " + tag_name);
+
+			if (not iequals(category, cat_name) and not (category.empty() or category == "?"))
+				error("specified category id does not match the implicit category name for tag '" + tag_name + '\'');
+			else
+				category = cat_name;
+			
+			auto& ivs = m_impl->m_item_validators[category];
+			
+			auto vi = find(ivs.begin(), ivs.end(), validate_item{item_name});
+			if (vi == ivs.end())
+				ivs.push_back(validate_item{item_name, iequals(mandatory, "yes"), tv, ess});
+			else
+			{
+				// need to update the item_validator?
+				// the definition in the item's own save frame takes precedence
+				if (vi->m_mandatory != (iequals(mandatory, "yes")))
+				{
+					if (VERBOSE > 2)
+					{
+						cerr << "inconsistent mandatory value for " << tag_name << " in dictionary" << endl;
+						
+						if (iequals(tag_name, saveFrameName))
+							cerr << "choosing " << mandatory << endl;
+						else
+							cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << endl;
+					}
+
+					if (iequals(tag_name, saveFrameName))
+						vi->m_mandatory = (iequals(mandatory, "yes"));
+				}
+
+				if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
+				{
+					if (VERBOSE > 1)
+						cerr << "inconsistent type for " << tag_name << " in dictionary" << endl;
+				}
+
+//				vi->m_mandatory = (iequals(mandatory, "yes"));
+				if (vi->m_type == nullptr)
+					vi->m_type = tv;
+
+				vi->m_enums.insert(ess.begin(), ess.end());
+
+				// anything else yet?
+				// ...
+			}
+		}
+	}
+}
+
+// Connect child items to their parent items as listed in the dictionary
+// category pdbx_item_linked_group_list, then report every item validator
+// that still has no type validator.
+void dict_parser::link_items()
+{
+	if (m_data_block == nullptr)
+		error("no datablock");
+
+	datablock& dictionary = *m_data_block;
+
+	for (auto link: dictionary["pdbx_item_linked_group_list"])
+	{
+		string child, parent;
+		cif::tie(child, parent) = link.get("child_name", "parent_name");
+
+		// both ends of the link must be known items
+		auto child_validator = m_validator.get_validator_for_item(child);
+		if (child_validator == nullptr)
+			error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
+
+		auto parent_validator = m_validator.get_validator_for_item(parent);
+		if (parent_validator == nullptr)
+			error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
+
+		child_validator->set_parent(parent_validator);
+	}
+
+	// now make sure the item_type is specified for all item_validators
+	for (auto& category_validator: m_validator.m_category_validators)
+		for (auto& item_validator: category_validator.m_item_validators)
+		{
+			if (item_validator.m_type != nullptr)
+				continue;
+
+			cerr << "Missing item_type for " << item_validator.m_tag << endl;
+		}
+}
+
+// Parse a complete dictionary and fill the validator with the category,
+// item and type validators found in it.
+//
+// While parsing, m_data_block points to a temporary datablock owned by
+// this function; the previous value is restored on every exit path.
+// Parse errors are only reported when VERBOSE is set: whatever was
+// collected up to the error is still stored.
+void dict_parser::load_dictionary()
+{
+	unique_ptr<datablock> dict;
+	datablock* saved_datablock = m_data_block;
+
+	// Restores m_data_block when this function is left, also by exception.
+	// Declared after `dict` so the pointer is reset before the temporary
+	// datablock is destroyed.
+	struct datablock_restorer
+	{
+		datablock*&	m_target;
+		datablock*	m_saved;
+		
+		~datablock_restorer()		{ m_target = m_saved; }
+	} restorer = { m_data_block, saved_datablock };
+	
+	try
+	{
+		while (m_lookahead != eCIFTokenEOF)
+		{
+			switch (m_lookahead)
+			{
+				case eCIFTokenGLOBAL:
+					parse_global();
+					break;
+				
+				default:
+				{
+					dict.reset(new datablock(m_token_value));	// dummy datablock, for constructing the validator only
+					m_data_block = dict.get();
+					
+					match(eCIFTokenDATA);
+					parse_data_block();
+					break;
+				}
+			}
+		}
+	}
+	catch (const exception& ex)
+	{
+		if (VERBOSE)
+			cerr << "Error parsing dictionary: '" << ex.what() << "'" << endl;
+	}
+
+	// store all validators
+	for (auto& ic: m_impl->m_category_validators)
+		m_validator.add_category_validator(move(ic));
+	m_impl->m_category_validators.clear();
+	
+	for (auto& iv: m_impl->m_item_validators)
+	{
+		auto cv = m_validator.get_validator_for_category(iv.first);
+		if (cv == nullptr)
+		{
+			error("Undefined category '" + iv.first + "'");
+			continue;	// never dereference a null validator, even if error() returns
+		}
+
+		for (auto& v: iv.second)
+			const_cast<validate_category*>(cv)->add_item_validator(move(v));
+	}
+		
+	// check all item validators for having a type_validator
+	
+	if (dict)
+		link_items();
+
+	// store meta information
+	// m_data_block is null when no data block was parsed and none was
+	// current on entry
+	if (m_data_block != nullptr)
+	{
+		datablock::iterator info;
+		bool n;
+		std::tie(info, n) = m_data_block->emplace("dictionary");
+		// NOTE(review): if `n` means "newly created", the category is empty
+		// here and front() looks suspicious -- confirm against
+		// datablock::emplace whether this should be `not n`.
+		if (n)
+		{
+			auto r = info->front();
+			m_validator.dict_name(r["title"].as<string>());
+			m_validator.dict_version(r["version"].as<string>());
+		}
+	}
+
+	m_impl->m_item_validators.clear();
+}
+
+// Register a type validator for every row of the dictionary category
+// item_type_list. Returns true when at least one type was registered.
+bool dict_parser::collect_item_types()
+{
+	if (m_data_block == nullptr)
+		error("no datablock");
+
+	bool added_any = false;
+
+	for (auto& type_row: (*m_data_block)["item_type_list"])
+	{
+		string code, primitive_code, construct;
+		cif::tie(code, primitive_code, construct) = type_row.get("code", "primitive_code", "construct");
+
+		// the construct is a regular expression with escaped newlines and
+		// tabs and with backslash-newline line continuations
+		ba::replace_all(construct, "\\n", "\n");
+		ba::replace_all(construct, "\\t", "\t");
+		ba::replace_all(construct, "\\\n", "");
+
+		validate_type type_validator = {
+			code, map_to_primitive_type(primitive_code), boost::regex(construct, boost::regex::egrep)
+		};
+
+// Do not replace an already defined type validator, this won't work with pdbx_v40
+// as it has a name that is too strict for its own names :-)
+//		if (m_file_impl.m_type_validators.count(v))
+//			m_file_impl.m_type_validators.erase(v);
+
+		m_validator.add_type_validator(move(type_validator));
+
+		if (VERBOSE >= 5)
+			cerr << "Added type " << code << " (" << primitive_code << ") => " << construct << endl;
+
+		added_any = true;
+	}
+
+	return added_any;
+}
+
+
+}
--- a/src/cif-utils.cpp
+++ b/src/cif-utils.cpp
+// CIF parser
+
+#include "libcif/config.h"
+
+#include <tuple>
+#include <iostream>
+
+#include <boost/algorithm/string.hpp>
+
+#include "libcif/cif-utils.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+
+namespace cif
+{
+
+// --------------------------------------------------------------------
+// This really makes a difference, having our own tolower routines
+
+// Lookup table with one entry for each possible byte value. The entries for
+// 0x41..0x5a ('A'..'Z') hold 0x61..0x7a ('a'..'z'); every other entry holds
+// its own index, so only ASCII upper case letters are changed.
+const uint8 kCharToLowerMap[256] =
+{
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 
+	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 
+	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 
+	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 
+	0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 
+	0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 
+	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 
+	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+// --------------------------------------------------------------------
+
+// Case insensitive equality of two strings: true when both have the same
+// length and all characters are equal after conversion with tolower.
+bool iequals(const string& a, const string& b)
+{
+	if (a.length() != b.length())
+		return false;
+
+	for (size_t ix = 0; ix < a.length(); ++ix)
+	{
+		if (tolower(a[ix]) != tolower(b[ix]))
+			return false;
+	}
+
+	return true;
+}
+
+// Case insensitive equality of two zero terminated strings.
+bool iequals(const char* a, const char* b)
+{
+	while (*a != 0 and *b != 0)
+	{
+		if (tolower(*a) != tolower(*b))
+			return false;
+
+		++a;
+		++b;
+	}
+
+	// at least one string is exhausted, they are equal only if both are
+	return *a == *b;
+}
+
+// Case insensitive three-way comparison of two strings. The result is the
+// difference of the first pair of characters that differ after conversion
+// with tolower. If one string is a prefix of the other the longer one is
+// the greater (1 or -1); equal strings give 0.
+int icompare(const string& a, const string& b)
+{
+	auto pa = a.begin();
+	auto pb = b.begin();
+
+	while (pa != a.end() and pb != b.end())
+	{
+		int delta = tolower(*pa) - tolower(*pb);
+		if (delta != 0)
+			return delta;
+
+		++pa;
+		++pb;
+	}
+
+	if (pa != a.end())
+		return 1;
+
+	if (pb != b.end())
+		return -1;
+
+	return 0;
+}
+
+// Case insensitive three-way comparison of two zero terminated strings,
+// with the same result convention as the std::string overload.
+int icompare(const char* a, const char* b)
+{
+	while (*a != 0 and *b != 0)
+	{
+		int delta = tolower(*a) - tolower(*b);
+		if (delta != 0)
+			return delta;
+
+		++a;
+		++b;
+	}
+
+	if (*a != 0)
+		return 1;
+
+	if (*b != 0)
+		return -1;
+
+	return 0;
+}
+
+// Convert all characters of `s` to lower case, in place.
+void to_lower(string& s)
+{
+	for (auto pos = s.begin(); pos != s.end(); ++pos)
+		*pos = tolower(*pos);
+}
+
+// Return a lower case copy of `s`, the argument itself is left untouched.
+string to_lower_copy(const string& s)
+{
+	string lowered(s);
+	to_lower(lowered);
+	return lowered;
+}
+
+// --------------------------------------------------------------------
+
+// Split a tag of the form _category.item into its category and item name.
+// The leading underscore and the first dot are not part of the result.
+// Throws runtime_error when the tag is empty, does not start with an
+// underscore or does not contain a dot.
+tuple<string,string> split_tag_name(const string& tag)
+{
+	if (tag.empty())
+		throw runtime_error("empty tag");
+
+	if (tag.front() != '_')
+		throw runtime_error("tag does not start with underscore");
+
+	const auto dot = tag.find('.');
+	if (dot == string::npos)
+		throw runtime_error("tag does not contain dot");
+
+	string category = tag.substr(1, dot - 1);
+	string item = tag.substr(dot + 1);
+
+	return make_tuple(category, item);
+}	
+
+// --------------------------------------------------------------------
+// Simplified line breaking code taken from a decent text editor.
+// In this case, simplified means it only supports ASCII.
+
+// Line break classes of characters. The order of the enumerators matters:
+// the first 27 (kLBC_OpenPunctuation up to and including kLBC_HangulTJamo)
+// are used as row and column index into the 27x27 pair table in
+// next_line_break, and that function compares classes against
+// kLBC_MandatoryBreak to recognise the ones that are not in the table.
+enum LineBreakClass
+{
+	kLBC_OpenPunctuation,
+	kLBC_ClosePunctuation,
+	kLBC_CloseParenthesis,
+	kLBC_Quotation,
+	kLBC_NonBreaking,
+	kLBC_Nonstarter,
+	kLBC_Exlamation,
+	kLBC_SymbolAllowingBreakAfter,
+	kLBC_InfixNumericSeparator,
+	kLBC_PrefixNumeric,
+	kLBC_PostfixNumeric,
+	kLBC_Numeric,
+	kLBC_Alphabetic,
+	kLBC_Ideographic,
+	kLBC_Inseperable,
+	kLBC_Hyphen,
+	kLBC_BreakAfter,
+	kLBC_BreakBefor,
+	kLBC_BreakOpportunityBeforeAndAfter,
+	kLBC_ZeroWidthSpace,
+	kLBC_CombiningMark,
+	kLBC_WordJoiner,
+	kLBC_HangulLVSyllable,
+	kLBC_HangulLVTSyllable,
+	kLBC_HangulLJamo,
+	kLBC_HangulVJamo,
+	kLBC_HangulTJamo,
+
+	// the classes below have no row or column in the pair table
+	kLBC_MandatoryBreak,
+	kLBC_CarriageReturn,
+	kLBC_LineFeed,
+	kLBC_NextLine,
+	kLBC_Surrogate,
+	kLBC_Space,
+	kLBC_ContigentBreakOpportunity,
+	kLBC_Ambiguous,
+	kLBC_ComplexContext,
+	kLBC_Unknown
+};
+
+// The line break class of each of the 128 ASCII characters, indexed by
+// character code. Eight entries per row, except where the entry for the
+// comma has been singled out.
+const LineBreakClass kASCII_LBTable[128] =
+{
+	kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
+	kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak, kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,
+	kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
+	kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
+	kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic, kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,
+	kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric, 
+	
+	// comma treated differently here, it is not a numeric separator in PDB
+	kLBC_SymbolAllowingBreakAfter/*	kLBC_InfixNumericSeparator */,
+	
+	kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,
+	kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
+	kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
+	kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark
+};
+
+// Find the next position in [text, end) where a line may be broken.
+//
+// Returns an iterator to the first character after the break opportunity,
+// or `end` when there is none. A newline, vertical tab or form feed is a
+// mandatory break and the returned iterator points just past it. Spaces
+// stay attached to the text that precedes them.
+//
+// Only ASCII is classified. Bytes of 128 and above and the carriage return
+// are treated as alphabetic, so every class used to index the pair table
+// is within its bounds.
+string::const_iterator next_line_break(string::const_iterator text, string::const_iterator end)
+{
+	if (text == end)
+		return text;
+	
+	enum break_action
+	{ 
+		DBK = 0, // direct break 	(blank in table)
+		IBK, 	// indirect break	(% in table)
+		PBK,	// prohibited break (^ in table)
+		CIB,	// combining indirect break
+		CPB		// combining prohibited break
+	};
+
+	// rows: class of the character before the candidate break
+	// columns: class of the character after it
+	static const break_action brkTable[27][27] = {
+	//   	OP  	CL  	CP  	QU  	GL  	NS  	EX  	SY  	IS  	PR  	PO  	NU  	AL  	ID  	IN  	HY  	BA  	BB  	B2  	ZW  	CM  	WJ  	H2  	H3  	JL  	JV  	JT
+/* OP */ { 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	CPB, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK, 	PBK },
+/* CL */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* CP */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* QU */ { 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK },
+/* GL */ { 	IBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK },
+/* NS */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* EX */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* SY */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* IS */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* PR */ { 	IBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK },
+/* PO */ { 	IBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* NU */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* AL */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* ID */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* IN */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* HY */ { 	DBK, 	PBK, 	PBK, 	IBK, 	DBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* BA */ { 	DBK, 	PBK, 	PBK, 	IBK, 	DBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* BB */ { 	IBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK },
+/* B2 */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	PBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* ZW */ { 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* CM */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	DBK, 	IBK, 	IBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	DBK },
+/* WJ */ { 	IBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	IBK },
+/* H2 */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK },
+/* H3 */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK },
+/* JL */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	IBK, 	IBK, 	IBK, 	IBK, 	DBK },
+/* JV */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK },
+/* JT */ { 	DBK, 	PBK, 	PBK, 	IBK, 	IBK, 	IBK, 	PBK, 	PBK, 	PBK, 	DBK, 	IBK, 	DBK, 	DBK, 	DBK, 	IBK, 	IBK, 	IBK, 	DBK, 	DBK, 	PBK, 	CIB, 	PBK, 	DBK, 	DBK, 	DBK, 	DBK, 	IBK },
+		};
+
+	uint8 ch = static_cast<uint8>(*text);
+
+	LineBreakClass cls;
+	
+	if (ch == '\n')
+		cls = kLBC_MandatoryBreak;
+	else if (ch < 128)
+	{
+		cls = kASCII_LBTable[ch];
+		if (cls > kLBC_MandatoryBreak and cls != kLBC_Space)	// duh...
+			cls = kLBC_Alphabetic;
+	}
+	else
+		cls = kLBC_Alphabetic;	// non-ASCII: kLBC_Unknown has no row in brkTable
+
+	// a leading space must not be separated from what follows
+	if (cls == kLBC_Space)
+		cls = kLBC_WordJoiner;
+
+	LineBreakClass ncls = cls;
+
+	while (++text != end and cls != kLBC_MandatoryBreak)
+	{
+		ch = static_cast<uint8>(*text);
+		
+		// class of the previous character, spaces included
+		LineBreakClass lcls = ncls;
+		
+		if (ch == '\n')
+		{
+			++text;
+			break;
+		}
+
+		// kASCII_LBTable has 128 entries only
+		ncls = ch < 128 ? kASCII_LBTable[ch] : kLBC_Alphabetic;
+	
+		if (ncls == kLBC_Space)
+			continue;
+		
+		// vertical tab and form feed: break after them, as is done for
+		// the first character
+		if (ncls == kLBC_MandatoryBreak)
+		{
+			++text;
+			break;
+		}
+		
+		// anything else without a column in brkTable (carriage return)
+		if (ncls > kLBC_MandatoryBreak)
+			ncls = kLBC_Alphabetic;
+		
+		break_action brk = brkTable[cls][ncls];
+		
+		// an indirect break is a break only when spaces intervene
+		if (brk == DBK or (brk == IBK and lcls == kLBC_Space))
+			break;
+		
+		cls = ncls;
+	}
+
+	return text;
+}
+
+// Break a single paragraph into lines of at most `width` characters.
+//
+// The text is first cut into unbreakable segments at the positions found
+// by next_line_break. The segments are then distributed over lines so that
+// the sum of the squared unused space at the end of each line, the last
+// line excepted, is minimal. Trailing white space of a line does not count
+// as used space but is kept in the returned strings.
+//
+// A segment that is wider than `width` is placed on a line of its own,
+// that line then exceeds `width`.
+vector<string> wrap_line(const string& text, unsigned int width)
+{
+	vector<string> result;
+	vector<size_t> offsets = { 0 };
+
+	// offsets[k] is the start of segment k, offsets.back() the text length
+	auto b = text.begin();
+	while (b != text.end())
+	{
+		auto e = next_line_break(b, text.end());
+		
+		offsets.push_back(e - text.begin());
+		
+		b = e;
+	}
+	
+	size_t count = offsets.size() - 1;
+	
+	// minima[j] is the lowest cost of breaking the first j segments into
+	// lines, breaks[j] the segment at which the last of those lines starts
+	const size_t kUnreachable = ~static_cast<size_t>(0);
+	
+	vector<size_t> minima(count + 1, kUnreachable);
+	minima[0] = 0;
+	vector<size_t> breaks(count + 1, 0);
+	
+	for (size_t i = 0; i < count; ++i)
+	{
+		if (minima[i] == kUnreachable)
+			continue;
+		
+		for (size_t j = i + 1; j <= count; ++j)
+		{
+			size_t w = offsets[j] - offsets[i];
+
+			// a single segment is always accepted, otherwise a segment
+			// wider than width would make the remainder unreachable
+			if (w > width and j > i + 1)
+				break;
+
+			while (w > 0 and isspace(static_cast<unsigned char>(text[offsets[i] + w - 1])))
+				--w;
+
+			size_t cost = minima[i];
+			if (j < count)	// last line may be shorter
+			{
+				size_t slack = w < width ? width - w : 0;
+				cost += slack * slack;
+			}
+
+			if (cost < minima[j])
+			{
+				minima[j] = cost;
+				breaks[j] = i;
+			}
+		}
+	}
+	
+	// walk back from the end to collect the lines, last line first
+	size_t j = count;
+	while (j > 0)
+	{
+		size_t i = breaks[j];
+		result.push_back(text.substr(offsets[i], offsets[j] - offsets[i])); 
+		j = i;
+	}
+	
+	reverse(result.begin(), result.end());
+
+	return result;
+}
+
+// Wrap a text that may consist of several paragraphs separated by newline
+// characters. Each paragraph is wrapped on its own with wrap_line; an empty
+// paragraph results in one empty line.
+vector<string> word_wrap(const string& text, unsigned int width)
+{
+	vector<string> paragraphs;
+	ba::split(paragraphs, text, ba::is_any_of("\n"));
+
+	vector<string> wrapped;
+
+	for (auto& paragraph: paragraphs)
+	{
+		if (not paragraph.empty())
+		{
+			auto lines = wrap_line(paragraph, width);
+			wrapped.insert(wrapped.end(), lines.begin(), lines.end());
+		}
+		else
+			wrapped.push_back("");
+	}
+
+	return wrapped;
+}
+
+}
--- a/src/cif-validator.cpp
+++ b/src/cif-validator.cpp
+// cif parsing library
+
+#include <boost/algorithm/string.hpp>
+
+// since gcc's regex is crashing....
+#include <boost/regex.hpp>
+
+#include "libcif/cif++.h"
+#include "libcif/cif-parser.h"
+#include "libcif/cif-validator.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+
+extern int VERBOSE;
+
+namespace cif
+{
+
+// Translate a primitive type code as found in a dictionary ("char",
+// "uchar" or "numb", in any case) into the corresponding DDL_PrimitiveType.
+// Throws validation_error for any other code.
+DDL_PrimitiveType map_to_primitive_type(const string& s)
+{
+	if (iequals(s, "char"))
+		return ptChar;
+
+	if (iequals(s, "uchar"))
+		return ptUChar;
+
+	if (iequals(s, "numb"))
+		return ptNumb;
+
+	throw validation_error("Not a known primitive type");
+}
+
+// --------------------------------------------------------------------
+
+// Three-way comparison of two values of this type.
+//
+// Returns a value less than, equal to or greater than zero when `a` sorts
+// before, equal to or after `b`. An empty value sorts before any non-empty
+// value. Numbers are compared by value, where a difference of at most the
+// machine epsilon counts as equal. Character data is compared case
+// insensitively with runs of spaces collapsed into a single space.
+int validate_type::compare(const char* a, const char* b) const
+{
+	int result = 0;
+	
+	if (*a == 0)
+		result = *b == 0 ? 0 : -1;
+	else if (*b == 0)
+		result = +1;		// a is known to be non-empty here
+	else
+	{
+		// nothing below throws, hence no exception handling
+		switch (m_primitive_type)
+		{
+			case ptNumb:
+			{
+				double da = strtod(a, nullptr);
+				double db = strtod(b, nullptr);
+				
+				// compare against +/- epsilon directly rather than calling
+				// abs, which may resolve to the integer overload
+				const double d = da - db;
+				const double eps = numeric_limits<double>::epsilon();
+				
+				if (d > eps)
+					result = 1;
+				else if (d < -eps)
+					result = -1;
+				break;
+			}
+			
+			case ptUChar:
+			case ptChar:
+			{
+				// CIF is guaranteed to have ascii only, therefore this primitive code will do
+				// also, we're collapsing spaces
+				
+				auto ai = a, bi = b;
+				for (;;)
+				{
+					if (*ai == 0)
+					{
+						if (*bi != 0)
+							result = -1;
+						break;
+					}
+					else if (*bi == 0)
+					{
+						result = 1;
+						break;
+					}
+					
+					char ca = toupper(*ai);
+					char cb = toupper(*bi);
+					
+					result = ca - cb;
+					
+					if (result != 0)
+						break;
+					
+					// both are at a space: skip to the last one of the run
+					if (ca == ' ')
+					{
+						while (ai[1] == ' ')
+							++ai;
+						while (bi[1] == ' ')
+							++bi;
+					}
+					
+					++ai;
+					++bi;
+				}
+				
+				break;
+			}
+		}
+	}
+	
+	return result;
+}
+
+// --------------------------------------------------------------------
+
+// Register `parent` as the parent of this item. When a parent is given:
+// this item takes over the parent's type if it has none itself, it is
+// added to the parent's children, and it is added to the parent's foreign
+// keys when it is the one and only key of its own category.
+void validate_item::set_parent(validate_item* parent)
+{
+	m_parent = parent;
+
+	if (m_parent == nullptr)
+		return;
+
+	if (m_type == nullptr)
+		m_type = m_parent->m_type;
+
+	m_parent->m_children.insert(this);
+
+	const bool is_sole_key = m_category->m_keys == vector<string>{m_tag};
+	if (is_sole_key)
+		m_parent->m_foreign_keys.insert(this);
+}
+
+// Validate `value` for this item. Empty values and the special values
+// "?" and "." are always accepted. Any other value must match the regular
+// expression of the item's type, if it has one, and must be one of the
+// enumerated values, if there are any. Throws validation_error otherwise.
+void validate_item::operator()(string value) const
+{
+	if (VERBOSE >= 4)
+		cout << "validating '" << value << "' for '" << m_tag << "'" << endl;
+
+	if (value.empty() or value == "?" or value == ".")
+		return;
+
+	if (m_type != nullptr and not boost::regex_match(value, m_type->m_rx))
+		throw validation_error("Value '" + value + "' does not match type expression for type " + m_type->m_name + " in item " + m_tag);
+
+	if (not m_enums.empty() and m_enums.count(value) == 0)
+		throw validation_error("Value '" + value + "' is not in the list of allowed values for item " + m_tag);
+}
+
+// --------------------------------------------------------------------
+
+// Add the item validator `v` to this category. The item is recorded as a
+// mandatory field when it is flagged as such, and its category is set to
+// this category before it is stored. When an equivalent validator is
+// already present nothing is stored, this is reported at VERBOSE >= 4.
+void validate_category::add_item_validator(validate_item&& v)
+{
+	if (v.m_mandatory)
+		m_mandatory_fields.insert(v.m_tag);
+
+	v.m_category = this;
+
+	auto r = m_item_validators.insert(move(v));
+	
+	// v must not be used after the move: report the tag of the element
+	// that is in the way instead
+	if (not r.second and VERBOSE >= 4)
+		cout << "Could not add validator for item " << r.first->m_tag << " to category " << m_name << endl;
+}
+
+// Look up the validator for the item called `tag` in this category.
+// Returns nullptr when there is none, which is reported at VERBOSE > 4.
+const validate_item* validate_category::get_validator_for_item(string tag) const
+{
+	auto found = m_item_validators.find(validate_item{tag});
+	if (found != m_item_validators.end())
+		return &*found;
+
+	if (VERBOSE > 4)
+		cout << "No validator for tag " << tag << endl;
+
+	return nullptr;
+}
+
+// --------------------------------------------------------------------
+
+// nothing to set up beyond what the members do themselves
+validator::validator() = default;
+
+// nothing to release beyond what the members do themselves
+validator::~validator() = default;
+
+// Register v as a type validator. When an equivalent one is already present
+// the new one is not stored, which is reported on cout if VERBOSE > 4.
+void validator::add_type_validator(validate_type&& v)
+{
+	// copy the name first: v must not be read any more once it has been
+	// handed to insert() as an rvalue
+	const auto name = v.m_name;
+
+	auto r = m_type_validators.insert(move(v));
+	if (not r.second and VERBOSE > 4)
+		cout << "Could not add validator for type " << name << endl;
+}
+
+// Look up the validator for the type called type_code. Returns nullptr when
+// there is none (reported on cout if VERBOSE > 4).
+const validate_type* validator::get_validator_for_type(string type_code) const
+{
+	// only used as search key, primitive type and expression are dummies
+	const validate_type probe{ type_code, ptChar, boost::regex() };
+
+	auto pos = m_type_validators.find(probe);
+	if (pos != m_type_validators.end())
+		return &*pos;
+
+	if (VERBOSE > 4)
+		cout << "No validator for type " << type_code << endl;
+
+	return nullptr;
+}
+
+// Register v as a category validator. When an equivalent one is already
+// present the new one is not stored, which is reported on cout if
+// VERBOSE > 4.
+void validator::add_category_validator(validate_category&& v)
+{
+	// copy the name first: v must not be read any more once it has been
+	// handed to insert() as an rvalue
+	const auto name = v.m_name;
+
+	auto r = m_category_validators.insert(move(v));
+	if (not r.second and VERBOSE > 4)
+		cout << "Could not add validator for category " << name << endl;
+}
+
+// Look up the validator for the category called category. Returns nullptr
+// when there is none (reported on cout if VERBOSE > 4).
+const validate_category* validator::get_validator_for_category(string category) const
+{
+	auto pos = m_category_validators.find(validate_category{category});
+	if (pos != m_category_validators.end())
+		return &*pos;
+
+	if (VERBOSE > 4)
+		cout << "No validator for category " << category << endl;
+
+	return nullptr;
+}
+
+// Look up the item validator for a full tag. The tag is split into a
+// category and an item name by split_tag_name, the category validator is
+// located and asked for the item. Returns nullptr when either of the two is
+// unknown (reported on cout if VERBOSE > 4).
+validate_item* validator::get_validator_for_item(string tag) const
+{
+	string category_name, item_name;
+	std::tie(category_name, item_name) = split_tag_name(tag);
+
+	const validate_category* cat_validator = get_validator_for_category(category_name);
+
+	const validate_item* found = nullptr;
+	if (cat_validator != nullptr)
+		found = cat_validator->get_validator_for_item(item_name);
+
+	if (found == nullptr and VERBOSE > 4)
+		cout << "No validator for item " << tag << endl;
+
+	// the category hands out a const pointer, callers of this method get a
+	// mutable one
+	return const_cast<validate_item*>(found);
+}
+
+// Report a validation problem: throw validation_error in strict mode,
+// otherwise write msg to cerr provided VERBOSE is non-zero.
+void validator::report_error(const string& msg)
+{
+	if (m_strict)
+		throw validation_error(msg);
+
+	if (VERBOSE)
+		cerr << msg << endl;
+}
+
+
+}
--- a/src/cif2pdb.cpp
+++ b/src/cif2pdb.cpp
--- a/src/compound.cpp
+++ b/src/compound.cpp
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#include "libcif/config.h"
+
+#include <map>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem/operations.hpp>
+#include <boost/filesystem/fstream.hpp>
+
+#include "libcif/compound.h"
+#include "libcif/cif++.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+namespace fs = boost::filesystem;
+
+namespace libcif
+{
+
+// Single access point for creating compound objects. Compounds are read
+// from the directory given in the CLIBD_MON environment variable and kept
+// in m_compounds so every id is loaded only once.
+class compound_factory
+{
+  public:
+
+	// the one and only factory, created on first use
+	static compound_factory& instance();
+
+	// the compound for id, or nullptr if its file cannot be opened
+	const compound* create(string id);
+
+  private:
+
+	// only instance() creates the factory
+	compound_factory();
+	~compound_factory();
+
+	// storage for the object returned by instance()
+	static compound_factory* sInstance;
+
+	// directory taken from the CLIBD_MON environment variable
+	fs::path m_clibd_mon;
+
+	// all compounds created so far
+	vector<compound*> m_compounds;
+};
+
+
+// --------------------------------------------------------------------
+// compound
+
+// Return the chemical formula of this compound as text. When the compound
+// contains carbon, C comes first and H second; all remaining elements follow
+// in alphabetical order of their symbol. An element count is only written
+// when it is larger than one, elements are separated by a single space and a
+// non-zero net charge (see charge()) is appended as a last number.
+string compound::formula() const
+{
+	// element symbol -> number of atoms; the map keeps the symbols sorted
+	map<string,uint32> atoms;
+
+	// iterate by reference, there is no need to copy every atom
+	for (const auto& atom: m_atoms)
+		atoms[atom_type_traits(atom.type_symbol).symbol()] += 1;
+
+	string result;
+
+	// append one element to result
+	auto append = [&result](const string& symbol, uint32 count)
+	{
+		if (not result.empty())
+			result += ' ';
+
+		result += symbol;
+		if (count > 1)
+			result += to_string(count);
+	};
+
+	auto c = atoms.find("C");
+	if (c != atoms.end())
+	{
+		append(c->first, c->second);
+		atoms.erase(c);
+
+		// hydrogen only moves to the front when carbon is present
+		auto h = atoms.find("H");
+		if (h != atoms.end())
+		{
+			append(h->first, h->second);
+			atoms.erase(h);
+		}
+	}
+
+	for (const auto& a: atoms)
+		append(a.first, a.second);
+
+	// same sum and rounding as before, now taken from charge()
+	const int net_charge = charge();
+	if (net_charge != 0)
+		result += ' ' + to_string(net_charge);
+
+	return result;
+}
+
+// Return the net charge: the partial charges of all atoms added up and
+// rounded to an integer with lrint.
+int compound::charge() const
+{
+	float total = 0;
+
+	for (const auto& atom: m_atoms)
+		total += atom.partial_charge;
+
+	return lrint(total);
+}
+
+// Translate the group of this compound into a linking type. The result is
+// an empty string for every group that is not handled below.
+string compound::type() const
+{
+	// known groups are (counted from ccp4 monomer dictionary)
+
+	//	D-pyranose
+	//	DNA
+	//	L-PEPTIDE LINKING
+	//	L-SACCHARIDE
+	//	L-peptide
+	//	L-pyranose
+	//	M-peptide
+	//	NON-POLYMER
+	//	P-peptide
+	//	RNA
+	//	furanose
+	//	non-polymer
+	//	non_polymer
+	//	peptide
+	//	pyranose
+	//	saccharide
+
+	// glycine is decided on its id, before the group is looked at
+	if (cif::iequals(m_id, "gly"))
+		return "peptide linking";
+
+	const bool l_peptide =
+		cif::iequals(m_group, "l-peptide") or
+		cif::iequals(m_group, "L-peptide linking") or
+		cif::iequals(m_group, "peptide");
+
+	if (l_peptide)
+		return "L-peptide linking";
+
+	if (cif::iequals(m_group, "DNA"))
+		return "DNA linking";
+
+	if (cif::iequals(m_group, "RNA"))
+		return "RNA linking";
+
+	return string();
+}
+
+// True when the id of this compound is one of the two ids used for water,
+// HOH or H2O (exact, case sensitive comparison).
+bool compound::is_water() const
+{
+	if (m_id == "HOH")
+		return true;
+
+	return m_id == "H2O";
+}
+
+// Return a copy of the atom whose id equals atom_id.
+// Throws out_of_range when this compound has no such atom. The decision is
+// based on the search itself, not on comparing ids afterwards, so an id that
+// happens to equal the id of a default constructed comp_atom is reported as
+// missing too.
+comp_atom compound::get_atom_by_id(const string& atom_id) const
+{
+	for (const auto& a: m_atoms)
+	{
+		if (a.id == atom_id)
+			return a;
+	}
+
+	throw out_of_range("No atom " + atom_id + " in compound " + m_id);
+}
+
+// Return the compound for id, this simply forwards to the compound factory.
+const compound* compound::create(const string& id)
+{
+	auto& factory = compound_factory::instance();
+	return factory.create(id);
+}
+
+// --------------------------------------------------------------------
+// a factory class to generate compounds
+
+compound_factory* compound_factory::sInstance = nullptr;	// stays nullptr until instance() is called for the first time
+
+// Remember the directory named by the CLIBD_MON environment variable.
+// Throws runtime_error when that variable is not set.
+compound_factory::compound_factory()
+{
+	const char* clibd_mon = getenv("CLIBD_MON");
+	if (clibd_mon == nullptr)
+		throw runtime_error("Cannot locate peptide list, please source the CCP4 environment");
+	m_clibd_mon = clibd_mon;
+}
+
+// NOTE: the compounds in m_compounds are not deleted here
+compound_factory::~compound_factory() = default;
+
+// Return the factory, creating it on the first call. Creation may throw,
+// see the constructor; in that case the next call tries again.
+compound_factory& compound_factory::instance()
+{
+	if (not sInstance)
+		sInstance = new compound_factory();
+
+	return *sInstance;
+}
+
+// id is the three letter code
+//
+// Return the compound for id, which is converted to upper case first. A
+// compound that was created before is returned as is; otherwise it is read
+// from <CLIBD_MON>/<first character of id, lower case>/<ID>.cif and stored
+// for later calls. The result is nullptr when that file cannot be opened.
+// An exception thrown while loading the file is passed on unchanged, after
+// the file name has been written to cerr.
+const compound* compound_factory::create(std::string id)
+{
+	ba::to_upper(id);
+
+	compound* result = nullptr;
+	
+	for (auto cmp: m_compounds)
+	{
+		if (cmp->id() == id)
+		{
+			result = cmp;
+			break;
+		}
+	}
+	
+	if (result == nullptr)
+	{
+		fs::path resFile = m_clibd_mon / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
+		fs::ifstream file(resFile);
+		if (file.is_open())
+		{
+			cif::file cf;
+			
+			try
+			{
+				cf.load(file);
+			}
+			catch (const exception& ex)
+			{
+				cerr << "Error while loading " << resFile << endl;
+
+				// rethrow the original exception; 'throw ex' would throw a
+				// copy cut down to std::exception
+				throw;
+			}
+			
+			auto& list = cf["comp_list"];
+			auto row = list["chem_comp"][cif::key("id") == id];
+			
+			// number_atoms_all and number_atoms_nh are read but not used
+			string name, group;
+			uint32 number_atoms_all, number_atoms_nh;
+			cif::tie(name, group, number_atoms_all, number_atoms_nh) =
+				row.get("name", "group", "number_atoms_all", "number_atoms_nh");
+	
+			ba::trim(name);
+			ba::trim(group);
+			
+			auto& comp_atoms = cf["comp_" + id]["chem_comp_atom"];
+			
+			vector<comp_atom> atoms;
+			for (auto atom_row: comp_atoms)
+			{
+				string atom_id, symbol, energy;
+				float charge;
+				
+				cif::tie(atom_id, symbol, energy, charge) = atom_row.get("atom_id", "type_symbol", "type_energy", "partial_charge");
+				
+				atoms.push_back({
+					atom_id, atom_type_traits(symbol).type(), energy, charge
+				});
+			}
+
+			auto& comp_bonds = cf["comp_" + id]["chem_comp_bond"];
+			
+			// bonds are stored once, keyed on the atom ids in the order
+			// found in the file
+			map<tuple<string,string>,float> bonds;
+			for (auto bond_row: comp_bonds)
+			{
+				string atom_id_1, atom_id_2, type;
+				
+				cif::tie(atom_id_1, atom_id_2, type) = bond_row.get("atom_id_1", "atom_id_2", "type");
+				
+				float value = 0;
+				if (type == "single")		value = 1;
+				else if (type == "double")	value = 2;
+				else if (type == "triple")	value = 3;
+				else if (type == "deloc" or type == "aromat")
+											value = 1.5;
+				else
+				{
+					// unknown bond types are reported and treated as single
+					cerr << "Unimplemented chem_comp_bond.type " << type << " in file " << resFile << endl;
+					value = 1.0;
+				}
+				
+				bonds[make_tuple(atom_id_1, atom_id_2)] = value;
+			}
+			
+			result = new compound(id, name, group, move(atoms), move(bonds));
+			m_compounds.push_back(result);
+		}
+	}
+	
+	return result;
+}
+
+// True when a bond between the two atoms is known, the order in which the
+// atom ids are given does not matter.
+bool compound::atoms_bonded(const string& atom_id_1, const string& atom_id_2) const
+{
+	if (m_bonds.count(make_tuple(atom_id_1, atom_id_2)) != 0)
+		return true;
+
+	return m_bonds.count(make_tuple(atom_id_2, atom_id_1)) != 0;
+}
+
+// Return the value stored for the bond between the two atoms, or 0 when
+// there is no such bond. The order of the atom ids does not matter; the
+// order as given is tried first.
+float compound::atom_bond_value(const string& atom_id_1, const string& atom_id_2) const
+{
+	auto bond = m_bonds.find(make_tuple(atom_id_1, atom_id_2));
+	if (bond != m_bonds.end())
+		return bond->second;
+
+	bond = m_bonds.find(make_tuple(atom_id_2, atom_id_1));
+	if (bond != m_bonds.end())
+		return bond->second;
+
+	return 0;
+}
+
+}
--- a/src/pdb2cif-remark3.cpp
+++ b/src/pdb2cif-remark3.cpp
+#include "libpr.h"
+
+#include <map>
+#include <set>
+
+#include <boost/date_time/gregorian/gregorian.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/format.hpp>
+#include <boost/numeric/ublas/matrix.hpp>
+
+#include "peptidedb.h"
+#include "pdb2cif.h"
+#include "libcif/atom_type.h"
+#include "libcif/compound.h"
+#include "libcif/pdb2cif-remark3.h"
+
+using namespace std;
+namespace ba = boost::algorithm;
+
+using cif::datablock;
+using cif::category;
+using cif::row;
+using cif::key;
+using cif::iequals;
+
+static const char* kRedOn = "\033[37;1;41m";	// ANSI escape sequence: bold white text on a red background
+static const char* kRedOff = "\033[0m";	// ANSI escape sequence: reset all text attributes
+
+// --------------------------------------------------------------------
+
+struct TemplateLine	// one entry of the k..._Template tables in this file; member order matters, the tables use positional initialisation
+{
+	const char*						rx;	// regular expression for one line of text; in the tables the capture groups match the names in 'items' one to one wherever 'items' is given
+	int								next_state_offset;	// relative step to another table entry: 1 in most entries, 0 or negative (e.g. 78 - 92) in a few - presumably applied to the current entry index by Remark3Parser, TODO confirm
+	const char*						category;	// category name such as "refine"; left out for entries that store nothing
+	initializer_list<const char*>	items;	// item names within 'category'
+	const char*						ls_restr_type = nullptr;	// only given for "refine_ls_restr" entries (e.g. "t_bond_d"); NOTE(review): presumably the restraint type of the row - confirm in Remark3Parser
+	bool							create_new;	// NOTE(review): presumably asks for a new row instead of reusing an existing one - confirm in Remark3Parser
+};
+
+// --------------------------------------------------------------------
+
+const TemplateLine kBusterTNT_Template[] = {	// line templates handed to Remark3Parser by BUSTER_TNT_Remark3Parser; the /* n */ in front of an entry is its index in this array
+/* 0 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+?))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(COMPLETENESS FOR RANGE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 5 */		{ R"(NUMBER OF REFLECTIONS :\s+(.+?))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 6 */		{ R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
+/* 7 */		{ R"(CROSS-VALIDATION METHOD :\s+(.+?))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 8 */		{ R"(FREE R VALUE TEST SET SELECTION :\s+(.+?))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 9 */		{ R"(R VALUE \(WORKING \+ TEST SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_obs" } },
+/* 10 */	{ R"(R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 11 */	{ R"(FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 14 */	{ R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free_error" } },
+/* 15 */	{ R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
+/* 16 */	{ R"(TOTAL NUMBER OF BINS USED :\s+(.+?))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
+/* 17 */	{ R"(BIN RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_high" } },
+/* 18 */	{ R"(BIN RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_low" } },
+/* 19 */	{ R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
+/* 20 */	{ R"(REFLECTIONS IN BIN \(WORKING \+ TEST SET\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_all" } },
+/* 21 */	{ R"(BIN R VALUE \(WORKING \+ TEST SET\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_all" } },
+/* 22 */	{ R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
+/* 23 */	{ R"(BIN R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_work" } },
+/* 24 */	{ R"(BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free" } },
+/* 25 */	{ R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
+/* 26 */	{ R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
+/* 27 */	{ R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
+/* 28 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 29 */	{ R"(PROTEIN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 30 */	{ R"(NUCLEIC ACID ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 31 */	{ R"(HETEROGEN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 32 */	{ R"(SOLVENT ATOMS :\s+(.+?))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 33 */	{ R"(B VALUES\.)", 1 },
+/* 34 */	{ R"(B VALUE TYPE :\s+(.+?))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 35 */	{ R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+?))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 36 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+?))", 1, "refine", { "B_iso_mean" } },
+/* 37 */	{ R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
+/* 38 */	{ R"(B11 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][1]" } },
+/* 39 */	{ R"(B22 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][2]" } },
+/* 40 */	{ R"(B33 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[3][3]" } },
+/* 41 */	{ R"(B12 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][2]" } },
+/* 42 */	{ R"(B13 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][3]" } },
+/* 43 */	{ R"(B23 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][3]" } },
+/* 44 */	{ R"(ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 45 */	{ R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+?))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
+/* 46 */	{ R"(DPI \(BLOW EQ-10\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_Blow_DPI" } },
+/* 47 */	{ R"(DPI \(BLOW EQ-9\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Blow_DPI" } },
+/* 48 */	{ R"(DPI \(CRUICKSHANK\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "overall_SU_R_Cruickshank_DPI" } },
+/* 49 */	{ R"(DPI \(CRUICKSHANK\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Cruickshank_DPI" } },
+/* 50 */	{ R"(REFERENCES: BLOW, D\. \(2002\) ACTA CRYST D58, 792-797 CRUICKSHANK, D\.W\.J\. \(1999\) ACTA CRYST D55, 583-601)", 1 },
+/* 51 */	{ R"(CORRELATION COEFFICIENTS\.)", 1 },
+/* 52 */	{ R"(CORRELATION COEFFICIENT FO-FC :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
+/* 53 */	{ R"(CORRELATION COEFFICIENT FO-FC FREE :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
+/* 54 */	{ R"(NUMBER OF GEOMETRIC FUNCTION TERMS DEFINED : 15)", 1 },
+/* 55 */	{ R"(TERM COUNT WEIGHT FUNCTION\.)", 1 },
+/* 56 */	{ R"(BOND LENGTHS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_bond_d", true },
+/* 57 */	{ R"(BOND ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_angle_deg", true },
+/* 58 */	{ R"(TORSION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_dihedral_angle_d", true },
+/* 59 */	{ R"(TRIGONAL CARBON PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_trig_c_planes", true },
+/* 60 */	{ R"(GENERAL PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_gen_planes", true },
+/* 61 */	{ R"(ISOTROPIC THERMAL FACTORS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_it", true },
+/* 62 */	{ R"(BAD NON-BONDED CONTACTS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_nbd", true },
+/* 63 */	{ R"(IMPROPER TORSIONS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_improper_torsion", true },
+/* 64 */	{ R"(PSEUDOROTATION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_pseud_angle", true },
+/* 65 */	{ R"(CHIRAL IMPROPER TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_chiral_improper_torsion", true },
+/* 66 */	{ R"(SUM OF OCCUPANCIES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_sum_occupancies", true },
+/* 67 */	{ R"(UTILITY DISTANCES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_distance", true },
+/* 68 */	{ R"(UTILITY ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_angle", true },
+/* 69 */	{ R"(UTILITY TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_torsion", true },
+/* 70 */	{ R"(IDEAL-DIST CONTACT TERM :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_ideal_dist_contact", true },
+/* 71 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
+/* 72 */	{ R"(BOND LENGTHS \(A\) :\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal" }, "t_bond_d", false },
+/* 73 */	{ R"(BOND ANGLES \(DEGREES\) :\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal" }, "t_angle_deg", false },
+/* 74 */	{ R"(PEPTIDE OMEGA TORSION ANGLES \(DEGREES\) :\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal" }, "t_omega_torsion", false },
+/* 75 */	{ R"(OTHER TORSION ANGLES \(DEGREES\) :\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal" }, "t_other_torsion", false },
+/* 76 */	{ R"(TLS DETAILS\.?)", 1 },
+/* 77 */	{ R"(NUMBER OF TLS GROUPS :.+)", 1 },
+/* 78 */	{ R"(TLS GROUP :\s*(\d+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
+/* 79 */	{ R"(SELECTION:\s+(.+?))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
+/* 80 */	{ R"(ORIGIN FOR THE GROUP \(A\):\s+(.+?)\s+(.+?)\s+(.+?))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
+/* 81 */	{ R"(T TENSOR)", 1 },
+/* 82 */	{ R"(T11:\s+(.+?) T22:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
+/* 83 */	{ R"(T33:\s+(.+?) T12:\s+(.+?))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
+/* 84 */	{ R"(T13:\s+(.+?) T23:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
+/* 85 */	{ R"(L TENSOR)", 1 },
+/* 86 */	{ R"(L11:\s+(.+?) L22:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
+/* 87 */	{ R"(L33:\s+(.+?) L12:\s+(.+?))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
+/* 88 */	{ R"(L13:\s+(.+?) L23:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
+/* 89 */	{ R"(S TENSOR)", 1 },
+/* 90 */	{ R"(S11:\s+(.+?) S12:\s+(.+?) S13:\s+(.+?))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
+/* 91 */	{ R"(S21:\s+(.+?) S22:\s+(.+?) S23:\s+(.+?))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
+/* 92 */	{ R"(S31:\s+(.+?) S32:\s+(.+?) S33:\s+(.+?))", 78 - 92, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },	// 78 - 92 is the distance back to entry 78 (TLS GROUP); presumably lets the next TLS group be read with the same entries - TODO confirm in Remark3Parser
+};
+
+// Remark3Parser set up with the BUSTER-TNT template table and an expression
+// for the program name ("BUSTER" or "BUSTER-TNT") with an optional version.
+class BUSTER_TNT_Remark3Parser : public Remark3Parser
+{
+  public:
+	BUSTER_TNT_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kBusterTNT_Template,
+			sizeof(kBusterTNT_Template) / sizeof(kBusterTNT_Template[0]),	// number of entries in the table
+			regex(R"((BUSTER(?:-TNT)?)(?: (\d+(?:\..+)?))?)"))
+	{
+	}
+};
+
+const TemplateLine kCNS_Template[] = {	// line templates handed to Remark3Parser by CNS_Remark3Parser; the /* n */ in front of an entry is its index in this array
+/* 0 */		{ R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
+/* 1 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 2 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 3 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 4 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 5 */		{ R"(DATA CUTOFF HIGH \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
+/* 6 */		{ R"(DATA CUTOFF LOW \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
+/* 7 */		{ R"(COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 8 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 9 */		{ R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
+/* 10 */	{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 11 */	{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 12 */	{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 13 */	{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 14 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 15 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 16 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 17 */	{ R"(ESTIMATED ERROR OF FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
+/* 18 */	{ R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
+/* 19 */	{ R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
+/* 20 */	{ R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
+/* 21 */	{ R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
+/* 22 */	{ R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
+/* 23 */	{ R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
+/* 24 */	{ R"(ESTIMATED ERROR OF FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_error_no_cutoff" } },
+/* 25 */	{ R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
+/* 26 */	{ R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
+/* 27 */	{ R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
+/* 28 */	{ R"(BIN RESOLUTION RANGE HIGH \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
+/* 29 */	{ R"(BIN RESOLUTION RANGE LOW \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
+/* 30 */	{ R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
+/* 31 */	{ R"(REFLECTIONS IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
+/* 32 */	{ R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
+/* 33 */	{ R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
+/* 34 */	{ R"(BIN FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
+/* 35 */	{ R"(BIN FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
+/* 36 */	{ R"(ESTIMATED ERROR OF BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
+/* 37 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 38 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 39 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 40 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 41 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 42 */	{ R"(B VALUES\.)", 1 },
+/* 43 */	{ R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 44 */	{ R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 45 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
+/* 46 */	{ R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
+/* 47 */	{ R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 48 */	{ R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 49 */	{ R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 50 */	{ R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 51 */	{ R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 52 */	{ R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 53 */	{ R"(ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 54 */	{ R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
+/* 55 */	{ R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
+/* 56 */	{ R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
+/* 57 */	{ R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 58 */	{ R"(ESD FROM C-V LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
+/* 59 */	{ R"(ESD FROM C-V SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
+/* 60 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
+/* 61 */	{ R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_bond_d", false },
+/* 62 */	{ R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_angle_deg", false },
+/* 63 */	{ R"(DIHEDRAL ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_dihedral_angle_d", false },
+/* 64 */	{ R"(IMPROPER ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_improper_angle_d", false },
+/* 65 */	{ R"(ISOTROPIC THERMAL MODEL\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
+/* 66 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
+/* 67 */	{ R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcbond_it", false },
+/* 68 */	{ R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcangle_it", false },
+/* 69 */	{ R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scbond_it", false },
+/* 70 */	{ R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scangle_it", false },
+/* 71 */	{ R"(BULK SOLVENT MODELING\.)", 1 },
+/* 72 */	{ R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
+/* 73 */	{ R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
+/* 74 */	{ R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
+/* 75 */	{ R"(NCS MODEL\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },	// category and items are commented out in entries 75 to 80, so these entries name no target for the captured text
+/* 76 */	{ R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
+/* 77 */	{ R"(GROUP (\d+) POSITIONAL \(A\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_position", "weight_position" } */ },
+/* 78 */	{ R"(GROUP (\d+) B-FACTOR \(A\*\*2\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
+/* 79 */	{ R"(PARAMETER FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
+/* 80 */	{ R"(TOPOLOGY FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
+};
+
+// Remark3Parser set up with the CNS template table and an expression for
+// the program name ("CNS" or "CNX") with an optional version number.
+class CNS_Remark3Parser : public Remark3Parser
+{
+  public:
+	CNS_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kCNS_Template,
+			sizeof(kCNS_Template) / sizeof(kCNS_Template[0]),	// number of entries in the table
+			regex(R"((CN[SX])(?: (\d+(?:\.\d+)?))?)"))
+	{
+	}
+};
+
+const TemplateLine kPHENIX_Template[] = {	// line templates in the same layout as the tables above, here for PHENIX output; the /* n */ in front of an entry is its index in this array
+/* 0 */		{ R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
+/* 1 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 2 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 3 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 4 */		{ R"(MIN\(FOBS/SIGMA_FOBS\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 5 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 6 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 7 */		{ R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
+/* 8 */		{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 9 */		{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 10 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 11 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 13 */	{ R"(FIT TO DATA USED IN REFINEMENT \(IN BINS\)\.)", 1 },
+/* 14 */	{ R"(BIN RESOLUTION RANGE COMPL\. NWORK NFREE RWORK RFREE)", 1 },
+/* 15 */	{ R"(\d+ (\d+(?:\.\d+)?) - (\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+) (\d+) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?))", 0,
+				"refine_ls_shell", { "d_res_low", "d_res_high", "percent_reflns_obs", "number_reflns_R_work", "number_reflns_R_free", "R_factor_R_work", "R_factor_R_free" },
+				nullptr, true },	// offset 0 in this entry: presumably the same entry is used again for every further bin line - TODO confirm in Remark3Parser
+/* 16 */	{ R"(BULK SOLVENT MODELLING\.)", 1 },
+/* 17 */	{ R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
+/* 18 */	{ R"(SOLVENT RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
+/* 19 */	{ R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
+/* 20 */	{ R"(K_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
+/* 21 */	{ R"(B_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
+/* 22 */	{ R"(ERROR ESTIMATES\.)", 1 },
+/* 23 */	{ R"(COORDINATE ERROR \(MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
+/* 24 */	{ R"(PHASE ERROR \(DEGREES, MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "pdbx_overall_phase_error" } },
+/* 25 */	{ R"(B VALUES\.)", 1 },
+/* 26 */	{ R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 27 */	{ R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 28 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
+/* 29 */	{ R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
+/* 30 */	{ R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 31 */	{ R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 32 */	{ R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 33 */	{ R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 34 */	{ R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 35 */	{ R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 36 */	{ R"(TWINNING INFORMATION\.)", 1 },
+/* 37 */	{ R"(FRACTION:\s*(.+))", 1, "pdbx_reflns_twin", { "fraction" } },
+/* 38 */	{ R"(OPERATOR:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
+/* 39 */	{ R"(DEVIATIONS FROM IDEAL VALUES\.)", 1 },
+/* 40 */	{ R"(RMSD COUNT)", 1 },
+/* 41 */	{ R"(BOND\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_bond_d", false },
+/* 42 */	{ R"(ANGLE\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_angle_d", false },
+/* 43 */	{ R"(CHIRALITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_chiral_restr", false },
+/* 44 */	{ R"(PLANARITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_plane_restr", false },
+/* 45 */	{ R"(DIHEDRAL\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_dihedral_angle_d", false },
+/* 46 */	{ R"(TLS DETAILS)", 1 },
+/* 47 */	{ R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
+/* 48 */	{ R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
+/* 49 */	{ R"(SELECTION:\s*(.+))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
+/* 50 */	{ R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*(\S+)\s+(\S+)\s+(\S+))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
+/* 51 */	{ R"(T TENSOR)", 1 },
+/* 52 */	{ R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
+/* 53 */	{ R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
+/* 54 */	{ R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
+/* 55 */	{ R"(L TENSOR)", 1 },
+/* 56 */	{ R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
+/* 57 */	{ R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
+/* 58 */	{ R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
+/* 59 */	{ R"(S TENSOR)", 1 },
+/* 60 */	{ R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
+/* 61 */	{ R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
+/* 62 */	{ R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 48 - 62, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },	// 48 - 62 is the distance back to entry 48 (TLS GROUP); presumably lets the next TLS group be read with the same entries - TODO confirm in Remark3Parser
+/* 63 */	{ R"(NCS DETAILS)", 1 },
+/* 64 */	{ R"(NUMBER OF NCS GROUPS\s*:\s*(.+))", 1 },
+};
+
+// REMARK 3 parser bound to the PHENIX template table (kPHENIX_Template).
+class PHENIX_Remark3Parser : public Remark3Parser
+{
+  public:
+	// Forwards everything to the generic parser together with the PHENIX
+	// template, its entry count, and a regex with two capture groups:
+	// the literal "PHENIX" and a numeric version following "PHENIX.REFINE:".
+	// NOTE(review): presumably the base class matches this regex against the
+	// program field to obtain program name and version -- confirm in the base.
+	PHENIX_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kPHENIX_Template,
+			sizeof(kPHENIX_Template) / sizeof(kPHENIX_Template[0]),
+			regex(R"((PHENIX)(?: \(PHENIX\.REFINE:) (\d+(?:\.[^)]+)?)\)?)"))
+	{
+	}
+
+	// PHENIX-specific post-processing, defined out of line below the class.
+	virtual void Fixup();
+};
+
+// Rewrites refine_ls_shell.percent_reflns_obs in every row: the stored value
+// is read as a float, multiplied by 100 and truncated to an int, then written
+// back. Rows for which the read or the write throws are left as they are.
+// NOTE(review): presumably PHENIX reports shell completeness as a fraction
+// rather than a percentage -- confirm against real PHENIX REMARK 3 output.
+void PHENIX_Remark3Parser::Fixup()
+{
+	for (auto shell: m_db["refine_ls_shell"])
+	{
+		try
+		{
+			float fraction = shell["percent_reflns_obs"].as<float>();
+
+			// static_cast truncates toward zero; it does not round.
+			int percentage = static_cast<int>(fraction * 100);
+
+			shell["percent_reflns_obs"] = percentage;
+		}
+		catch (...)
+		{
+			// Best effort: a missing or non-numeric value is simply skipped.
+		}
+	}
+}
+
+// Template table used by PROLSQ_Remark3Parser for REMARK 3 text.
+// Each entry holds: a regular expression for one line, an integer step (1 for
+// every entry in this table) and, for lines that carry data, a category name
+// plus a list of item names -- one name per capture group in the regex.
+// The refine_ls_restr entries additionally carry a string (e.g. "p_bond_d")
+// and a bool.
+// NOTE(review): the string looks like the restraint type to store with the
+// row and the bool like a create-new-row flag -- confirm against the
+// TemplateLine definition, which is outside this section.
+// The /* n */ labels are the array indices of the entries.
+const TemplateLine kPROLSQ_Template[] = {
+/* 0 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 5 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 6 */		{ R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
+/* 7 */		{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 8 */		{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 9 */		{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 10 */	{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 11 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 14 */	{ R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
+/* 15 */	{ R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
+/* 16 */	{ R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
+/* 17 */	{ R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
+/* 18 */	{ R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
+/* 19 */	{ R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
+/* 20 */	{ R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
+/* 21 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 22 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 23 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 24 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 25 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 26 */	{ R"(B VALUES\.)", 1 },
+/* 27 */	{ R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 28 */	{ R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 29 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
+/* 30 */	{ R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
+/* 31 */	{ R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 32 */	{ R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 33 */	{ R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 34 */	{ R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 35 */	{ R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 36 */	{ R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 37 */	{ R"(ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 38 */	{ R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
+/* 39 */	{ R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
+/* 40 */	{ R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
+/* 41 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
+/* 42 */	{ R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
+/* 43 */	{ R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
+/* 44 */	{ R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
+/* 45 */	{ R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
+/* 46 */	{ R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
+/* 47 */	{ R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
+/* 48 */	{ R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
+/* 49 */	{ R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
+/* 50 */	{ R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
+/* 51 */	{ R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
+/* 52 */	{ R"(H-BOND \(X\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
+/* 53 */	{ R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
+/* 54 */	{ R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS\.)", 1 },
+/* 55 */	{ R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
+/* 56 */	{ R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
+/* 57 */	{ R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
+/* 58 */	{ R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
+/* 59 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
+/* 60 */	{ R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
+/* 61 */	{ R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
+/* 62 */	{ R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
+/* 63 */	{ R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
+};
+
+// REMARK 3 parser bound to the PROLSQ template table (kPROLSQ_Template).
+class PROLSQ_Remark3Parser : public Remark3Parser
+{
+  public:
+	// Hands the PROLSQ template and its entry count to the generic parser.
+	// The regex accepts either "PROLSQ" or "NUCLSQ" (capture 1), optionally
+	// followed by a space and a numeric version (capture 2).
+	PROLSQ_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kPROLSQ_Template,
+			sizeof(kPROLSQ_Template) / sizeof(kPROLSQ_Template[0]),
+			regex(R"((PROLSQ|NUCLSQ)(?: (\d+(?:\.\d+)?))?)"))
+	{
+	}
+};
+
+// Template table used by REFMAC_Remark3Parser for REMARK 3 text.
+// Each entry holds: a regular expression for one line, an integer step (1 for
+// every entry in this table) and, for lines that carry data, a category name
+// plus a list of item names -- one name per capture group in the regex.
+// The refine_ls_restr entries additionally carry a string (e.g. "p_bond_d")
+// and a bool.
+// NOTE(review): the string looks like the restraint type to store with the
+// row and the bool like a create-new-row flag -- confirm against the
+// TemplateLine definition, which is outside this section.
+// NOTE(review): several heading patterns end in an unescaped '.', and entry 47
+// has one unescaped '.' in "X\.\..Y"; as regex these match any character.
+// The /* n */ labels are the array indices of the entries.
+const TemplateLine kREFMAC_Template[] = {
+/* 0 */		{ "DATA USED IN REFINEMENT.", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 5 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 6 */		{ R"(FIT TO DATA USED IN REFINEMENT.)", 1 },
+/* 7 */		{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 8 */		{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 9 */		{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 10 */	{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 11 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 14 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.)", 1 },
+/* 15 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 16 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 17 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 18 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 19 */	{ R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", "pdbx_number_atoms_protein" */ },
+/* 20 */	{ R"(B VALUES\..*)", 1 },
+/* 21 */	{ R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 22 */	{ R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 23 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
+/* 24 */	{ R"(OVERALL ANISOTROPIC B VALUE.)", 1 },
+/* 25 */	{ R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 26 */	{ R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 27 */	{ R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 28 */	{ R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 29 */	{ R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 30 */	{ R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 31 */	{ R"(ESTIMATED OVERALL COORDINATE ERROR.)", 1 },
+/* 32 */	{ R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
+/* 33 */	{ R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
+/* 34 */	{ R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
+/* 35 */	{ R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
+/* 36 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES.)", 1 },
+/* 37 */	{ R"(DISTANCE RESTRAINTS. RMS SIGMA)", 1 },
+/* 38 */	{ R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
+/* 39 */	{ R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
+/* 40 */	{ R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
+/* 41 */	{ R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
+/* 42 */	{ R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
+/* 43 */	{ R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
+/* 44 */	{ R"(NON-BONDED CONTACT RESTRAINTS.)", 1 },
+/* 45 */	{ R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
+/* 46 */	{ R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
+/* 47 */	{ R"(H-BOND \(X\.\..Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
+/* 48 */	{ R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
+/* 49 */	{ R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS.)", 1 },
+/* 50 */	{ R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
+/* 51 */	{ R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
+/* 52 */	{ R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
+/* 53 */	{ R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
+/* 54 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA)", 1 },
+/* 55 */	{ R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
+/* 56 */	{ R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
+/* 57 */	{ R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
+/* 58 */	{ R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
+};
+
+
+// REMARK 3 parser bound to the REFMAC template table (kREFMAC_Template).
+class REFMAC_Remark3Parser : public Remark3Parser
+{
+  public:
+	// Hands the REFMAC template and its entry count to the generic parser.
+	// The regex ".+" accepts any non-empty text and has no capture groups;
+	// program name and version are supplied by the two functions below.
+	REFMAC_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kREFMAC_Template,
+			sizeof(kREFMAC_Template) / sizeof(kREFMAC_Template[0]),
+			regex(".+"))
+	{
+	}
+
+	// Always reports the fixed program name "REFMAC".
+	virtual string Program()
+	{
+		return "REFMAC";
+	}
+
+	// Always reports an empty version string.
+	virtual string Version()
+	{
+		return "";
+	}
+};
+
+// Template table used by REFMAC5_Remark3Parser for REMARK 3 text.
+// Each entry holds: a regular expression for one line, an integer step and,
+// for lines that carry data, a category name plus a list of item names -- one
+// name per capture group in the regex. Some entries add a string/bool pair.
+// The step is 1 for most entries, 0 on the fixed-format NCS component/restraint
+// lines and the TLS RESIDUE RANGE line, and a negative difference of index
+// labels (93 - 105, 108 - 110, 113 - 129) on the last line of the NCS, TWIN and
+// TLS groups.
+// NOTE(review): presumably the step is the relative move to the next template
+// line (0 = line may repeat, negative = loop back to the group's first line);
+// confirm against the code that consumes TemplateLine.
+// NOTE(review): several heading patterns end in an unescaped '.', and entries
+// 65/66 use "X...Y" unescaped; as regex these dots match any character.
+// The /* n */ labels are the array indices those differences are computed from.
+const TemplateLine kREFMAC5_Template[] = {
+/* 0 */		{ R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
+/* 1 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 2 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 3 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 4 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 5 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 6 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 7 */		{ R"(FIT TO DATA USED IN REFINEMENT.)", 1 },
+/* 8 */		{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 9 */		{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 10 */	{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 11 */	{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 12 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 14 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 15 */	{ R"(FIT IN THE HIGHEST RESOLUTION BIN.)", 1 },
+/* 16 */	{ R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
+/* 17 */	{ R"(BIN RESOLUTION RANGE HIGH(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
+/* 18 */	{ R"(BIN RESOLUTION RANGE LOW(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
+/* 19 */	{ R"(REFLECTION IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
+/* 20 */	{ R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
+/* 21 */	{ R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
+/* 22 */	{ R"(BIN FREE R VALUE SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
+/* 23 */	{ R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
+/* 24 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.)", 1 },
+/* 25 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 26 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 27 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 28 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 29 */	{ R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", { "pdbx_number_atoms_protein" } */ },
+/* 30 */	{ R"(B VALUES\..*)", 1 },
+/* 31 */	{ R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 32 */	{ R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 33 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
+/* 34 */	{ R"(OVERALL ANISOTROPIC B VALUE.)", 1 },
+/* 35 */	{ R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 36 */	{ R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 37 */	{ R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 38 */	{ R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 39 */	{ R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 40 */	{ R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 41 */	{ R"(ESTIMATED OVERALL COORDINATE ERROR.)", 1 },
+/* 42 */	{ R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
+/* 43 */	{ R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
+/* 44 */	{ R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
+/* 45 */	{ R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
+/* 46 */	{ R"(CORRELATION COEFFICIENTS.)", 1 },
+/* 47 */	{ R"(CORRELATION COEFFICIENT FO-FC\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
+/* 48 */	{ R"(CORRELATION COEFFICIENT FO-FC FREE\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
+/* 49 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT)", 1 },
+/* 50 */	{ R"(BOND LENGTHS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_refined_d", false },
+/* 51 */	{ R"(BOND LENGTHS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_other_d", false },
+/* 52 */	{ R"(BOND ANGLES REFINED ATOMS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_refined_deg", false },
+/* 53 */	{ R"(BOND ANGLES OTHERS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_other_deg", false },
+/* 54 */	{ R"(TORSION ANGLES, PERIOD 1 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_1_deg", false },
+/* 55 */	{ R"(TORSION ANGLES, PERIOD 2 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_2_deg", false },
+/* 56 */	{ R"(TORSION ANGLES, PERIOD 3 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_3_deg", false },
+/* 57 */	{ R"(TORSION ANGLES, PERIOD 4 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_4_deg", false },
+/* 58 */	{ R"(CHIRAL-CENTER RESTRAINTS \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_chiral_restr", false },
+/* 59 */	{ R"(GENERAL PLANES REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_refined", false },
+/* 60 */	{ R"(GENERAL PLANES OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_other", false },
+/* 61 */	{ R"(NON-BONDED CONTACTS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_refined", false },
+/* 62 */	{ R"(NON-BONDED CONTACTS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_other", false },
+/* 63 */	{ R"(NON-BONDED TORSION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_refined", false },
+/* 64 */	{ R"(NON-BONDED TORSION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_other", false },
+/* 65 */	{ R"(H-BOND \(X...Y\) REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_refined", false },
+/* 66 */	{ R"(H-BOND \(X...Y\) OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_other", false },
+/* 67 */	{ R"(POTENTIAL METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_refined", false },
+/* 68 */	{ R"(POTENTIAL METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_other", false },
+/* 69 */	{ R"(SYMMETRY VDW REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_refined", false },
+/* 70 */	{ R"(SYMMETRY VDW OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_other", false },
+/* 71 */	{ R"(SYMMETRY H-BOND REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_refined", false },
+/* 72 */	{ R"(SYMMETRY H-BOND OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_other", false },
+/* 73 */	{ R"(SYMMETRY METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_refined", false },
+/* 74 */	{ R"(SYMMETRY METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_other", false },
+/* 75 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
+/* 76 */	{ R"(MAIN-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_it", false },
+/* 77 */	{ R"(MAIN-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_other", false },
+/* 78 */	{ R"(MAIN-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_it", false },
+/* 79 */	{ R"(MAIN-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_other", false },
+/* 80 */	{ R"(SIDE-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_it", false },
+/* 81 */	{ R"(SIDE-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_other", false },
+/* 82 */	{ R"(SIDE-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_it", false },
+/* 83 */	{ R"(SIDE-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_other", false },
+/* 84 */	{ R"(LONG RANGE B REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_refined", false },
+/* 85 */	{ R"(LONG RANGE B OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_other", false },
+/* 86 */	{ R"(ANISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
+/* 87 */	{ R"(RIGID-BOND RESTRAINTS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_rigid_bond_restr", false },
+/* 88 */	{ R"(SPHERICITY; FREE ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_free", false },
+/* 89 */	{ R"(SPHERICITY; BONDED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_bonded", false },
+		//	Simply ignore NCS, you can ask Robbie why.
+		//	The NCS lines below are still matched, but their category/item
+		//	mappings are commented out, so nothing is stored for them.
+/* 90 */	{ R"(NCS RESTRAINTS STATISTICS)", 1 },
+/* 91 */	{ R"(NUMBER OF DIFFERENT NCS GROUPS\s*:\s*(.+))", 1 },
+/* 92 */	{ R"(NCS GROUP NUMBER\s*:\s*(\d+))", 1, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
+/* 93 */	{ R"(CHAIN NAMES\s*:\s*(.+))", 1, /*"struct_ncs_dom", { "details" }*/ },
+/* 94 */	{ R"(NUMBER OF COMPONENTS NCS GROUP\s*:\s*(\d+))", 1 },
+/* 95 */	{ R"(COMPONENT C SSSEQI TO C SSSEQI CODE)", 1 },
+		//	The component lines are fixed format, hence two patterns: the
+		//	first with, the second without insertion-code columns (see the
+		//	commented-out item lists).
+/* 96 */	{ R"((\d+)\s+(.)\s+(\d+)(.)\s+(.)\s+(\d+)(.)\s+(.+))", 0 },//, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "beg_auth_icode", "end_auth_asym_id", "end_auth_seq_id", "end_auth_icode", "pdbx_refine_code" }, {}, 1 },
+/* 97 */	{ R"((\d+)\s+(.)\s+(\d+)\s+(.)\s+(\d+)\s+(.+))", 0 },//, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id", "pdbx_refine_code" }, {}, 1 },
+/* 98 */	{ R"(GROUP CHAIN COUNT RMS WEIGHT)", 1 }, /*, "refine_ls_restr_ncs", { "pdbx_type", "dom_id", "pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position", }*/
+/* 99 */	{ R"(TIGHT POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight positional"}, 1 },
+/* 100 */	{ R"(MEDIUM POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium positional"}, 1 },
+/* 101 */	{ R"(LOOSE POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose positional"}, 1 },
+/* 102 */	{ R"(TIGHT THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight thermal", }, 1 },
+/* 103 */	{ R"(MEDIUM THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium thermal", }, 1 },
+/* 104 */	{ R"(LOOSE THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose thermal", }, 10 },
+/* 105 */	{ R"(NCS GROUP NUMBER\s*:\s*(\d+))", 93 - 105, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
+/* 106 */	{ R"(TWIN DETAILS)", 1 },
+/* 107 */	{ R"(NUMBER OF TWIN DOMAINS\s*:\s*(\d*))", 1 },
+/* 108 */	{ R"(TWIN DOMAIN\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "domain_id" }, nullptr, true },
+/* 109 */	{ R"(TWIN OPERATOR\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
+/* 110 */	{ R"(TWIN FRACTION\s*:\s*(.+))", 108 - 110, "pdbx_reflns_twin", { "fraction" } },
+/* 111 */	{ R"(TLS DETAILS)", 1 },
+/* 112 */	{ R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
+/* 113 */	{ R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
+/* 114 */	{ R"(NUMBER OF COMPONENTS GROUP\s*:\s*(.+))", 1 },
+/* 115 */	{ R"(COMPONENTS C SSSEQI TO C SSSEQI)", 1 },
+/* 116 */	{ R"(RESIDUE RANGE\s*:\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+))", 0, "pdbx_refine_tls_group", { "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id" }, nullptr, true },
+/* 117 */	{ R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*(\S+)\s+(\S+)\s+(\S+))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
+/* 118 */	{ R"(T TENSOR)", 1 },
+/* 119 */	{ R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
+/* 120 */	{ R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
+/* 121 */	{ R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
+/* 122 */	{ R"(L TENSOR)", 1 },
+/* 123 */	{ R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
+/* 124 */	{ R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
+/* 125 */	{ R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
+/* 126 */	{ R"(S TENSOR)", 1 },
+/* 127 */	{ R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
+/* 128 */	{ R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
+/* 129 */	{ R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 113 - 129, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
+/* 130 */	{ R"(BULK SOLVENT MODELLING.)", 1 },
+/* 131 */	{ R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
+/* 132 */	{ R"(PARAMETERS FOR MASK CALCULATION)", 1 },
+/* 133 */	{ R"(VDW PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
+/* 134 */	{ R"(ION PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_ion_probe_radii" } },
+/* 135 */	{ R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
+};
+
+/// REMARK 3 parser that is driven by kREFMAC5_Template. The name pattern
+/// captures "REFMAC" and, optionally, a version following a single space.
+class REFMAC5_Remark3Parser : public Remark3Parser
+{
+  public:
+	REFMAC5_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kREFMAC5_Template,
+			sizeof(kREFMAC5_Template) / sizeof(kREFMAC5_Template[0]),
+			regex(R"((REFMAC)(?: (\d+(?:\..+)?))?)"))
+	{
+	}
+};
+
+// Template table used by SHELXL_Remark3Parser.
+//
+// Remark3Parser::Parse() tries the entries in order, starting at the current
+// state. Each entry starts with a regular expression that has to match a
+// complete line, followed by the offset that is added to the entry's own index
+// to obtain the next state once the entry matched (1 = continue with the next
+// entry). Entries that store data additionally name the mmCIF category and the
+// items that receive the capture groups; the refine_ls_restr entries end with
+// the restraint type and the create-new flag.
+// NOTE(review): the field order is inferred from these initialisers and from
+// how Parse() uses the fields; confirm against the TemplateLine declaration.
+const TemplateLine kSHELXL_Template[] = {
+/* 0 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 5 */		{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 6 */		{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 7 */		{ R"(FIT TO DATA USED IN REFINEMENT \(NO CUTOFF\)\.)", 1 },
+/* 8 */		{ R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
+/* 9 */		{ R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
+/* 10 */	{ R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
+/* 11 */	{ R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
+/* 13 */	{ R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
+/* 14 */	{ R"(FIT/AGREEMENT OF MODEL FOR DATA WITH F>4SIG\(F\)\.)", 1 },
+/* 15 */	{ R"(R VALUE \(WORKING \+ TEST SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_4sig_cutoff" } },
+/* 16 */	{ R"(R VALUE \(WORKING SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_4sig_cutoff" } },
+/* 17 */	{ R"(FREE R VALUE \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_4sig_cutoff" } },
+/* 18 */	{ R"(FREE R VALUE TEST SET SIZE \(%, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_4sig_cutoff" } },
+/* 19 */	{ R"(FREE R VALUE TEST SET COUNT \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_4sig_cutoff" } },
+/* 20 */	{ R"(TOTAL NUMBER OF REFLECTIONS \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "number_reflns_obs_4sig_cutoff" } },
+/* 21 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 22 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 23 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 24 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 25 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 26 */	{ R"(MODEL REFINEMENT\.)", 1 },
+/* 27 */	{ R"(OCCUPANCY SUM OF NON-HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_non_hydrogen" } },
+/* 28 */	{ R"(OCCUPANCY SUM OF HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_hydrogen" } },
+/* 29 */	{ R"(NUMBER OF DISCRETELY DISORDERED RESIDUES\s*:\s*(.+))", 1, "refine_analyze", { "number_disordered_residues" } },
+/* 30 */	{ R"(NUMBER OF LEAST-SQUARES PARAMETERS\s*:\s*(.+))", 1, "refine", { "ls_number_parameters" } },
+/* 31 */	{ R"(NUMBER OF RESTRAINTS\s*:\s*(.+))", 1, "refine", { "ls_number_restraints" } },
+/* 32 */	{ R"(RMS DEVIATIONS FROM RESTRAINT TARGET VALUES\.)", 1 },
+/* 33 */	{ R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_bond_d", false },
+/* 34 */	{ R"(ANGLE DISTANCES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_angle_d", false },
+/* 35 */	{ R"(SIMILAR DISTANCES \(NO TARGET VALUES\) \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_dist", false },
+/* 36 */	{ R"(DISTANCES FROM RESTRAINT PLANES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_from_restr_planes", false },
+/* 37 */	{ R"(ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_zero_chiral_vol", false },
+/* 38 */	{ R"(NON-ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_non_zero_chiral_vol", false },
+/* 39 */	{ R"(ANTI-BUMPING DISTANCE RESTRAINTS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_anti_bump_dis_restr", false },
+/* 40 */	{ R"(RIGID-BOND ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_rigid_bond_adp_cmpnt", false },
+/* 41 */	{ R"(SIMILAR ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_adp_cmpnt", false },
+/* 42 */	{ R"(APPROXIMATELY ISOTROPIC ADPS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_approx_iso_adps", false },
+/* 43 */	{ R"(BULK SOLVENT MODELING\.)", 1 },
+/* 44 */	{ R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
+/* 45 */	{ R"(STEREOCHEMISTRY TARGET VALUES\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
+/* 46 */	{ R"(SPECIAL CASE\s*:\s*(.+))", 1, "refine", { "pdbx_stereochem_target_val_spec_case" } },
+};
+
+/// REMARK 3 parser that is driven by kSHELXL_Template. The name pattern
+/// captures "SHELXL" and the version that follows the dash.
+class SHELXL_Remark3Parser : public Remark3Parser
+{
+  public:
+	SHELXL_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kSHELXL_Template,
+			sizeof(kSHELXL_Template) / sizeof(kSHELXL_Template[0]),
+			regex(R"((SHELXL)(?:-(\d+(?:\..+)?)))"))
+	{
+	}
+};
+
+// Template table used by TNT_Remark3Parser.
+//
+// Remark3Parser::Parse() tries the entries in order, starting at the current
+// state. Each entry starts with a regular expression that has to match a
+// complete line, followed by the offset that is added to the entry's own index
+// to obtain the next state once the entry matched (1 = continue with the next
+// entry). Entries that store data additionally name the mmCIF category and the
+// items that receive the capture groups, one item per group; the
+// refine_ls_restr entries end with the restraint type and the create-new flag.
+// NOTE(review): the field order is inferred from these initialisers and from
+// how Parse() uses the fields; confirm against the TemplateLine declaration.
+const TemplateLine kTNT_Template[] = {
+/* 0 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 5 */		{ R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 6 */		{ R"(USING DATA ABOVE SIGMA CUTOFF\.)", 1 },
+/* 7 */		{ R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 8 */		{ R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 9 */		{ R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
+/* 10 */	{ R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 11 */	{ R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 12 */	{ R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 14 */	{ R"(USING ALL DATA, NO SIGMA CUTOFF\.)", 1 },
+/* 15 */	{ R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
+/* 16 */	{ R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
+/* 17 */	{ R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
+/* 18 */	{ R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
+/* 19 */	{ R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
+/* 20 */	{ R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
+/* 21 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 22 */	{ R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 23 */	{ R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 24 */	{ R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 25 */	{ R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 26 */	{ R"(WILSON B VALUE \(FROM FCALC, A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 27 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES\. RMS WEIGHT COUNT)", 1 },
+/* 28 */	{ R"(BOND LENGTHS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_bond_d", false },
+/* 29 */	{ R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_angle_deg", false },
+/* 30 */	{ R"(TORSION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_dihedral_angle_d", false },
+/* 31 */	{ R"(PSEUDOROTATION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_pseud_angle", false },
+/* 32 */	{ R"(TRIGONAL CARBON PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_trig_c_planes", false },
+/* 33 */	{ R"(GENERAL PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_gen_planes", false },
+/* 34 */	{ R"(ISOTROPIC THERMAL FACTORS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_it", false },
+/* 35 */	{ R"(NON-BONDED CONTACTS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_nbd", false },
+/* 36 */	{ R"(INCORRECT CHIRAL-CENTERS \(COUNT\)\s*:\s*(.+)\s*)", 1, "refine_ls_restr", { "number" }, "t_incorr_chiral_ct", false },
+/* 37 */	{ R"(BULK SOLVENT MODELING\.)", 1 },
+/* 38 */	{ R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
+/* 39 */	{ R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
+/* 40 */	{ R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
+/* 41 */	{ R"(RESTRAINT LIBRARIES\.)", 1 },
+/* 42 */	{ R"(STEREOCHEMISTRY\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
+/* 43 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
+};
+
+/// REMARK 3 parser that is driven by kTNT_Template. The name pattern captures
+/// "TNT" and, optionally, the version text following the " V" marker.
+class TNT_Remark3Parser : public Remark3Parser
+{
+  public:
+	TNT_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kTNT_Template,
+			sizeof(kTNT_Template) / sizeof(kTNT_Template[0]),
+			regex(R"((TNT)(?: V. (\d+.+)?)?)"))
+	{
+	}
+};
+
+// Template table used by XPLOR_Remark3Parser.
+//
+// Remark3Parser::Parse() tries the entries in order, starting at the current
+// state. Each entry starts with a regular expression that has to match a
+// complete line, followed by the offset that is added to the entry's own index
+// to obtain the next state once the entry matched: 1 continues with the next
+// entry, 0 stays on the same entry so the line may repeat, and a negative
+// value such as 63 - 64 steps back so a group of lines may repeat.
+// Entries that store data additionally name the mmCIF category and the items
+// that receive the capture groups; the refine_ls_restr entries end with the
+// restraint type and the create-new flag. Entries whose category is commented
+// out are matched but nothing is stored for them.
+// NOTE(review): the field order is inferred from these initialisers and from
+// how Parse() uses the fields; confirm against the TemplateLine declaration.
+// NOTE(review): entries 51-54 list two items but their pattern has a single
+// capture group, so the second item receives an empty capture.
+const TemplateLine kXPLOR_Template[] = {
+/* 0 */		{ R"(DATA USED IN REFINEMENT\.)", 1 },
+/* 1 */		{ R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_high" } },
+/* 2 */		{ R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_low" } },
+/* 3 */		{ R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
+/* 4 */		{ R"(DATA CUTOFF HIGH \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
+/* 5 */		{ R"(DATA CUTOFF LOW \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
+/* 6 */		{ R"(COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
+/* 7 */		{ R"(NUMBER OF REFLECTIONS :\s+(.+))", 1, "refine", { "ls_number_reflns_obs" } },
+/* 8 */		{ R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
+/* 9 */		{ R"(CROSS-VALIDATION METHOD :\s+(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
+/* 10 */	{ R"(FREE R VALUE TEST SET SELECTION :\s+(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
+/* 11 */	{ R"(R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine", { "ls_R_factor_R_work" } },
+/* 12 */	{ R"(FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free" } },
+/* 13 */	{ R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
+/* 14 */	{ R"(FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
+/* 15 */	{ R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
+/* 16 */	{ R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
+/* 17 */	{ R"(TOTAL NUMBER OF BINS USED :\s+(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
+/* 18 */	{ R"(BIN RESOLUTION RANGE HIGH \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_high" } },
+/* 19 */	{ R"(BIN RESOLUTION RANGE LOW \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_low" } },
+/* 20 */	{ R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
+/* 21 */	{ R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
+/* 22 */	{ R"(BIN R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
+/* 23 */	{ R"(BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
+/* 24 */	{ R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
+/* 25 */	{ R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
+/* 26 */	{ R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
+/* 27 */	{ R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
+/* 28 */	{ R"(PROTEIN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
+/* 29 */	{ R"(NUCLEIC ACID ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
+/* 30 */	{ R"(HETEROGEN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
+/* 31 */	{ R"(SOLVENT ATOMS :\s+(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
+/* 32 */	{ R"(B VALUES\.)", 1 },
+/* 33 */	{ R"(B VALUE TYPE :\s+(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
+/* 34 */	{ R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
+/* 35 */	{ R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+))", 1, "refine", { "B_iso_mean" } },
+/* 36 */	{ R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
+/* 37 */	{ R"(B11 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][1]" } },
+/* 38 */	{ R"(B22 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][2]" } },
+/* 39 */	{ R"(B33 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[3][3]" } },
+/* 40 */	{ R"(B12 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][2]" } },
+/* 41 */	{ R"(B13 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][3]" } },
+/* 42 */	{ R"(B23 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][3]" } },
+/* 43 */	{ R"(ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 44 */	{ R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
+/* 45 */	{ R"(ESD FROM SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
+/* 46 */	{ R"(LOW RESOLUTION CUTOFF \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
+/* 47 */	{ R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
+/* 48 */	{ R"(ESD FROM C-V LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
+/* 49 */	{ R"(ESD FROM C-V SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
+/* 50 */	{ R"(RMS DEVIATIONS FROM IDEAL VALUES\..*)", 1 },
+/* 51 */	{ R"(BOND LENGTHS \(A\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_bond_d", false },
+/* 52 */	{ R"(BOND ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_angle_deg", false },
+/* 53 */	{ R"(DIHEDRAL ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_dihedral_angle_d", false },
+/* 54 */	{ R"(IMPROPER ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_improper_angle_d", false },
+/* 55 */	{ R"(ISOTROPIC THERMAL MODEL :\s+(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
+/* 56 */	{ R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
+/* 57 */	{ R"(MAIN-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcbond_it", false },
+/* 58 */	{ R"(MAIN-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcangle_it", false },
+/* 59 */	{ R"(SIDE-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scbond_it", false },
+/* 60 */	{ R"(SIDE-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scangle_it", false },
+/* 61 */	{ R"(NCS MODEL :\s+(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },
+/* 62 */	{ R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
+/* 63 */	{ R"(GROUP (\d+) POSITIONAL \(A\) :\s+(.+?);\s+(.+))", 1, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_position", "weight_position" } */ },
+/* 64 */	{ R"(GROUP (\d+) B-FACTOR \(A\*\*2\) :\s+(.+?);\s+(.+))", 63 - 64, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
+/* 65 */	{ R"(PARAMETER FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
+/* 66 */	{ R"(TOPOLOGY FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
+};
+
+/// REMARK 3 parser that is driven by kXPLOR_Template. The name pattern
+/// captures "X-PLOR" and, optionally, a numeric version following a space.
+class XPLOR_Remark3Parser : public Remark3Parser
+{
+  public:
+	XPLOR_Remark3Parser(const string& name, const string& expMethod, PDBRecord* r, cif::datablock& db)
+		: Remark3Parser(
+			name, expMethod, r, db,
+			kXPLOR_Template,
+			sizeof(kXPLOR_Template) / sizeof(kXPLOR_Template[0]),
+			regex(R"((X-PLOR)(?: (\d+(?:\.\d+)?))?)"))
+	{
+	}
+};
+
+// --------------------------------------------------------------------
+
+/// Remember the program name, the experimental method, the record to start
+/// reading from, the template table with its length and the pattern used to
+/// split the name into program and version. The parser's own m_db is
+/// initialised from the name of db.
+Remark3Parser::Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::datablock& db,
+			const TemplateLine templatelines[], uint32 templateLineCount, std::regex program_version)
+	: m_name(name)
+	, m_expMethod(expMethod)
+	, m_rec(r)
+	, m_db(db.name())
+	, m_template(templatelines)
+	, m_templateCount(templateLineCount)
+	, m_program_version(program_version)
+{
+}
+
+/// Fetch the next non-empty REMARK 3 line into m_line and return it.
+///
+/// The text is what v_s(12) yields for the record. When the record value
+/// contains a colon, directly following REMARK 3 records that are indented up
+/// to the column of that colon are taken to be the wrapped remainder of the
+/// value and are appended, separated by a single space. Afterwards every run
+/// of whitespace is collapsed into one space and trailing whitespace is
+/// dropped.
+///
+/// Returns an empty string when no further non-empty REMARK 3 record exists.
+string Remark3Parser::NextLine()
+{
+	m_line.clear();
+
+	while (m_rec != nullptr and m_rec->is("REMARK   3"))
+	{
+		// Offset of the first ':' in the value (0 if there is none); this is
+		// the indent a continuation line needs to have.
+		// NOTE(review): the original had a loop here that looked like it was
+		// meant to advance past the blanks following the colon, but it tested
+		// the colon itself and therefore never ran. The effective behaviour
+		// (indent == colon column) is kept as is.
+		size_t valueIndent = 0;
+		for (size_t i = 4; i < m_rec->m_vlen; ++i)
+		{
+			if (m_rec->m_value[i] == ':')
+			{
+				valueIndent = i;
+				break;
+			}
+		}
+
+		m_line = m_rec->v_s(12);
+		m_rec = m_rec->m_next;
+
+		if (m_line.empty())
+			continue;
+
+		// concatenate value that is wrapped over multiple lines (tricky code...)
+
+		if (valueIndent > 4)
+		{
+			string indent(valueIndent - 4, ' ');
+
+			// m_rec was advanced above and may have run off the end of the
+			// record list, so it has to be checked before it is used
+			while (m_rec != nullptr and m_rec->is("REMARK   3") and m_rec->m_vlen > valueIndent)
+			{
+				string v(m_rec->m_value + 4, m_rec->m_value + m_rec->m_vlen);
+				if (not ba::starts_with(v, indent))
+					break;
+
+				m_line += ' ';
+				m_line.append(m_rec->m_value + valueIndent, m_rec->m_value + m_rec->m_vlen);
+
+				m_rec = m_rec->m_next;
+			}
+		}
+
+		// collapse multiple spaces, compacting the string in place
+		bool space = false;
+		auto i = m_line.begin(), j = i;
+
+		while (i != m_line.end())
+		{
+			// isspace is only defined for values representable as unsigned char
+			bool nspace = isspace(static_cast<unsigned char>(*i)) != 0;
+
+			if (nspace == false)
+			{
+				if (space)
+					*j++ = ' ';
+				*j++ = *i;
+			}
+			space = nspace;
+			++i;
+		}
+		m_line.erase(j, m_line.end());
+
+		break;
+	}
+
+	if (VERBOSE >= 2)
+		cerr << "RM3: " << m_line << endl;
+
+	return m_line;
+}
+
+/// Test whether the whole of m_line matches expr. On success the capture
+/// groups are left in m_m, m_state becomes nextState and true is returned;
+/// otherwise m_state is left alone and false is returned.
+bool Remark3Parser::Match(const char* expr, int nextState)
+{
+	regex pattern(expr);
+
+	if (regex_match(m_line, m_m, pattern))
+	{
+		m_state = nextState;
+		return true;
+	}
+
+	if (VERBOSE >= 3)
+		cerr << kRedOn << "No Match:" << kRedOff << " '" << expr << '\'' << endl;
+
+	return false;
+}
+
+/// Run the template over all remaining REMARK 3 lines and store the captured
+/// values in m_db.
+///
+/// For every line the template entries are tried from the current state
+/// onward; the first matching entry decides the next state. Once the template
+/// is exhausted, an "OTHER REFINEMENT REMARKS" line and everything after it is
+/// collected into refine.details. Lines that match nothing are dropped.
+///
+/// Returns the fraction of the lines read that were not dropped, or 0 when
+/// there was no line to read at all.
+float Remark3Parser::Parse()
+{
+	int lineCount = 0, dropped = 0;
+	string remarks;
+	m_state = 0;
+
+	while (m_rec != nullptr)
+	{
+		NextLine();
+
+		if (m_line.empty())
+			break;
+
+		++lineCount;
+
+		// Skip over AUTHORS lines
+		if (m_state == 0 and Match(R"(AUTHORS\s*:.+)", 0))
+			continue;
+
+		// Look for the first template entry at or after the current state
+		// that matches; entries in between are silently skipped.
+		auto state = m_state;
+		for (; state < m_templateCount; ++state)
+		{
+			const TemplateLine& tmpl = m_template[state];
+
+			if (Match(tmpl.rx, state + tmpl.next_state_offset))
+			{
+				if (not (tmpl.category == nullptr or tmpl.items.size() == 0))
+				{
+					if (tmpl.ls_restr_type == nullptr)
+						StoreCapture(tmpl.category, tmpl.items, tmpl.create_new);
+					else if (tmpl.create_new)
+						StoreRefineLsRestr(tmpl.ls_restr_type, tmpl.items);
+					else
+						UpdateRefineLsRestr(tmpl.ls_restr_type, tmpl.items);
+				}
+				break;
+			}
+		}
+
+		if (state < m_templateCount)
+			continue;
+
+		// Nothing in the template matched; m_templateCount + 1 is used as the
+		// state for "inside the free text remarks".
+		if (state == m_templateCount and Match(R"(OTHER REFINEMENT REMARKS\s*:\s*(.*))", m_templateCount + 1))
+		{
+			remarks = m_m[1].str();
+			continue;
+		}
+
+		if (state == m_templateCount + 1)
+		{
+			remarks = remarks + '\n' + m_line;
+			continue;
+		}
+
+		if (VERBOSE >= 2)
+			cerr << kRedOn << "Dropping line:" << kRedOff << " '" << m_line << '\'' << endl;
+
+		++dropped;
+	}
+
+	if (not remarks.empty() and not iequals(remarks, "NULL"))
+		m_db["refine"].front()["details"] = remarks;
+
+	// Without any input line the ratio below would be 0/0, i.e. NaN
+	if (lineCount == 0)
+		return 0;
+
+	float score = float(lineCount - dropped) / lineCount;
+
+	return score;
+}
+
+/// The program part of m_name, i.e. the first capture group of
+/// m_program_version, or all of m_name when the pattern does not match.
+string Remark3Parser::Program()
+{
+	smatch match;
+
+	if (not regex_match(m_name, match, m_program_version))
+		return m_name;
+
+	return match[1].str();
+}
+
+/// The version part of m_name, i.e. the second capture group of
+/// m_program_version, or an empty string when the pattern does not match.
+string Remark3Parser::Version()
+{
+	smatch match;
+
+	if (not regex_match(m_name, match, m_program_version))
+		return string();
+
+	return match[2].str();
+}
+
+/// Store the capture groups of the last match in the given category, the n-th
+/// group going into the n-th item. Captures reading NULL or NONE are skipped.
+///
+/// The values are written to the last row of the category. A new row, filled
+/// with the key items this category needs, is added first when the category
+/// is still empty or when createNew is set; this happens at most once per call
+/// and only when there is a value to store.
+void Remark3Parser::StoreCapture(const char* category, initializer_list<const char*> items, bool createNew)
+{
+	int groupNr = 1;
+	for (auto itemName = items.begin(); itemName != items.end(); ++itemName, ++groupNr)
+	{
+		string text = m_m[groupNr].str();
+		ba::trim(text);
+
+		const bool noValue = iequals(text, "NULL") or iequals(text, "NONE");
+		if (noValue)
+			continue;
+
+		if (VERBOSE >= 3)
+			cerr << "storing: '" << text << "' in _" << category << '.' << *itemName << endl;
+
+		// the category is looked up only now, after it is known that there
+		// really is something to store
+		auto& cat = m_db[category];
+
+		const bool needsRow = cat.empty() or createNew;
+		if (needsRow)
+		{
+			if (iequals(category, "refine"))
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+					{ "entry_id", m_db.name() },
+#warning("???")
+					{ "pdbx_diffrn_id", 1 }
+				});
+			else if (iequals(category, "refine_analyze") or iequals(category, "pdbx_refine"))
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+					{ "entry_id", m_db.name() },
+//					{ "pdbx_diffrn_id", 1 }
+				});
+			else if (iequals(category, "refine_hist"))
+			{
+				// take the resolution limits from the first refine row, if any
+				string resHigh, resLow;
+				for (auto refineRow: m_db["refine"])
+				{
+					cif::tie(resHigh, resLow) = refineRow.get("ls_d_res_high", "ls_d_res_low");
+					break;
+				}
+
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+					{ "cycle_id", "LAST" },
+					{ "d_res_high", resHigh.empty() ? "." : resHigh },
+					{ "d_res_low", resLow.empty() ? "." : resLow }
+				});
+			}
+			else if (iequals(category, "refine_ls_shell"))
+			{
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+				});
+			}
+			else if (iequals(category, "pdbx_refine_tls_group"))
+			{
+				// a group belongs to the TLS record that was stored last
+				string tlsId;
+				if (not m_db["pdbx_refine_tls"].empty())
+					tlsId = m_db["pdbx_refine_tls"].back()["id"].as<string>();
+
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+					{ "id", tlsId },
+					{ "refine_tls_id", tlsId }
+				});
+			}
+			else if (iequals(category, "pdbx_refine_tls"))
+			{
+				cat.emplace({
+					{ "pdbx_refine_id", m_expMethod },
+					{ "method", "refined" }
+				});
+			}
+//			else if (iequals(category, "struct_ncs_dom"))
+//			{
+//				size_t id = cat.size() + 1;
+//
+//				cat.emplace({
+//					{ "id", id }
+//				});
+//			}
+			else if (iequals(category, "pdbx_reflns_twin"))
+			{
+				cat.emplace({
+#warning("???")
+					{ "crystal_id", 1 },
+					{ "diffrn_id", 1 }
+				});
+			}
+			else
+				cat.emplace({});
+
+			// the remaining items of this call go into the row just added
+			createNew = false;
+		}
+
+		cat.back()[*itemName] = text;
+	}
+}
+
+/// Add a refine_ls_restr row of the given type and fill it with the capture
+/// groups of the last match, the n-th group going into the n-th item. Empty
+/// and NULL captures are skipped; the row is only created once the first
+/// usable value is found.
+void Remark3Parser::StoreRefineLsRestr(const char* type, initializer_list<const char*> items)
+{
+	row newRow;
+	int groupNr = 1;
+
+	for (auto itemName = items.begin(); itemName != items.end(); ++itemName, ++groupNr)
+	{
+		string text = m_m[groupNr].str();
+		ba::trim(text);
+
+		const bool noValue = text.empty() or iequals(text, "NULL");
+		if (noValue)
+			continue;
+
+		if (not newRow)
+		{
+			std::tie(newRow, std::ignore) = m_db["refine_ls_restr"].emplace({});
+
+			newRow["pdbx_refine_id"] = m_expMethod;
+			newRow["type"] = type;
+		}
+
+		newRow[*itemName] = text;
+	}
+}
+
+/// Write the capture groups of the last match into the first refine_ls_restr
+/// row that has the given type and this parser's refine id; a NULL capture is
+/// written as an empty value. When no such row exists yet, the work is handed
+/// to StoreRefineLsRestr instead.
+void Remark3Parser::UpdateRefineLsRestr(const char* type, initializer_list<const char*> items)
+{
+	auto rows = m_db["refine_ls_restr"].find(cif::key("type") == type and cif::key("pdbx_refine_id") == m_expMethod);
+
+	if (rows.empty())
+	{
+		StoreRefineLsRestr(type, items);
+		return;
+	}
+
+	for (row r: rows)
+	{
+		int groupNr = 1;
+		for (auto itemName = items.begin(); itemName != items.end(); ++itemName, ++groupNr)
+		{
+			string text = m_m[groupNr].str();
+			ba::trim(text);
+
+			if (iequals(text, "NULL"))
+				text.clear();
+
+			r[*itemName] = text;
+		}
+
+		// only the first matching row is updated
+		break;
+	}
+}
+
+// --------------------------------------------------------------------
+
+/// Parse the REMARK 3 records starting at r and add what was found to db.
+///
+/// The first two non-empty REMARK 3 lines have to be "REFINEMENT." and a
+/// "PROGRAM : ..." line. The program list is split on ", " and for each
+/// program with a known prefix a dedicated parser is run over the records
+/// following the PROGRAM line. Every distinct program/version combination
+/// whose parser scores above zero is added to the software category; the
+/// categories collected by the best scoring parser are copied into db.
+///
+/// Throws runtime_error when one of the two leading lines is not as expected.
+/// Returns true when at least one parser scored above zero.
+bool Remark3Parser::Parse(const string& expMethod, PDBRecord* r, cif::datablock& db)
+{
+	// simple version, only for the first few lines
+	auto GetNextLine = [&]()
+	{
+		string result;
+
+		while (result.empty() and r != nullptr and r->is("REMARK   3"))
+		{
+			result = r->v_s(12);
+			r = r->m_next;
+		}
+
+		return result;
+	};
+
+	// All remark 3 records should start with the same data.
+
+	string line = GetNextLine();
+
+	if (line != "REFINEMENT.")
+		throw runtime_error("Unexpected data in REMARK 3");
+
+	line = GetNextLine();
+
+	regex rxp(R"(^PROGRAM\s*:\s*(.+))");
+	smatch m;
+
+	if (not regex_match(line, m, rxp))
+		throw runtime_error("Expected valid PROGRAM line in REMARK 3");
+	line = m[1].str();
+
+	struct program_score
+	{
+		program_score(const string& program, Remark3Parser* parser, float score)
+			: program(program), parser(parser), score(score) {}
+
+		string program;
+		unique_ptr<Remark3Parser> parser;
+		float score;
+
+		// sorts the highest score to the front
+		bool operator<(const program_score& rhs) const
+		{
+			return score > rhs.score;
+		}
+	};
+
+	vector<program_score> scores;
+
+	// Takes ownership of p, runs it and keeps it when it recognised anything
+	auto tryParser = [&](Remark3Parser* p)
+	{
+		unique_ptr<Remark3Parser> parser(p);
+		float score = parser->Parse();
+
+		if (VERBOSE >= 2)
+			cerr << "Score for " << parser->Program() << ": " << score << endl;
+
+		if (score > 0)
+		{
+			string program = parser->Program();
+			string version = parser->Version();
+
+			// The same program can be tried with more than one parser (see
+			// REFMAC below). Every software row written here has an entry in
+			// scores, so use that to avoid adding the same row twice.
+			bool registered = false;
+			for (auto& s: scores)
+			{
+				if (s.program == program and s.parser->Version() == version)
+				{
+					registered = true;
+					break;
+				}
+			}
+
+			if (not registered)
+			{
+				auto& software = db["software"];
+
+				software.emplace({
+					{ "name", program },
+					{ "classification", "refinement" },
+					{ "version", version },
+					{ "pdbx_ordinal", software.size() + 1 }
+				});
+			}
+
+			scores.emplace_back(program, parser.release(), score);
+		}
+	};
+
+	for (auto p = make_split_iterator(line, ba::first_finder(", "));
+		not p.eof(); ++p)
+	{
+		string program(p->begin(), p->end());
+
+		if (ba::starts_with(program, "BUSTER"))
+			tryParser(new BUSTER_TNT_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "CNS") or ba::starts_with(program, "CNX"))
+			tryParser(new CNS_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "PHENIX"))
+			tryParser(new PHENIX_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "PROLSQ") or ba::starts_with(program, "NUCLSQ"))
+			tryParser(new PROLSQ_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "REFMAC"))
+		{
+			// simply try both and take the best
+			tryParser(new REFMAC_Remark3Parser(program, expMethod, r, db));
+			tryParser(new REFMAC5_Remark3Parser(program, expMethod, r, db));
+		}
+		else if (ba::starts_with(program, "SHELXL"))
+			tryParser(new SHELXL_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "TNT"))
+			tryParser(new TNT_Remark3Parser(program, expMethod, r, db));
+		else if (ba::starts_with(program, "X-PLOR"))
+			tryParser(new XPLOR_Remark3Parser(program, expMethod, r, db));
+		else if (VERBOSE)
+			cerr << "Skipping unknown program (" << program << ") in REMARK 3" << endl;
+	}
+
+	bool result = false;
+
+	if (not scores.empty())
+	{
+		result = true;
+
+		sort(scores.begin(), scores.end());
+
+		auto& best = scores.front();
+
+		if (VERBOSE >= 2)
+			cerr << "Choosing " << best.parser->Program() << " version '" << best.parser->Version() << "' as refinement program. Score = " << best.score << endl;
+
+		best.parser->Fixup();
+
+		for (auto& cat1: best.parser->m_db)
+		{
+			auto& cat2 = db[cat1.name()];
+
+			// copy only the values in the first row for the following categories
+			if (cat1.name() == "reflns" or cat1.name() == "refine")
+			{
+				if (cat2.empty())			// duh... this will generate a validation error anyway...
+					cat2.emplace({});
+
+				row r1 = cat1.front();
+				row r2 = cat2.front();
+
+				for (auto& i: r1)
+					r2[i.name()] = i.value();
+			}
+			else
+			{
+				// all other categories are copied row by row
+				for (auto srcRow: cat1)
+					cat2.emplace(srcRow);
+			}
+		}
+	}
+
+	return result;
+}
--- a/src/pdb2cif.cpp
+++ b/src/pdb2cif.cpp
--- a/src/point.cpp
+++ b/src/point.cpp
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#include "libcif/point.h"
+
+using namespace std;
+
+namespace libcif
+{
+
+// --------------------------------------------------------------------
+
+// Scale q to unit length. A quaternion whose length is not larger than 0.001
+// is replaced by the identity quaternion (1, 0, 0, 0).
+quaternion Normalize(quaternion q)
+{
+	const double parts[4] = {
+		q.R_component_1(),
+		q.R_component_2(),
+		q.R_component_3(),
+		q.R_component_4()
+	};
+
+	valarray<double> squares(parts, 4);
+	squares *= squares;
+
+	const double norm = sqrt(squares.sum());
+
+	// written as a negation so that a NaN norm also ends up here
+	if (not (norm > 0.001))
+		return quaternion(1, 0, 0, 0);
+
+	q /= norm;
+	return q;
+}
+
+// --------------------------------------------------------------------
+
+// Dihedral angle in degrees defined by the four points, measured around the
+// axis running through p2 and p3. Returns 360 when the angle is undefined,
+// i.e. when one of the helper vectors has zero length.
+float DihedralAngle(const point& p1, const point& p2, const point& p3, const point& p4)
+{
+	point axis = p2 - p3;		// vector from p3 to p2
+	point arm1 = p1 - p2;		// vector from p2 to p1
+	point arm4 = p4 - p3;		// vector from p3 to p4
+
+	point p = CrossProduct(axis, arm1);
+	point x = CrossProduct(axis, arm4);
+	point y = CrossProduct(axis, x);
+
+	const double xx = DotProduct(x, x);
+	const double yy = DotProduct(y, y);
+
+	if (not (xx > 0 and yy > 0))
+		return 360;
+
+	const double u = DotProduct(p, x) / sqrt(xx);
+	const double v = DotProduct(p, y) / sqrt(yy);
+
+	if (u == 0 and v == 0)
+		return 360;
+
+	const double degrees = atan2(v, u) * 180 / kPI;
+	return degrees;
+}
+
+// Returns the cosine of the angle between the vectors (p1 - p2) and
+// (p3 - p4), or 0 when the product of their squared lengths is not positive.
+float CosinusAngle(const point& p1, const point& p2, const point& p3, const point& p4)
+{
+	point lhs = p1 - p2;
+	point rhs = p3 - p4;
+
+	double normProduct = DotProduct(lhs, lhs) * DotProduct(rhs, rhs);
+
+	if (not (normProduct > 0))
+		return 0;
+
+	return DotProduct(lhs, rhs) / sqrt(normProduct);
+}
+
+// --------------------------------------------------------------------
+
+// Converts the rotation described by q into a rotation angle (in degrees)
+// and a rotation axis.
+//
+// acos and sqrt(1 - w * w) are only defined for a real component w in the
+// range [-1, 1], so q is normalised first whenever w lies outside that range
+// on either side.
+tuple<double,point> QuaternionToAngleAxis(quaternion q)
+{
+	if (q.R_component_1() > 1 or q.R_component_1() < -1)
+		q = Normalize(q);
+
+	// angle:
+	double angle = 2 * acos(q.R_component_1());
+	angle = angle * 180 / kPI;
+
+	// axis:
+	double s = sqrt(1 - q.R_component_1() * q.R_component_1());
+	if (s < 0.001)		// (almost) no rotation, avoid dividing by (nearly) zero
+		s = 1;
+	
+	point axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);
+
+	return make_tuple(angle, axis);
+}
+
+// Translates all points in place so that the average of their coordinates
+// ends up at the origin and returns that average as it was before the
+// translation. The sums are divided by points.size(), callers should not
+// pass an empty vector.
+point CenterPoints(vector<point>& points)
+{
+	point center;
+
+	// accumulate the coordinate sums
+	for (point& p : points)
+	{
+		center.x() += p.x();
+		center.y() += p.y();
+		center.z() += p.z();
+	}
+
+	// turn the sums into averages
+	center.x() /= points.size();
+	center.y() /= points.size();
+	center.z() /= points.size();
+
+	// and shift every point by the average
+	for (point& p : points)
+	{
+		p.x() -= center.x();
+		p.y() -= center.y();
+		p.z() -= center.z();
+	}
+
+	return center;
+}
+
+// Returns the sum of all points divided by the number of points. The points
+// themselves are left untouched. The sum is divided by points.size(), callers
+// should not pass an empty vector.
+point Centroid(vector<point>& points)
+{
+	point sum;
+
+	for (auto i = points.begin(); i != points.end(); ++i)
+		sum += *i;
+
+	sum /= points.size();
+
+	return sum;
+}
+
+// Returns the root mean square distance between the points in a and the
+// points at the same index in b. The loop runs over a.size() elements and
+// indexes b with the same index, so b must contain at least as many points
+// as a.
+double RMSd(const vector<point>& a, const vector<point>& b)
+{
+	double total = 0;
+
+	// scratch space for the per-axis differences, reused for every pair
+	valarray<double> delta(3);
+
+	for (uint32 ix = 0; ix < a.size(); ++ix)
+	{
+		const point& pa = a[ix];
+		const point& pb = b[ix];
+
+		delta[0] = pb.x() - pa.x();
+		delta[1] = pb.y() - pa.y();
+		delta[2] = pb.z() - pa.z();
+
+		// squared distance for this pair
+		delta *= delta;
+		total += delta.sum();
+	}
+
+	return sqrt(total / a.size());
+}
+
+// Returns the largest real part found among the solutions of the depressed
+// quartic equation
+//
+//   x^4 + ax^2 + bx + c = 0
+//
+// The solutions are calculated using Ferrari's algorithm. All intermediate
+// values are complex numbers, this avoids the nan's that would otherwise be
+// the result of taking the square root of a negative number.
+double LargestDepressedQuarticSolution(double a, double b, double c)
+{
+	complex<double> P = - (a * a) / 12 - c;
+	complex<double> Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
+	complex<double> R = - Q / 2.0 + sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);
+	
+	complex<double> U = pow(R, 1 / 3.0);
+	
+	complex<double> y;
+	if (U == 0.0)
+		y = -5.0 * a / 6.0 + U - pow(Q, 1.0 / 3.0);
+	else
+		y = -5.0 * a / 6.0 + U - P / (3.0 * U);
+
+	complex<double> W = sqrt(a + 2.0 * y);
+	
+	// The four solutions of the quartic are:
+	//
+	//   x = (±s W ±t sqrt(-(3 * a + 2 * y ±s 2 * b / W))) / 2
+	//
+	// where both ±s signs have to be the same and ±t is independent. Mixing
+	// the ±s signs yields values that are not solutions at all, e.g. for
+	// x^4 - 25x^2 + 60x - 36 (solutions 3, 2, 1 and -6) a mixed combination
+	// gives 6.
+	//
+	// The principal square root never has a negative real part, so of each
+	// ±t pair the + variant has the largest real part. That leaves only two
+	// candidates to compare.
+
+	// W can only be zero when b is zero as well, the term 2 * b / W is then
+	// taken to be zero instead of dividing by zero
+	complex<double> k = 0.0;
+	if (W != 0.0)
+		k = 2.0 * b / W;
+
+	double s1 = (( W + sqrt(-(3.0 * a + 2.0 * y + k))) / 2.0).real();
+	double s2 = ((-W + sqrt(-(3.0 * a + 2.0 * y - k))) / 2.0).real();
+
+	return s1 > s2 ? s1 : s2;
+}
+
+//quaternion AlignPoints(const vector<point>& pa, const vector<point>& pb)
+//{
+//	// First calculate M, a 3x3 matrix containing the sums of products of the coordinates of A and B
+//	matrix<double> M(3, 3, 0);
+//
+//	for (uint32 i = 0; i < pa.size(); ++i)
+//	{
+//		const point& a = pa[i];
+//		const point& b = pb[i];
+//		
+//		M(0, 0) += a.x() * b.x();	M(0, 1) += a.x() * b.y();	M(0, 2) += a.x() * b.z();
+//		M(1, 0) += a.y() * b.x();	M(1, 1) += a.y() * b.y();	M(1, 2) += a.y() * b.z();
+//		M(2, 0) += a.z() * b.x();	M(2, 1) += a.z() * b.y();	M(2, 2) += a.z() * b.z();
+//	}
+//	
+//	// Now calculate N, a symmetric 4x4 matrix
+//	symmetric_matrix<double> N(4);
+//	
+//	N(0, 0) =  M(0, 0) + M(1, 1) + M(2, 2);
+//	N(0, 1) =  M(1, 2) - M(2, 1);
+//	N(0, 2) =  M(2, 0) - M(0, 2);
+//	N(0, 3) =  M(0, 1) - M(1, 0);
+//	
+//	N(1, 1) =  M(0, 0) - M(1, 1) - M(2, 2);
+//	N(1, 2) =  M(0, 1) + M(1, 0);
+//	N(1, 3) =  M(0, 2) + M(2, 0);
+//	
+//	N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
+//	N(2, 3) =  M(1, 2) + M(2, 1);
+//	
+//	N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);
+//
+//	// det(N - λI) = 0
+//	// find the largest λ (λm)
+//	//
+//	// Aλ4 + Bλ3 + Cλ2 + Dλ + E = 0
+//	// A = 1
+//	// B = 0
+//	// and so this is a so-called depressed quartic
+//	// solve it using Ferrari's algorithm
+//	
+//	double C = -2 * (
+//		M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
+//		M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
+//		M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));
+//	
+//	double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
+//					M(1, 1) * M(2, 0) * M(0, 2) +
+//					M(2, 2) * M(0, 1) * M(1, 0)) -
+//			   8 * (M(0, 0) * M(1, 1) * M(2, 2) +
+//					M(1, 2) * M(2, 0) * M(0, 1) +
+//					M(2, 1) * M(1, 0) * M(0, 2));
+//	
+//	double E = 
+//		(N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
+//		(N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
+//		(N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
+//		(N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
+//		(N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
+//		(N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));
+//	
+//	// solve quartic
+//	double lm = LargestDepressedQuarticSolution(C, D, E);
+//	
+//	// calculate t = (N - λI)
+//	matrix<double> li = identity_matrix<double>(4) * lm;
+//	matrix<double> t = N - li;
+//	
+//	// calculate a matrix of cofactors for t
+//	matrix<double> cf(4, 4);
+//
+//	const uint32 ixs[4][3] =
+//	{
+//		{ 1, 2, 3 },
+//		{ 0, 2, 3 },
+//		{ 0, 1, 3 },
+//		{ 0, 1, 2 }
+//	};
+//
+//	uint32 maxR = 0;
+//	for (uint32 r = 0; r < 4; ++r)
+//	{
+//		const uint32* ir = ixs[r];
+//		
+//		for (uint32 c = 0; c < 4; ++c)
+//		{
+//			const uint32* ic = ixs[c];
+//
+//			cf(r, c) =
+//				t(ir[0], ic[0]) * t(ir[1], ic[1]) * t(ir[2], ic[2]) +
+//				t(ir[0], ic[1]) * t(ir[1], ic[2]) * t(ir[2], ic[0]) +
+//				t(ir[0], ic[2]) * t(ir[1], ic[0]) * t(ir[2], ic[1]) -
+//				t(ir[0], ic[2]) * t(ir[1], ic[1]) * t(ir[2], ic[0]) -
+//				t(ir[0], ic[1]) * t(ir[1], ic[0]) * t(ir[2], ic[2]) -
+//				t(ir[0], ic[0]) * t(ir[1], ic[2]) * t(ir[2], ic[1]);
+//		}
+//		
+//		if (r > maxR and cf(r, 0) > cf(maxR, 0))
+//			maxR = r;
+//	}
+//	
+//	// NOTE the negation of the y here, why? Maybe I swapped r/c above?
+//	quaternion q(cf(maxR, 0), cf(maxR, 1), -cf(maxR, 2), cf(maxR, 3));
+//	q = Normalize(q);
+//	
+//	return q;
+//}
+
+}
--- a/src/structure.cpp
+++ b/src/structure.cpp
+// Lib for working with structures as contained in mmCIF and PDB files
+
+#include "libcif/structure.h"
+
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem/fstream.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/bzip2.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+
+#include "pdb2cif.h"
+#include "libcif/cif-parser.h"
+#include "cif2pdb.h"
+
+using namespace std;
+
+namespace ba = boost::algorithm;
+namespace fs = boost::filesystem;
+namespace io = boost::iostreams;
+
+extern int VERBOSE;
+
+namespace libcif
+{
+	
+// --------------------------------------------------------------------
+// file_impl
+	
+// Private implementation of file. Holds the parsed data and a pointer to the
+// datablock that is in use, which load() sets to the first datablock of
+// m_data.
+struct file_impl
+{
+	cif::file			m_data;				// the parsed content of the file
+	cif::datablock*		m_db = nullptr;		// nullptr until load() has succeeded
+	
+	// read the file at p into m_data, see the definition for the details
+	void load(fs::path p);
+	// write m_data to the file at p, see the definition for the details
+	void save(fs::path p);
+};
+
+// Reads the file at p into m_data, makes m_db point to the first datablock
+// and validates the data using the mmcif_pdbx dictionary.
+//
+// A trailing .gz or .bz2 extension selects decompression while reading. The
+// extension in front of that (or the extension of the file itself when it is
+// not compressed) selects the parser: .cif files are loaded as cif, .pdb and
+// .ent files are read with ReadPDBFile. Files with any other extension are
+// first tried as cif and, when that results in a cif_parser_error, read with
+// ReadPDBFile after all.
+//
+// Throws a runtime_error when the file cannot be opened.
+void file_impl::load(fs::path p)
+{
+	fs::ifstream infile(p, ios_base::in | ios_base::binary);
+	if (not infile.is_open())
+		throw runtime_error("No such file: " + p.string());
+	
+	io::filtering_stream<io::input> in;
+
+	// Start out with the extension of the file itself. Without this an
+	// uncompressed .cif, .pdb or .ent file would never be recognised and would
+	// always end up in the trial and error code below.
+	const string compression = p.extension().string();
+	string ext = compression;
+	
+	if (compression == ".bz2")
+	{
+		in.push(io::bzip2_decompressor());
+		ext = p.stem().extension().string();
+	}
+	else if (compression == ".gz")
+	{
+		in.push(io::gzip_decompressor());
+		ext = p.stem().extension().string();
+	}
+	
+	in.push(infile);
+
+	// OK, we've got the file, now create a protein
+	if (ext == ".cif")
+		m_data.load(in);
+	else if (ext == ".pdb" or ext == ".ent")
+		ReadPDBFile(in, m_data);
+	else
+	{
+		try
+		{
+			if (VERBOSE)
+				cerr << "unrecognized file extension, trying cif" << endl;
+
+			m_data.load(in);
+		}
+		catch (const cif::cif_parser_error&)
+		{
+			if (VERBOSE)
+				cerr << "Not cif, trying plain old PDB" << endl;
+
+			// Start all over again: drop the filter chain, rewind (or reopen)
+			// the file and rebuild the chain.
+			in.reset();
+
+			if (infile.is_open())
+			{
+				// seekg does nothing on a stream that is in a failed state
+				infile.clear();
+				infile.seekg(0);
+			}
+			else
+				infile.open(p, ios_base::in | ios::binary);
+
+			if (compression == ".bz2")
+				in.push(io::bzip2_decompressor());
+			else if (compression == ".gz")
+				in.push(io::gzip_decompressor());
+			
+			in.push(infile);
+
+			ReadPDBFile(in, m_data);
+		}
+	}
+	
+	// Yes, we've parsed the data. Now locate the datablock.
+	m_db = &m_data.first_datablock();
+	
+	// And validate, otherwise lots of functionality won't work
+//	if (m_data.get_validator() == nullptr)
+		m_data.load_dictionary("mmcif_pdbx");
+	m_data.validate();
+}
+
+// Writes m_data to the file at p. A trailing .gz or .bz2 extension selects
+// compression while writing. When the remaining extension is .pdb the data is
+// written using WritePDBFile, in all other cases m_data.save is used.
+//
+// Throws a runtime_error when the file cannot be opened for writing, the
+// data would otherwise be lost without any notice.
+void file_impl::save(fs::path p)
+{
+	fs::ofstream outfile(p, ios_base::out | ios_base::binary);
+	if (not outfile.is_open())
+		throw runtime_error("Could not open file for writing: " + p.string());
+
+	io::filtering_stream<io::output> out;
+	
+	if (p.extension() == ".gz")
+	{
+		out.push(io::gzip_compressor());
+		p = p.stem();		// strip .gz, what remains decides the format
+	}
+	else if (p.extension() == ".bz2")
+	{
+		out.push(io::bzip2_compressor());
+		p = p.stem();		// strip .bz2, what remains decides the format
+	}
+	
+	out.push(outfile);
+	
+	if (p.extension() == ".pdb")
+		WritePDBFile(out, m_data);
+	else
+		m_data.save(out);
+}
+
+
+// --------------------------------------------------------------------
+// atom
+
+// Reference counted implementation object for atom. It keeps the atom_site
+// row of a single atom along with a couple of values that are read from that
+// row when the object is constructed.
+//
+// A new atom_impl starts with a reference count of one. That reference
+// belongs to whoever created the object.
+struct atom_impl
+{
+	// Finds the row to use by looking up id in the atom_site category of the
+	// datablock of f.
+	atom_impl(const file& f, const string& id)
+		: m_file(f), m_id(id), m_refcount(1), m_compound(nullptr)
+	{
+		auto& db = *m_file.impl().m_db;
+		auto& cat = db["atom_site"];
+		
+		m_row = cat[cif::key("id") == m_id];
+
+		prefetch();
+	}
+	
+	// Uses the row passed in, no lookup is needed.
+	atom_impl(const file& f, const string& id, cif::row row)
+		: m_file(f), m_id(id), m_refcount(1), m_row(row), m_compound(nullptr)
+	{
+		prefetch();
+	}
+	
+	// Reads type_symbol and the Cartn_x, Cartn_y and Cartn_z coordinates
+	// from the row and tries to locate the compound.
+	void prefetch()
+	{
+		// Prefetch some data
+		string symbol;
+		cif::tie(symbol) = m_row.get("type_symbol");
+		
+		m_type = atom_type_traits(symbol).type();
+
+		float x, y, z;
+		cif::tie(x, y, z) = m_row.get("Cartn_x", "Cartn_y", "Cartn_z");
+		
+		m_location = point(x, y, z);
+		
+		// Best effort only: when the compound cannot be created m_compound
+		// stays nullptr and a later call to comp() will try again.
+		try
+		{
+			comp();
+		}
+		catch (...) {}
+	}
+
+	// Adds a reference.
+	void reference()
+	{
+		++m_refcount;
+	}
+	
+	// Drops a reference and deletes the object when this was the last one.
+	void release()
+	{
+		// The count starts at one, so the last reference is gone as soon as
+		// the count reaches zero. Testing for a count below zero never
+		// succeeds when reference() and release() calls are balanced and
+		// thus leaks the object.
+		if (--m_refcount <= 0)
+			delete this;
+	}
+	
+	// Returns the compound for the label_comp_id of this atom, it is created
+	// the first time it is needed. Throws a runtime_error when
+	// compound::create does not return a compound.
+	const compound& comp()
+	{
+		if (m_compound == nullptr)
+		{
+			string comp_id;
+			cif::tie(comp_id) = m_row.get("label_comp_id");
+			
+			m_compound = compound::create(comp_id);
+		}
+		
+		if (m_compound == nullptr)
+			throw runtime_error("no compound");
+	
+		return *m_compound;
+	}
+	
+	// Returns false when no compound is known for this atom.
+	bool is_water() const
+	{
+		return m_compound != nullptr and m_compound->is_water();
+	}
+
+	const file&			m_file;			// the file this atom belongs to
+	string				m_id;			// the id passed to the constructor
+	int					m_refcount;		// number of references, see release()
+	cif::row			m_row;			// the atom_site row for this atom
+	const compound*		m_compound;		// nullptr until comp() has succeeded
+	point				m_location;		// Cartn_x, Cartn_y, Cartn_z at construction
+	atom_type			m_type;			// derived from type_symbol at construction
+	
+//	const entity&		m_entity;
+//	std::string			m_asym_id;
+//	std::string			m_atom_id;
+//	point				m_loc;
+//	property_list		m_properties;
+};
+
+// Creates an atom with a newly allocated atom_impl for the atom with the
+// given id in file f.
+atom::atom(const file& f, const string& id)
+	: m_impl(new atom_impl(f, id))
+{
+}
+
+// Creates an atom using an existing atom_impl. reference() is not called
+// here, the atom takes over the reference the caller holds on impl.
+atom::atom(atom_impl* impl)
+	: m_impl(impl)
+{
+}
+
+// Creates an atom that shares the atom_impl of rhs. The destructor accepts
+// an atom without an implementation, so copying such an atom has to be safe
+// as well.
+atom::atom(const atom& rhs)
+	: m_impl(rhs.m_impl)
+{
+	if (m_impl != nullptr)
+		m_impl->reference();
+}
+
+// Gives up the reference on the shared atom_impl, if there is one.
+atom::~atom()
+{
+	if (m_impl == nullptr)
+		return;
+
+	m_impl->release();
+}
+
+// Makes this atom share the atom_impl of rhs. Either side may be an atom
+// without an implementation.
+atom& atom::operator=(const atom& rhs)
+{
+	if (this != &rhs)
+	{
+		atom_impl* incoming = rhs.m_impl;
+
+		// Take the new reference before giving up the old one, that way an
+		// implementation that is shared by both atoms is never destroyed
+		// halfway.
+		if (incoming != nullptr)
+			incoming->reference();
+
+		if (m_impl != nullptr)
+			m_impl->release();
+
+		m_impl = incoming;
+	}
+
+	return *this;
+}
+
+// Returns the id stored in the implementation object.
+string atom::id() const
+{
+	const atom_impl& impl = *m_impl;
+	return impl.m_id;
+}
+
+// Returns the atom type that was determined when the atom was created.
+atom_type atom::type() const
+{
+	const atom_impl& impl = *m_impl;
+	return impl.m_type;
+}
+
+// Returns the value of pdbx_formal_charge in the row of this atom.
+int atom::charge() const
+{
+	auto& row = m_impl->m_row;
+
+	int formal_charge;
+	cif::tie(formal_charge) = row.get("pdbx_formal_charge");
+	return formal_charge;
+}
+
+// Returns the value of label_atom_id in the row of this atom.
+string atom::label_atom_id() const
+{
+	auto& row = m_impl->m_row;
+
+	string value;
+	cif::tie(value) = row.get("label_atom_id");
+	return value;
+}
+
+// Returns the value of label_comp_id in the row of this atom.
+string atom::label_comp_id() const
+{
+	auto& row = m_impl->m_row;
+
+	string value;
+	cif::tie(value) = row.get("label_comp_id");
+	return value;
+}
+
+// Returns the value of label_asym_id in the row of this atom.
+string atom::label_asym_id() const
+{
+	auto& row = m_impl->m_row;
+
+	string value;
+	cif::tie(value) = row.get("label_asym_id");
+	return value;
+}
+
+// Returns the value of label_seq_id in the row of this atom.
+int atom::label_seq_id() const
+{
+	auto& row = m_impl->m_row;
+
+	int value;
+	cif::tie(value) = row.get("label_seq_id");
+	return value;
+}
+
+// Returns the value of auth_asym_id in the row of this atom.
+string atom::auth_asym_id() const
+{
+	auto& row = m_impl->m_row;
+
+	string value;
+	cif::tie(value) = row.get("auth_asym_id");
+	return value;
+}
+
+// Returns the value of auth_seq_id in the row of this atom.
+int atom::auth_seq_id() const
+{
+	auto& row = m_impl->m_row;
+
+	int value;
+	cif::tie(value) = row.get("auth_seq_id");
+	return value;
+}
+
+// Returns the location that was read from the row when the atom was created.
+point atom::location() const
+{
+	const atom_impl& impl = *m_impl;
+	return impl.m_location;
+}
+
+// Returns the compound of this atom as provided by the implementation
+// object, which throws a runtime_error when there is no compound.
+const compound& atom::comp() const
+{
+	atom_impl& impl = *m_impl;
+	return impl.comp();
+}
+
+// Returns whether the implementation object considers this atom to be water.
+bool atom::is_water() const
+{
+	const atom_impl& impl = *m_impl;
+	return impl.is_water();
+}
+
+// Returns the item called name in the row of this atom. The value is always
+// stored in the boost::any as a string.
+boost::any atom::property(const std::string& name) const
+{
+	auto& row = m_impl->m_row;
+	string value = row[name].as<string>();
+
+	return boost::any(value);
+}
+
+// Two atoms are equal when they share their implementation object or when
+// they refer to the same file object and have the same id.
+bool atom::operator==(const atom& rhs) const
+{
+	if (m_impl == rhs.m_impl)
+		return true;
+
+	const bool same_file = &m_impl->m_file == &rhs.m_impl->m_file;
+	return same_file and m_impl->m_id == rhs.m_impl->m_id;
+}
+
+// Returns the file this atom was created for.
+const file& atom::get_file() const
+{
+	assert(m_impl);
+
+	const file& owner = m_impl->m_file;
+	return owner;
+}
+
+// --------------------------------------------------------------------
+// residue
+
+//atom_view residue::atoms()
+//{
+//	assert(false);
+//}
+
+// --------------------------------------------------------------------
+// monomer
+
+// --------------------------------------------------------------------
+// polymer
+
+// --------------------------------------------------------------------
+// file
+
+// Creates a file without data, the datablock pointer of the implementation
+// object stays nullptr until load() has been called.
+file::file()
+	: m_impl(new file_impl)
+{
+}
+
+// Creates a file and loads the data found at the path passed in. Whatever
+// load() throws is passed on to the caller.
+file::file(fs::path file)
+	: m_impl(new file_impl)
+{
+	// The destructor is not called for an object whose constructor throws,
+	// so the implementation object has to be cleaned up here.
+	try
+	{
+		load(file);
+	}
+	catch (...)
+	{
+		delete m_impl;
+		throw;
+	}
+}
+
+// Destroys the implementation object allocated in the constructor.
+file::~file()
+{
+	delete m_impl;
+}
+
+// Loads the file at p, all the work is done by file_impl::load.
+void file::load(fs::path p)
+{
+	m_impl->load(p);
+	
+	// What follows is disabled draft code that constructs entities, atoms
+	// and residues from the loaded data. It is not compiled.
+	
+//	// all data is now in m_file, construct atoms and others
+//	
+//	auto& db = m_file.first_datablock();
+//	
+//	// the entities
+//	
+//	struct entity
+//	{
+//		string				id;
+//		string				type;
+//	};
+//	vector<entity> entities;
+//	
+//	for (auto& _e: db["entity"])
+//	{
+//		string type = _e["type"];
+//		ba::to_lower(type);
+//		entities.push_back({ _e["id"], type });
+//	}
+//
+//	auto& atom_sites = db["atom_site"];
+//	for (auto& atom_site: atom_sites)
+//	{
+//		atom_ptr ap(new atom(this, atom_site));
+//
+//		string entity_id = atom_site["entity_id"];
+//		
+//		auto e = find_if(entities.begin(), entities.end(), [=](entity& e) -> bool { return e.id == entity_id; });
+//		if (e == entities.end())
+//			throw runtime_error("Entity " + entity_id + " is not defined");
+//
+//		string comp_id, asym_id, seq_id;
+//		cif::tie(comp_id, seq_id) = atom_site.get("label_comp_id", "label_asym_id", "label_seq_id");
+//
+//		auto r = find_if(m_residues.begin(), m_residues.end(), [=](residue_ptr& res) -> bool
+//		{
+////			return res.entities
+//			return false;
+//		});
+//		
+//		if (e->type == "water")
+//			;
+//		else if (e->type == "polymer")
+//			;
+//		else	
+//			;
+//		
+//		m_atoms.push_back(ap);
+//	}
+	
+}
+
+// Saves the data to the path passed in, all the work is done by
+// file_impl::save.
+void file::save(boost::filesystem::path file)
+{
+	m_impl->save(file);
+}
+
+// Returns the datablock located by load(). Without a loaded datablock the
+// asserts fail, and when asserts are compiled out a runtime_error is thrown.
+cif::datablock& file::data()
+{
+	assert(m_impl);
+	assert(m_impl->m_db);
+
+	const bool loaded = m_impl != nullptr and m_impl->m_db != nullptr;
+	if (not loaded)
+		throw runtime_error("No data loaded");
+
+	cif::datablock* db = m_impl->m_db;
+	return *db;
+}
+
+// --------------------------------------------------------------------
+//	structure
+
+// Private implementation of structure: the atoms of one model of a file.
+struct structure_impl
+{
+	// Collects an atom for every atom_site row in f that either has no
+	// pdbx_PDB_model_num value or has one that is equal to model_nr.
+	// The parameter s is not used.
+	structure_impl(structure& s, file& f, uint32 model_nr)
+		: m_file(&f), m_model_nr(model_nr)
+	{
+		auto& atom_sites = (*m_file->impl().m_db)["atom_site"];
+
+		for (auto& site: atom_sites)
+		{
+			auto model_item = site["pdbx_PDB_model_num"];
+
+			const bool in_model =
+				model_item.empty() or model_item.as<uint32>() == m_model_nr;
+
+			if (not in_model)
+				continue;
+
+			m_atoms.emplace_back(new atom_impl(f, site["id"].as<string>(), site));
+		}
+	}
+
+	// removes the atom from the atom_site category and from m_atoms
+	void remove_atom(atom& a);
+
+	file*			m_file;			// the file the atoms were taken from
+	uint32			m_model_nr;		// the model number passed to the constructor
+	atom_view		m_atoms;		// the atoms collected by the constructor
+};
+
+// Removes the atom from the data: the first atom_site row having the id of
+// the atom is erased and all atoms comparing equal to a are removed from
+// m_atoms.
+void structure_impl::remove_atom(atom& a)
+{
+	// Work with a copy. The atom passed in may very well be an element of
+	// m_atoms and such an element is overwritten with the value of another
+	// atom while remove() is still comparing against it.
+	const atom target(a);
+
+	// the id does not change, no need to fetch it again for each row
+	const string target_id = target.id();
+
+	cif::datablock& db = *m_file->impl().m_db;
+	
+	auto& atom_sites = db["atom_site"];
+	
+	for (auto i = atom_sites.begin(); i != atom_sites.end(); ++i)
+	{
+		string id;
+		cif::tie(id) = i->get("id");
+		
+		if (id == target_id)
+		{
+			atom_sites.erase(i);
+			break;		// i is no longer valid
+		}
+	}
+	
+	m_atoms.erase(remove(m_atoms.begin(), m_atoms.end(), target), m_atoms.end());
+}
+
+// Creates a structure for the atoms in f that belong to model model_nr, see
+// the constructor of structure_impl for the selection of the atoms.
+structure::structure(file& f, uint32 model_nr)
+	: m_impl(new structure_impl(*this, f, model_nr))
+{
+}
+
+// Destroys the implementation object allocated in the constructor.
+structure::~structure()
+{
+	delete m_impl;
+}
+
+// Returns a copy of the list of atoms in this structure.
+atom_view structure::atoms() const
+{
+	const atom_view& all = m_impl->m_atoms;
+	return all;
+}
+
+// Returns the atoms whose label_entity_id is equal to the id of the first
+// entity that has type water (the type is compared ignoring case). When
+// there is no such entity the atoms are compared with an empty id.
+atom_view structure::waters() const
+{
+	auto& db = *get_file().impl().m_db;
+
+	// Get the entity id for water
+	string water_id;
+	for (auto& entity: db["entity"])
+	{
+		string id, type;
+		cif::tie(id, type) = entity.get("id", "type");
+
+		if (not ba::iequals(type, "water"))
+			continue;
+
+		water_id = id;
+		break;
+	}
+
+	// And collect the atoms that belong to this entity
+	atom_view found;
+	for (auto& a: m_impl->m_atoms)
+	{
+		const string entity_id = boost::any_cast<string>(a.property("label_entity_id"));
+
+		if (entity_id == water_id)
+			found.push_back(a);
+	}
+
+	return found;
+}
+
+// Returns the first atom in this structure with the given id, throws an
+// out_of_range exception when there is no such atom.
+atom structure::get_atom_by_id(string id) const
+{
+	auto& atoms = m_impl->m_atoms;
+
+	for (auto i = atoms.begin(); i != atoms.end(); ++i)
+	{
+		if (i->id() == id)
+			return *i;
+	}
+
+	throw out_of_range("Could not find atom with id " + id);
+}
+
+// Returns the file this structure was created for.
+file& structure::get_file() const
+{
+	file* owner = m_impl->m_file;
+	return *owner;
+}
+
+//tuple<string,string> structure::MapLabelToAuth(
+//	const string& asym_id, int seq_id)
+//{
+//	auto& db = *get_file().impl().m_db;
+//	
+//	tuple<string,int,string,string> result;
+//	bool found = false;
+//	
+//	for (auto r: db["pdbx_poly_seq_scheme"].find(
+//						cif::key("asym_id") == asym_id and
+//						cif::key("seq_id") == seq_id))
+//	{
+//		string auth_asym_id, pdb_mon_id, pdb_ins_code;
+//		int pdb_seq_num;
+//		
+//		cif::tie(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code) =
+//			r.get("pdb_strand_id", "pdb_seq_num", "pdb_mon_id", "pdb_ins_code");
+//
+//		result = make_tuple(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code);
+//
+//		found = true;
+//		break;
+//	}
+//						
+//	for (auto r: db["pdbx_nonpoly_scheme"].find(
+//						cif::key("asym_id") == asym_id and
+//						cif::key("seq_id") == seq_id and
+//						cif::key("mon_id") == mon_id))
+//	{
+//		string pdb_strand_id, pdb_mon_id, pdb_ins_code;
+//		int pdb_seq_num;
+//		
+//		cif::tie(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code) =
+//			r.get("pdb_strand_id", "pdb_seq_num", "pdb_mon_id", "pdb_ins_code");
+//
+//		result = make_tuple(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code);
+//
+//		found = true;
+//		break;
+//	}
+//
+//	return result;
+//}
+
+// Looks up the pdb_strand_id, pdb_seq_num, pdb_mon_id and pdb_ins_code
+// values for the residue with the given asym_id, seq_id and mon_id. Both
+// pdbx_poly_seq_scheme and pdbx_nonpoly_scheme are searched, of each only
+// the first matching row is used and a match in pdbx_nonpoly_scheme
+// replaces the one found in pdbx_poly_seq_scheme. A default constructed
+// tuple is returned when nothing matches.
+tuple<string,int,string,string> structure::MapLabelToPDB(
+	const string& asym_id, int seq_id, const string& mon_id)
+{
+	auto& db = *get_file().impl().m_db;
+
+	tuple<string,int,string,string> mapped;
+
+	// stores the values of the first matching row in scheme, if any, in mapped
+	auto take_first = [&](const char* scheme)
+	{
+		for (auto r: db[scheme].find(
+							cif::key("asym_id") == asym_id and
+							cif::key("seq_id") == seq_id and
+							cif::key("mon_id") == mon_id))
+		{
+			string strand_id, pdb_mon, ins_code;
+			int seq_num;
+
+			cif::tie(strand_id, seq_num, pdb_mon, ins_code) =
+				r.get("pdb_strand_id", "pdb_seq_num", "pdb_mon_id", "pdb_ins_code");
+
+			mapped = make_tuple(strand_id, seq_num, pdb_mon, ins_code);
+
+			return;
+		}
+	};
+
+	take_first("pdbx_poly_seq_scheme");
+	take_first("pdbx_nonpoly_scheme");
+
+	return mapped;
+}
+
+// --------------------------------------------------------------------
+// actions
+
+// Removes the atom from this structure, all the work is done by
+// structure_impl::remove_atom.
+void structure::remove_atom(atom& a)
+{
+	m_impl->remove_atom(a);
+}
+
+}