refactored main using nested exceptions, fixed PDB parser a bit

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@180 a1961a4f-ab94-4bcc-80e8-33b5a54de466

refactored main using nested exceptions, fixed PDB parser a bit
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@180 a1961a4f-ab94-4bcc-80e8-33b5a54de466
c5d277fb · maarten · d0b7e21c · c5d277fb · c5d277fb · c5d277fb
Commit c5d277fb authored Dec 06, 2017 by maarten
Hide whitespace changes
Inline Side-by-side

Showing with 2985 additions and 890 deletions

include/cif++/TlsParser.h
+55 -0

src/Cif++.cpp
+2 -2

src/Compound.cpp
+2 -1

src/PDB2Cif.cpp
+1009 -886

src/PDB2CifRemark3.cpp
+4 -1

src/TlsParser.cpp
+1913 -0

No files found.
--- a/include/cif++/TlsParser.h
+++ b/include/cif++/TlsParser.h
+/* 
+   Created by: Maarten L. Hekkelman
+   Date: dinsdag 07 november, 2017
+
+	Copyright 2017 NKI AVL
+	
+	Permission is hereby granted, free of charge, to any person obtaining 
+	a copy of this software and associated documentation files (the 
+	"Software"), to deal in the Software without restriction, including 
+	without limitation the rights to use, copy, modify, merge, publish, 
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+	
+	The above copyright notice and this permission notice shall be 
+	included in all copies or substantial portions of the Software.
+	
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include <vector>
+#include <string>
+#include <tuple>
+
+#include "cif++/Cif++.h"
+
+namespace cif
+{
+	
+extern const int
+	kResidueNrWildcard,
+	kNoSeqNum;
+
+struct TLSSelection;
+typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
+
+struct TLSResidue;
+
+struct TLSSelection
+{
+	virtual ~TLSSelection() {}
+	virtual void CollectResidues(cif::Datablock& db, std::vector<TLSResidue>& residues, int indentLevel = 0) const = 0;
+	std::vector<std::tuple<std::string,int,int>> GetRanges(cif::Datablock& db, bool pdbNamespace) const;
+};
+
+// Low level: get the selections
+TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
+
+}
--- a/src/Cif++.cpp
+++ b/src/Cif++.cpp
@@ -1773,7 +1773,7 @@ Row& Row::operator=(const Row& rhs)
 void Row::assign(const string& name, const string& value, bool emplacing)
 {
 	if (mData == nullptr)
-		throw logic_error("invalid Row, no data");
+		throw logic_error("invalid Row, no data assigning value '" + value + "' to " + name);
 	
 	auto cat = mData->mCategory;
 	auto cix = cat->addColumn(name);
@@ -1905,7 +1905,7 @@ bool Row::empty() const

 auto Row::begin() const -> const_iterator
 {
-	return const_iterator(mData, mData->mValues);
+	return const_iterator(mData, mData ? mData->mValues : nullptr);
 }

 auto Row::end() const -> const_iterator

--- a/src/Compound.cpp
+++ b/src/Compound.cpp
@@ -264,7 +264,8 @@ const Compound* CompoundFactory::create(std::string id)
 											value = 1.5;
 				else
 				{
-					cerr << "Unimplemented chem_comp_bond.type " << type << " in file " << resFile << endl;
+					if (VERBOSE)
+						cerr << "Unimplemented chem_comp_bond.type " << type << " in file " << resFile << endl;
 					value = 1.0;
 				}
 				

--- a/src/PDB2Cif.cpp
+++ b/src/PDB2Cif.cpp
@@ -9,6 +9,8 @@
 #include <boost/format.hpp>
 #include <boost/numeric/ublas/matrix.hpp>

+#include <clipper/core/spacegroup.h>
+
 #include "cif++/PDB2Cif.h"
 #include "cif++/AtomType.h"
 #include "cif++/Compound.h"
@@ -31,7 +33,8 @@ namespace error
 {
 	enum pdbErrors
 	{
-		residueNotFound	= 1000
+		residueNotFound	= 1000,
+		invalidDate
 	};
 	
 	namespace detail
@@ -52,6 +55,9 @@ namespace error
 					case residueNotFound:
 						return "Residue not found";
 					
+					case invalidDate:
+						return "Invalid date";
+					
 					default:
 						return "Error in PDB format";
 				}
@@ -123,7 +129,6 @@ PDBRecord::PDBRecord(uint32 lineNr, const string& name, const string& value)

 PDBRecord::~PDBRecord()
 {
-	delete mNext;
 }

 void* PDBRecord::operator new(size_t size, size_t vLen)
@@ -373,7 +378,13 @@ class PDBFileParser
 	
 	~PDBFileParser()
 	{
-		delete mData;
+		PDBRecord* r = mData;
+		while (r != nullptr)
+		{
+			PDBRecord* d = r;
+			r = d->mNext;
+			delete d;
+		}	
 	}
 	
 	void Parse(istream& is, cif::File& result);
@@ -489,8 +500,8 @@ class PDBFileParser
 			int					mSeqNum;
 			char				mIcode;
 			
-			bool operator==(const AtomRes& rhs) const		{ return mMonId == rhs.mMonId and mSeqNum == rhs.mSeqNum and mIcode == rhs.mIcode; }
-			bool operator!=(const AtomRes& rhs) const		{ return mMonId != rhs.mMonId or mSeqNum != rhs.mSeqNum or mIcode != rhs.mIcode; }
+			bool operator==(const AtomRes& rhs) const		{ return mSeqNum == rhs.mSeqNum and mIcode == rhs.mIcode; }
+			bool operator!=(const AtomRes& rhs) const		{ return mSeqNum != rhs.mSeqNum or mIcode != rhs.mIcode; }
 		};
 		vector<AtomRes>			mResiduesSeen;
 		
@@ -593,16 +604,7 @@ class PDBFileParser

 	int vI(int columnFirst, int columnLast) const
 	{
-		int result = 0;
-		try
-		{
-			result = mRec->vI(columnFirst, columnLast);
-		}
-		catch (const exception& ex)
-		{
-			Error(ex.what());
-		}
-		return result;
+		return mRec->vI(columnFirst, columnLast);
 	}

 	// ----------------------------------------------------------------
@@ -643,14 +645,14 @@ class PDBFileParser

 	void GetNextRecord();
 	void Match(const string& expected);
-	void Error(const string& msg) const
-	{
-		string lineNr;
-		if (mRec != nullptr)
-			lineNr = " (at line " + to_string(mRec->mLineNr) + ')';
-		
-		throw runtime_error("Error parsing PDB file" + lineNr + ": " + msg);
-	}
+//	void Error(const string& msg) const
+//	{
+//		string lineNr;
+//		if (mRec != nullptr)
+//			lineNr = " (at line " + to_string(mRec->mLineNr) + ')';
+//		
+//		throw runtime_error("Error parsing PDB file" + lineNr + ": " + msg);
+//	}

 	void ParseTitle();
 	void ParseCitation(const string& id);
@@ -686,7 +688,7 @@ class PDBFileParser
 	
 	vector<string> SplitCSV(const string& value);

-	string pdb2cifDate(string s)
+	string pdb2cifDate(string s, boost::system::error_code& ec)
 	{
 		smatch m;
 		const regex
@@ -700,7 +702,7 @@ class PDBFileParser
 			int day = stoi(m[1].str());
 			auto mi = kMonths.find(m[2].str());
 			if (mi == kMonths.end())
-				Error("Invalid month");
+				throw runtime_error("Invalid month: '" + m[2].str() + '\'');
 			int month = mi->second;
 			int year = 1900 + stoi(m[3].str());
 			if (year < 1950)
@@ -714,7 +716,7 @@ class PDBFileParser
 		{
 			auto mi = kMonths.find(m[1].str());
 			if (mi == kMonths.end())
-				Error("Invalid month");
+				throw runtime_error("Invalid month: '" + m[1].str() + '\'');
 			int month = mi->second;
 			int year = 1900 + stoi(m[2].str());
 			if (year < 1950)
@@ -722,10 +724,19 @@ class PDBFileParser
 		
 			s = (boost::format("%04d-%02d") % year % month).str();
 		}
+		else
+			ec = error::make_error_code(error::pdbErrors::invalidDate);
 		
 		return s;
 	}
 	
+	string pdb2cifDate(string s)
+	{
+		boost::system::error_code ec;		
+		pdb2cifDate(s, ec);
+		return s;
+	}
+	
 	string pdb2cifAuth(string author)
 	{
 		ba::trim(author);
@@ -925,7 +936,16 @@ void PDBFileParser::PreParseInput(istream& is)
 	getline(is, lookahead);

 	if (ba::starts_with(lookahead, "HEADER") == false)
-		Error("This does not look like a PDB file, should start with a HEADER line");
+		throw runtime_error("This does not look like a PDB file, should start with a HEADER line");
+
+	auto contNr = [&lookahead](int offset, int len) -> int
+	{
+		string cs = lookahead.substr(offset, len);
+		ba::trim(cs);
+		
+		int result = cs.empty() ? 0 : stoi(cs);
+		return result;
+	};

 	PDBRecord* last = nullptr;
 	set<string> dropped;
@@ -964,7 +984,7 @@ void PDBFileParser::PreParseInput(istream& is)
 			type == "TITLE ")
 		{
 			int n = 2;
-			while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n)
+			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
 				value += ba::trim_right_copy(lookahead.substr(10));
 				getline(is, lookahead);
@@ -976,7 +996,7 @@ void PDBFileParser::PreParseInput(istream& is)
 		{
 			int n = 2;
 			value += '\n';
-			while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n)
+			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
 				value += ba::trim_right_copy(lookahead.substr(10));
 				value += '\n';
@@ -991,7 +1011,7 @@ void PDBFileParser::PreParseInput(istream& is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and
 				stoi(lookahead.substr(7, 3)) == revNr and
-				stoi(lookahead.substr(10, 2)) == n)
+				contNr(10, 2) == n)
 			{
 				value += lookahead.substr(38);
 				getline(is, lookahead);
@@ -1002,7 +1022,7 @@ void PDBFileParser::PreParseInput(istream& is)
 		else if (type == "CAVEAT")
 		{
 			int n = 2;
-			while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n)
+			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
 				value += ba::trim_right_copy(lookahead.substr(13));
 				getline(is, lookahead);
@@ -1023,7 +1043,7 @@ void PDBFileParser::PreParseInput(istream& is)
 		{
 			value += '\n';
 			int n = 2;
-			while (lookahead.substr(0, 6) == type and stoi(lookahead.substr(7, 3)) == n)
+			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
 				value += ba::trim_copy(lookahead.substr(10));
 				value += '\n';
@@ -1038,8 +1058,7 @@ void PDBFileParser::PreParseInput(istream& is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and
 				stoi(lookahead.substr(7, 3)) == compNr and
-				lookahead.substr(16, 2) != "  " and
-				stoi(lookahead.substr(16, 2)) == n)
+				contNr(16, 2) == n)
 			{
 				value += lookahead.substr(19);
 				getline(is, lookahead);
@@ -1051,9 +1070,7 @@ void PDBFileParser::PreParseInput(istream& is)
 				 type == "HETSYN")
 		{
 			int n = 2;
-			while (lookahead.substr(0, 6) == type and
-				lookahead.substr(8, 2) != "  " and
-				stoi(lookahead.substr(8, 2)) == n)
+			while (lookahead.substr(0, 6) == type and contNr(8, 2) == n)
 			{
 				value += lookahead.substr(16);
 				getline(is, lookahead);
@@ -1116,6 +1133,9 @@ void PDBFileParser::PreParseInput(istream& is)
 			last->mNext = cur;

 		last = cur;
+
+		if (type == "END   ")
+			break;
 	}
 	
 	if (not dropped.empty())
@@ -1141,7 +1161,7 @@ void PDBFileParser::GetNextRecord()
 void PDBFileParser::Match(const string& expected)
 {
 	if (mRec->mName != expected)
-		Error("Expected record " + expected + " but found " + mRec->mName);
+		throw runtime_error("At line " + to_string(mRec->mLineNr) + ": expected record " + expected + " but found " + mRec->mName);
 }

 vector<string> PDBFileParser::SplitCSV(const string& value)
@@ -1224,17 +1244,20 @@ void PDBFileParser::ParseTitle()
 		//	 1 -  6        Record  name  "SPLIT "
 		//	 9 - 10        Continuation  continuation  Allows concatenation of multiple records.
 		//	12 - 15        IDcode        idCode        ID code of related datablock.
-		if (VERBOSE)	
-			Error("skipping unimplemented SPLIT record");
-		GetNextRecord();
+		
+		throw runtime_error("SPLIT PDB files are not supported");
+		
+//		if (VERBOSE)	
+//			Error("skipping unimplemented SPLIT record");
+//		GetNextRecord();
 	}
 	
 	// CAVEAT
-	if (mRec->is("CAVEAT"))							//	  1 - 6       Record name   "CAVEAT"
-	{													//	 9 - 10       Continuation  continuation   Allows concatenation of multiple records.
+	int caveatID = 1;
+	while (mRec->is("CAVEAT"))							//	  1 - 6       Record name   "CAVEAT"
+	{
 		getCategory("database_PDB_caveat")->emplace({
-//			{ "id", vS(12, 15) },						//	12 - 15       IDcode        idCode         PDB ID code of this datablock.                  
-			{ "id", 1 },								//	12 - 15       IDcode        idCode         PDB ID code of this datablock.                  
+			{ "id", caveatID++ },
 			{ "text", string{mRec->vS(20) } }    		//	20 - 79       String        comment        Free text giving the reason for the  CAVEAT.
 		});
 		
@@ -1347,7 +1370,7 @@ void PDBFileParser::ParseTitle()
 			}
 			
 			if (source == nullptr)
-				Error("MOL_ID missing");
+				throw runtime_error("At line " + to_string(mRec->mLineNr) + ": missing MOL_ID in SOURCE");
 			
 			(*source)[key] = value;
 		}
@@ -1380,13 +1403,23 @@ void PDBFileParser::ParseTitle()
 	if (mRec->is("EXPDTA"))
 	{
 		mExpMethod = vS(11);
-		
+
 		cat = getCategory("exptl");
-		cat->emplace({
-			{ "entry_id", mStructureId },
-			{ "method", mExpMethod },
-			{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] }
-		});
+		
+		for (auto si = ba::make_split_iterator(mExpMethod, ba::token_finder(ba::is_any_of(";"), ba::token_compress_on)); not si.eof(); ++si)
+		{
+			string expMethod(si->begin(), si->end());
+			ba::trim(expMethod);
+			
+			if (expMethod.empty())
+				continue;
+			
+			cat->emplace({
+				{ "entry_id", mStructureId },
+				{ "method", expMethod },
+				{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] }
+			});
+		}
 		
 		GetNextRecord();
 	}
@@ -1395,7 +1428,7 @@ void PDBFileParser::ParseTitle()
 	if (mRec->is("NUMMDL"))
 	{
 		if (VERBOSE)	
-			Error("skipping unimplemented NUMMDL record");
+			cerr << "skipping unimplemented NUMMDL record" << endl;
 		GetNextRecord();
 	}
 	
@@ -1624,835 +1657,844 @@ void PDBFileParser::ParseRemarks()
 	{
 		int remarkNr = vI(8, 10);
 		
-		switch (remarkNr)
+		try
 		{
-			case 1:
-				while (mRec->is("REMARK   1"))
-				{
-					if (mRec->mVlen > 15 and vS(12, 20) == "REFERENCE")
-					{
-						string id = vS(22, 70);
-						GetNextRecord();
-						
-						ParseCitation(id);
-					}
-					else
-						GetNextRecord();
-				}
-				break;
-			
-			case 3:
-				// we skip REMARK 3 until we know the mapping
-				while (mRec->is("REMARK   3"))
-					GetNextRecord();
-				break;
-
-			case 4:
-				// who cares...
-				while (mRec->is("REMARK   4"))
-					GetNextRecord();
-				break;
-
-			case 100:
-			{
-				const regex rx(R"(THE (\S+) ID CODE IS (\S+?)\.?\s*)");
-				smatch m;
-				string r = vS(12);
-				
-				if (regex_match(r, m, rx))
-				{
-					auto cat = getCategory("database_2");
-					cat->emplace({
-						{ "database_id", m[1].str() },
-						{ "database_code", m[2].str() }
-					});
-				}
-				
-				GetNextRecord();
-				break;
-			}
-			
-			case 200:
+			switch (remarkNr)
 			{
-				// we already parsed most of this remark, but the "REMARK:" part might have been split.
-
-				bool remark = false;
-				
-				do
-				{
-					string r = mRec->vS(12);
-
-					if (ba::starts_with(r, "REMARK: "))
+				case 1:
+					while (mRec->is("REMARK   1"))
 					{
-						mRemark200["REMARK"] = r.substr(8);
-						remark = true;
-					}
-					else if (remark)
-					{
-						if (r.empty())
-							remark = false;
+						if (mRec->mVlen > 15 and vS(12, 20) == "REFERENCE")
+						{
+							string id = vS(22, 70);
+							GetNextRecord();
+							
+							ParseCitation(id);
+						}
 						else
-							mRemark200["REMARK"] += r;
+							GetNextRecord();
 					}
-					
-					GetNextRecord();
-				}
-				while (mRec->is("REMARK 200"));
-				break;
-			}
-
-			case 280:
-			{
-				string density_Matthews, densityPercentSol, conditions;
-				
-				const regex rx1(R"(SOLVENT CONTENT, VS +\(%\): *(.+))"),
-					rx2(R"(MATTHEWS COEFFICIENT, VM \(ANGSTROMS\*\*3/DA\): *(.+))");
+					break;
 				
-				smatch m;
-
-				do
+				case 3:
+					// we skip REMARK 3 until we know the mapping
+					while (mRec->is("REMARK   3"))
+						GetNextRecord();
+					break;
+	
+				case 4:
+					// who cares...
+					while (mRec->is("REMARK   4"))
+						GetNextRecord();
+					break;
+	
+				case 100:
 				{
+					const regex rx(R"(THE (\S+) ID CODE IS (\S+?)\.?\s*)");
+					smatch m;
 					string r = vS(12);
-
-					if (conditions.empty())
+					
+					if (regex_match(r, m, rx))
 					{
-						if (regex_match(r, m, rx1))
-							densityPercentSol = m[1].str();
-						else if (regex_match(r, m, rx2))
-							density_Matthews = m[1].str();
-						else if (ba::starts_with(r, "CRYSTALLIZATION CONDITIONS: "))
-							conditions = r.substr(28);
+						auto cat = getCategory("database_2");
+						cat->emplace({
+							{ "database_id", m[1].str() },
+							{ "database_code", m[2].str() }
+						});
 					}
-					else
-						conditions = conditions + ' ' + r;
 					
 					GetNextRecord();
+					break;
 				}
-				while (mRec->is("REMARK 280"));
-				
-				string desc = mRemark200["REMARK"];
-				if (desc == "NULL")
-					desc.clear();
-				
-				getCategory("exptl_crystal")->emplace({
-					{ "id", 1 },
-					{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews  },
-					{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
-					{ "description", desc }
-				});
-				
-				// now try to parse the conditions
-				const regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))")/*, rx5(R"(\b(\d+)C\b)")*/;
 				
-				string temp, ph, method;
-
-				for (auto i = make_split_iterator(conditions, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
+				case 200:
 				{
-					string s(i->begin(), i->end());
-
-					ba::trim(s);
-
-					if (regex_search(s, m, rx3))
-						temp = m[1].str();
-					if (regex_search(s, m, rx4))
-						ph = m[1].str();
-					if (s.length() < 60 and
-						(ba::icontains(s, "drop") or ba::icontains(s, "vapor") or ba::icontains(s, "batch")))
+					// we already parsed most of this remark, but the "REMARK:" part might have been split.
+	
+					bool remark = false;
+					
+					do
 					{
-						if (not method.empty())
-							method = method + ", " + s;
-						else
-							method = s;
+						string r = mRec->vS(12);
+	
+						if (ba::starts_with(r, "REMARK: "))
+						{
+							mRemark200["REMARK"] = r.substr(8);
+							remark = true;
+						}
+						else if (remark)
+						{
+							if (r.empty())
+								remark = false;
+							else
+								mRemark200["REMARK"] += r;
+						}
+						
+						GetNextRecord();
 					}
+					while (mRec->is("REMARK 200"));
+					break;
 				}
-				
-				if (not (method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL")))
-				{
-					getCategory("exptl_crystal_grow")->emplace({
-						{ "crystal_id", 1 },
-						{ "method", method },
-						{ "temp", temp },
-						{ "pH", ph },
-						{ "pdbx_details", conditions }
-					});
-				}
-
-				break;
-			}
-			
-//			case 290:
-//				
-//				break;
-			
-			case 350:
-				// postponed since we don't have the required information yet
-				for (; mRec->is("REMARK 350"); GetNextRecord())
-					;
-				break;
-			
-			case 400:
-			{
-				stringstream s;
-				GetNextRecord();
-				if (vS(12) == "COMPOUND")
-					GetNextRecord();
-				
-				while (mRec->is("REMARK 400"))
-				{
-					s << vS(12) << endl;
-					GetNextRecord();
-				}
-				
-				compoundDetails = s.str();
-				break;
-			}
-			
-			case 450:
-			{
-				stringstream s;
-				GetNextRecord();
-				if (vS(12) == "SOURCE")
-					GetNextRecord();
-				
-				while (mRec->is("REMARK 450"))
-				{
-					s << vS(12) << endl;
-					GetNextRecord();
-				}
-				
-				sourceDetails = s.str();
-				break;
-			}
-			
-			case 465:
-			{
-				bool headerSeen = false;
-				regex rx(R"( *MODELS *(\d+)-(\d+))");
-				int models[2] = { -1, -1 };
-				
-				for (; mRec->is("REMARK 465"); GetNextRecord())
+	
+				case 280:
 				{
-					if (not headerSeen)
+					string density_Matthews, densityPercentSol, conditions;
+					
+					const regex rx1(R"(SOLVENT CONTENT, VS +\(%\): *(.+))"),
+						rx2(R"(MATTHEWS COEFFICIENT, VM \(ANGSTROMS\*\*3/DA\): *(.+))");
+					
+					smatch m;
+	
+					do
 					{
-						string line = vS(12);
-						smatch m;
-
-						if (regex_match(line, m, rx))
+						string r = vS(12);
+	
+						if (conditions.empty())
 						{
-							models[0] = stoi(m[1].str());
-							models[1] = stoi(m[2].str());
+							if (regex_match(r, m, rx1))
+								densityPercentSol = m[1].str();
+							else if (regex_match(r, m, rx2))
+								density_Matthews = m[1].str();
+							else if (ba::starts_with(r, "CRYSTALLIZATION CONDITIONS: "))
+								conditions = r.substr(28);
 						}
 						else
-							headerSeen = ba::contains(line, "RES C SSSEQI");
-						continue;
+							conditions = conditions + ' ' + r;
+						
+						GetNextRecord();
 					}
+					while (mRec->is("REMARK 280"));
+					
+					string desc = mRemark200["REMARK"];
+					if (desc == "NULL")
+						desc.clear();
 					
-					if (models[0] == models[1])
-						models[0] = models[1] = vI(12, 14);
+					getCategory("exptl_crystal")->emplace({
+						{ "id", 1 },
+						{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews  },
+						{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
+						{ "description", desc }
+					});
+					
+					// now try to parse the conditions
+					const regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))")/*, rx5(R"(\b(\d+)C\b)")*/;
 					
-					string res = vS(16, 18);
-					char chain = vC(20);
-					int seq = vI(22, 26);
-					char iCode = vC(27);
+					string temp, ph, method;
+	
+					for (auto i = make_split_iterator(conditions, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
+					{
+						string s(i->begin(), i->end());
+	
+						ba::trim(s);
+	
+						if (regex_search(s, m, rx3))
+							temp = m[1].str();
+						if (regex_search(s, m, rx4))
+							ph = m[1].str();
+						if (s.length() < 60 and
+							(ba::icontains(s, "drop") or ba::icontains(s, "vapor") or ba::icontains(s, "batch")))
+						{
+							if (not method.empty())
+								method = method + ", " + s;
+							else
+								method = s;
+						}
+					}
 					
-					for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
-						mUnobs.push_back({modelNr, res, chain, seq, iCode});
+					if (not (method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL")))
+					{
+						getCategory("exptl_crystal_grow")->emplace({
+							{ "crystal_id", 1 },
+							{ "method", method },
+							{ "temp", temp },
+							{ "pH", ph },
+							{ "pdbx_details", conditions }
+						});
+					}
+	
+					break;
 				}
 				
-				break;
-			}
-			
-			case 470:
-			{
-				bool headerSeen = false;
-				regex rx(R"( *MODELS *(\d+)-(\d+))");
-				int models[2] = { -1, -1 };
+	//			case 290:
+	//				
+	//				break;
+				
+				case 350:
+					// postponed since we don't have the required information yet
+					for (; mRec->is("REMARK 350"); GetNextRecord())
+						;
+					break;
 				
-				for (; mRec->is("REMARK 470"); GetNextRecord())
+				case 400:
 				{
-					if (not headerSeen)
+					stringstream s;
+					GetNextRecord();
+					if (vS(12) == "COMPOUND")
+						GetNextRecord();
+					
+					while (mRec->is("REMARK 400"))
 					{
-						string line = vS(12);
-						smatch m;
-
-						if (regex_match(line, m, rx))
-						{
-							models[0] = stoi(m[1].str());
-							models[1] = stoi(m[2].str());
-						}
-						else
-							headerSeen = ba::contains(line, "RES CSSEQI  ATOMS");
-						continue;
+						s << vS(12) << endl;
+						GetNextRecord();
 					}
 					
-					if (models[0] == models[1])
-						models[0] = models[1] = vI(12, 14);
-					
-					string res = vS(16, 18);
-					char chain = vC(20);
-					int seq = vI(21, 24);
-					char iCode = vC(25);
+					compoundDetails = s.str();
+					break;
+				}
+				
+				case 450:
+				{
+					stringstream s;
+					GetNextRecord();
+					if (vS(12) == "SOURCE")
+						GetNextRecord();
 					
-					vector<string> atoms;
-					string atomStr = mRec->vS(29);
-					for (auto i = make_split_iterator(atomStr, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i)
-						atoms.push_back({ i-> begin(), i->end() });
+					while (mRec->is("REMARK 450"))
+					{
+						s << vS(12) << endl;
+						GetNextRecord();
+					}
 					
-					for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
-						mUnobs.push_back({modelNr, res, chain, seq, iCode, atoms});
+					sourceDetails = s.str();
+					break;
 				}
 				
-				break;
-			}
-			
-			case 500:
-			{
-				GetNextRecord();
-				
-				enum State { eStart, eCCinSAU, eCC, eCBL, eCBA, eTA, eCTg, ePG, eMCP, eChC } state = eStart;
-				bool headerSeen = false;
-				int id = 0;
-				
-				for (; mRec->is("REMARK 500"); GetNextRecord())
+				case 465:
 				{
-					string line = vS(12);
-					
-					if (line == "GEOMETRY AND STEREOCHEMISTRY")
-						continue;
+					bool headerSeen = false;
+					regex rx(R"( *MODELS *(\d+)-(\d+))");
+					int models[2] = { -1, -1 };
 					
-					switch (state)
+					for (; mRec->is("REMARK 465"); GetNextRecord())
 					{
-						case eStart:
-						{
-							if (line.empty() or not ba::starts_with(line, "SUBTOPIC: "))
-								continue;
-
-							string subtopic = line.substr(10);
-
-							if (subtopic == "CLOSE CONTACTS IN SAME ASYMMETRIC UNIT")	state = eCCinSAU;
-							else if (subtopic == "CLOSE CONTACTS")						state = eCC;
-							else if (subtopic == "COVALENT BOND LENGTHS")				state = eCBL;
-							else if (subtopic == "COVALENT BOND ANGLES")				state = eCBA;
-							else if (subtopic == "TORSION ANGLES")						state = eTA;
-							else if (subtopic == "NON-CIS, NON-TRANS")					state = eCTg;
-							else if (subtopic == "PLANAR GROUPS")						state = ePG;
-							else if (subtopic == "MAIN CHAIN PLANARITY")				state = eMCP;
-							else if (subtopic == "CHIRAL CENTERS")						state = eChC;
-							else if (VERBOSE)
-								Error("Unknown subtopic in REMARK 500: " + subtopic);
-							
-							headerSeen = false;
-							id = 0;
-							break;
-						}
-						
-						case eCCinSAU:
-						{
-							if (not headerSeen)
-								headerSeen =
-									line == "ATM1  RES C  SSEQI   ATM2  RES C  SSEQI           DISTANCE";
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								string atom1 = vS(13, 16);
-								string res1 = vS(19, 21);
-								string alt1 = vS(17, 17);
-								char chain1 = vC(23);
-								int seq1 = vI(25, 29);
-								string iCode1 = vS(30, 30);
-								
-								string atom2 = vS(34, 37);
-								string alt2 = vS(38, 38);
-								string res2 = vS(40, 42);
-								char chain2 = vC(44);
-								int seq2 = vI(46, 50);
-								string iCode2 = vS(51, 51);
-								
-								string distance = vF(63, 71);
-								
-								getCategory("pdbx_validate_close_contact")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	1 },
-									{ "auth_atom_id_1",	atom1 },
-									{ "auth_asym_id_1", string{ chain1 } },
-									{ "auth_comp_id_1", res1 },
-									{ "auth_seq_id_1",	seq1 },
-									{ "PDB_ins_code_1", iCode1 },
-									{ "label_alt_id_1", alt1 },
-									{ "auth_atom_id_2",	atom2 },
-									{ "auth_asym_id_2", string { chain2 } },
-									{ "auth_comp_id_2", res2 },
-									{ "auth_seq_id_2",	seq2 },
-									{ "PDB_ins_code_2", iCode2 },
-									{ "label_alt_id_2", alt2 },
-									{ "dist", distance }
-								});
-							}
-							break;
-						}
-						
-						case eCC:
-						{
-							if (not headerSeen)
-								headerSeen = line == "ATM1  RES C  SSEQI   ATM2  RES C  SSEQI  SSYMOP   DISTANCE";
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								string atom1 = vS(13, 16);
-								string res1 = vS(19, 21);
-								char chain1 = vC(23);
-								int seq1 = vI(25, 29);
-								
-								string atom2 = vS(34, 37);
-								string res2 = vS(40, 42);
-								char chain2 = vC(44);
-								int seq2 = vI(46, 50);
-								
-								string symop = pdb2cifSymmetry(vS(54, 59));
-								
-								string distance = vF(63, 71);
-								
-								getCategory("pdbx_validate_symm_contact")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	1 },
-									{ "auth_atom_id_1",	atom1 },
-									{ "auth_asym_id_1", string{ chain1 } },
-									{ "auth_comp_id_1", res1 },
-									{ "auth_seq_id_1",	seq1 },
-//									{ "PDB_ins_code_1", "" },
-//									{ "label_alt_id_1", "" },
-									{ "site_symmetry_1", "1_555" },
-									{ "auth_atom_id_2",	atom2 },
-									{ "auth_asym_id_2", string { chain2 } },
-									{ "auth_comp_id_2", res2 },
-									{ "auth_seq_id_2",	seq2 },
-//									{ "PDB_ins_code_2", "" },
-//									{ "label_alt_id_2", "" },
-									{ "site_symmetry_2", symop },
-									{ "dist", distance }
-								});
-							}
-							break;
-						}
-						
-						case eCBL:
+						if (not headerSeen)
 						{
-							if (not headerSeen)
-							{
-								if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
-									Error("Unexpected format in REMARK 500");
-								
-								headerSeen = line == "M RES CSSEQI ATM1   RES CSSEQI ATM2   DEVIATION";
-							}
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								int model = vI(11, 13);
-								string resNam1 = vS(15, 17);
-								string chainID1 { vC(19) };
-								int seqNum1 = vI(20, 23);
-								string iCode1 { vC(24) };
-								string alt1 = vS(30, 30);
-								string atm1 = vS(26, 29);
-
-								string resNam2 = vS(33, 35);
-								string chainID2 { vC(37) };
-								int seqNum2 = vI(38, 41);
-								string iCode2 { vC(42) };
-								string alt2 = vS(48, 48);
-								string atm2 = vS(44, 47);
-								
-								string deviation = vF(51, 57);
-								
-								if (iCode1 == " ") iCode1.clear();
-								if (iCode2 == " ") iCode2.clear();
-								
-								getCategory("pdbx_validate_rmsd_bond")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	model ? model : 1 },
-									{ "auth_atom_id_1",	atm1 },
-									{ "auth_asym_id_1", chainID1 },
-									{ "auth_comp_id_1", resNam1 },
-									{ "auth_seq_id_1",	seqNum1 },
-									{ "PDB_ins_code_1",	iCode1 },
-									{ "label_alt_id_1", alt1 },
-									{ "auth_atom_id_2",	atm2 },
-									{ "auth_asym_id_2", chainID2 },
-									{ "auth_comp_id_2", resNam2 },
-									{ "auth_seq_id_2",	seqNum2 },
-									{ "PDB_ins_code_2",	iCode2 },
-									{ "label_alt_id_2", alt2 },
-									{ "bond_deviation",deviation }
-								});
-							}
-							
-							break;
-						}
-						
-						case eCBA:
-							if (not headerSeen)
-							{
-								if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
-									Error("Unexpected format in REMARK 500");
-								
-								headerSeen = line == "M RES CSSEQI ATM1   ATM2   ATM3";
-							}
-							else if (line.empty())
-								state = eStart;
-							else if (vS(64) == "DEGREES")
-							{
-								int model = vI(11, 13);
-								string resNam = vS(15, 17);
-								string chainID { vC(19) };
-								int seqNum = vI(20, 23);
-								string iCode { vC(24) };
-								
-								if (iCode == " ")
-									iCode.clear();
-								
-								string atoms[3] = { vS(27, 30), vS(34, 37), vS(41, 44) };
-								string deviation = vF(57, 62);
-								
-								getCategory("pdbx_validate_rmsd_angle")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	model ? model : 1 },
-									{ "auth_atom_id_1",	atoms[0] },
-									{ "auth_asym_id_1", chainID },
-									{ "auth_comp_id_1", resNam },
-									{ "auth_seq_id_1",	seqNum },
-									{ "PDB_ins_code_1",	iCode },
-									{ "auth_atom_id_2",	atoms[1] },
-									{ "auth_asym_id_2", chainID },
-									{ "auth_comp_id_2", resNam },
-									{ "auth_seq_id_2",	seqNum },
-									{ "PDB_ins_code_2",	iCode },
-									{ "auth_atom_id_3",	atoms[2] },
-									{ "auth_asym_id_3", chainID },
-									{ "auth_comp_id_3", resNam },
-									{ "auth_seq_id_3",	seqNum },
-									{ "PDB_ins_code_3",	iCode },
-									{ "angle_deviation",deviation }
-								});
-							}
-							
-							break;
-						
-						case eTA:
-							if (not headerSeen)
-							{
-								if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
-									Error("Unexpected format in REMARK 500");
-								
-								headerSeen = line == "M RES CSSEQI        PSI       PHI";
-							}
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								int model = vI(11, 13);
-								string resNam = vS(15, 17);
-								string chainID { vC(19) };
-								int seqNum = vI(20, 23);
-								string iCode { vC(24) };
-								
-								if (iCode == " ")
-									iCode.clear();
-								
-								string psi = vF(27, 35);
-								string phi = vF(37, 45);
-								
-								getCategory("pdbx_validate_torsion")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	model ? model : 1 },
-									{ "auth_comp_id", 	resNam },
-									{ "auth_asym_id", 	chainID },
-									{ "auth_seq_id",	seqNum },
-									{ "PDB_ins_code",	iCode },
-									{ "phi",			phi },
-									{ "psi",			psi }
-								});
-							}
-							break;
-						
-						case eCTg:
-							if (not headerSeen)
-								headerSeen = line == "MODEL     OMEGA";
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								int model = vI(45, 48);
-
-								string resNam1 = vS(12, 14);
-								string chainID1 { vC(16) };
-								int seqNum1 = vI(17, 21);
-								string iCode1 { vC(22) };
-								
-								if (iCode1 == " ")
-									iCode1.clear();
-								
-								string resNam2 = vS(27, 29);
-								string chainID2 { vC(31) };
-								int seqNum2 = vI(32, 36);
-								string iCode2 { vC(37) };
-								
-								if (iCode2 == " ")
-									iCode2.clear();
-								
-								string omega = vF(54, 60);
-								
-								getCategory("pdbx_validate_peptide_omega")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	model ? model : 1 },
-									{ "auth_comp_id_1",	resNam1 },
-									{ "auth_asym_id_1",	chainID1 },
-									{ "auth_seq_id_1",	seqNum1 },
-									{ "PDB_ins_code_1",	iCode1 },
-									{ "auth_comp_id_2",	resNam2 },
-									{ "auth_asym_id_2",	chainID2 },
-									{ "auth_seq_id_2",	seqNum2 },
-									{ "PDB_ins_code_2",	iCode2 },
-									{ "omega",			omega }
-								});
-							}
-							break;
-						
-						case ePG:
-							if (not headerSeen)
-								headerSeen = line == "M RES CSSEQI        RMS     TYPE";
-							else if (line.empty())
-								state = eStart;
-							else
-							{
-								int model = vI(11, 13);
-								string resNam = vS(15, 17);
-								string chainID { vC(19) };
-								int seqNum = vI(20, 23);
-								string iCode { vC(24) };
-								
-								if (iCode == " ")
-									iCode.clear();
-								
-								string rmsd = vF(32, 36);
-								string type = vS(41);
-								
-								getCategory("pdbx_validate_planes")->emplace({
-									{ "id",				to_string(++id) },
-									{ "PDB_model_num",	model ? model : 1 },
-									{ "auth_comp_id",	resNam },
-									{ "auth_asym_id",	chainID },
-									{ "auth_seq_id",	seqNum },
-									{ "PDB_ins_code",	iCode },
-									{ "rmsd",			rmsd },
-									{ "type",			type }
-								});
+							string line = vS(12);
+							smatch m;
+	
+							if (regex_match(line, m, rx))
+							{
+								models[0] = stoi(m[1].str());
+								models[1] = stoi(m[2].str());
 							}
-							break;
+							else
+								headerSeen = ba::contains(line, "RES C SSSEQI");
+							continue;
+						}
 						
+						if (models[0] == models[1])
+							models[0] = models[1] = vI(12, 14);
 						
-						default:
-							state = eStart;
-							break;
+						string res = vS(16, 18);
+						char chain = vC(20);
+						int seq = vI(22, 26);
+						char iCode = vC(27);
+						
+						for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
+							mUnobs.push_back({modelNr, res, chain, seq, iCode});
 					}
+					
+					break;
 				}
 				
-				break;
-			}
-			
-			case 610:
-			{
-				bool headerSeen = false;
-				
-				for (; mRec->is("REMARK 610"); GetNextRecord())
+				case 470:
 				{
-					if (not headerSeen)
-					{
-						string line = vS(12);
-						headerSeen = ba::contains(line, "RES C SSEQI");
-						continue;
-					}
-					
-					int modelNr = vI(12, 14);
-					if (modelNr == 0)
-						modelNr = 1;
-					string res = vS(16, 18);
-					char chain = vC(20);
-					int seq = vI(22, 25);
-					char iCode = vC(26);
-					
-					auto compound = libcif::Compound::create(res);
-					if (compound == nullptr)
-						continue;
+					bool headerSeen = false;
+					regex rx(R"( *MODELS *(\d+)-(\d+))");
+					int models[2] = { -1, -1 };
 					
-					vector<string> atoms;
-					for (auto atom: compound->atoms())
+					for (; mRec->is("REMARK 470"); GetNextRecord())
 					{
-						if (atom.typeSymbol != libcif::H)
-							atoms.push_back(atom.id);
+						if (not headerSeen)
+						{
+							string line = vS(12);
+							smatch m;
+	
+							if (regex_match(line, m, rx))
+							{
+								models[0] = stoi(m[1].str());
+								models[1] = stoi(m[2].str());
+							}
+							else
+								headerSeen = ba::contains(line, "RES CSSEQI  ATOMS");
+							continue;
+						}
+						
+						if (models[0] == models[1])
+							models[0] = models[1] = vI(12, 14);
+						
+						string res = vS(16, 18);
+						char chain = vC(20);
+						int seq = vI(21, 24);
+						char iCode = vC(25);
+						
+						vector<string> atoms;
+						string atomStr = mRec->vS(29);
+						for (auto i = make_split_iterator(atomStr, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i)
+							atoms.push_back({ i-> begin(), i->end() });
+						
+						for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
+							mUnobs.push_back({modelNr, res, chain, seq, iCode, atoms});
 					}
 					
-					mUnobs.push_back({modelNr, res, chain, seq, iCode, { atoms }});
+					break;
 				}
 				
-				break;
-			}
-			
-			case 800:
-			{
-				const regex rx1(R"(SITE_IDENTIFIER: (.+))"),
-					rx2(R"(EVIDENCE_CODE: (.+))"),
-					rx3(R"(SITE_DESCRIPTION: (binding site for residue ([[:alnum:]]{1,3}) ([[:alnum:]]) (\d+)|.+))", regex_constants::icase);
-				
-				string id, evidence, desc;
-				string pdbxAuthAsymId, pdbxAuthCompId, pdbxAuthSeqId, pdbxAuthInsCode;
-				smatch m;
-				
-				enum State { sStart, sId, sEvidence, sDesc, sDesc2 } state = sStart;
-				
-				auto store = [&]()
+				case 500:
 				{
-					// Find the matching SITE record
-					auto site = FindRecord([id](PDBRecord& r) -> bool
-					{
-						return r.is("SITE  ") and r.vS(12, 14) == id;
-					});
+					GetNextRecord();
 					
-					if (site == nullptr)
-						throw runtime_error("Invalid REMARK 800, no SITE record for id " + id);
+					enum State { eStart, eCCinSAU, eCC, eCBL, eCBA, eTA, eCTg, ePG, eMCP, eChC } state = eStart;
+					bool headerSeen = false;
+					int id = 0;
 					
-					// next record, store what we have	
-					getCategory("struct_site")->emplace({
-						{ "id", id },
-						{ "details", desc },
-						{ "pdbx_auth_asym_id", pdbxAuthAsymId },
-						{ "pdbx_auth_comp_id", pdbxAuthCompId },
-						{ "pdbx_auth_seq_id", pdbxAuthSeqId },
-						{ "pdbx_num_residues", site->vI(16, 17) },
-						{ "pdbx_evidence_code", evidence }
-					});
-				};
-
-				for ( ; mRec->is("REMARK 800"); GetNextRecord())
-				{
-					string s = mRec->vS(12);
-					if (s.empty())
-						continue;
-
-					switch (state)
+					for (; mRec->is("REMARK 500"); GetNextRecord())
 					{
-						case sStart:
- 							if (s == "SITE")
- 								state = sId;
-							else if (VERBOSE)
-								Error("Invalid REMARK 800 record, expected SITE");
- 							break;
+						string line = vS(12);
+						
+						if (line == "GEOMETRY AND STEREOCHEMISTRY")
+							continue;
 						
-						case sId:
-							if (regex_match(s, m, rx1))
+						switch (state)
+						{
+							case eStart:
 							{
-								id = m[1].str();
-								state = sEvidence;
+								if (line.empty() or not ba::starts_with(line, "SUBTOPIC: "))
+									continue;
+	
+								string subtopic = line.substr(10);
+	
+								if (subtopic == "CLOSE CONTACTS IN SAME ASYMMETRIC UNIT")	state = eCCinSAU;
+								else if (subtopic == "CLOSE CONTACTS")						state = eCC;
+								else if (subtopic == "COVALENT BOND LENGTHS")				state = eCBL;
+								else if (subtopic == "COVALENT BOND ANGLES")				state = eCBA;
+								else if (subtopic == "TORSION ANGLES")						state = eTA;
+								else if (subtopic == "NON-CIS, NON-TRANS")					state = eCTg;
+								else if (subtopic == "PLANAR GROUPS")						state = ePG;
+								else if (subtopic == "MAIN CHAIN PLANARITY")				state = eMCP;
+								else if (subtopic == "CHIRAL CENTERS")						state = eChC;
+								else if (VERBOSE)
+									throw runtime_error("Unknown subtopic in REMARK 500: " + subtopic);
+								
+								headerSeen = false;
+								id = 0;
+								break;
 							}
-							else if (VERBOSE)
-								Error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
-							break;
-						
-						case sEvidence:
-							if (regex_match(s, m, rx2))
+							
+							case eCCinSAU:
 							{
-								evidence = m[1].str();
-								state = sDesc;
+								if (not headerSeen)
+									headerSeen =
+										line == "ATM1  RES C  SSEQI   ATM2  RES C  SSEQI           DISTANCE";
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									string atom1 = vS(13, 16);
+									string res1 = vS(19, 21);
+									string alt1 = vS(17, 17);
+									char chain1 = vC(23);
+									int seq1 = vI(25, 29);
+									string iCode1 = vS(30, 30);
+									
+									string atom2 = vS(34, 37);
+									string alt2 = vS(38, 38);
+									string res2 = vS(40, 42);
+									char chain2 = vC(44);
+									int seq2 = vI(46, 50);
+									string iCode2 = vS(51, 51);
+									
+									string distance = vF(63, 71);
+									
+									getCategory("pdbx_validate_close_contact")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	1 },
+										{ "auth_atom_id_1",	atom1 },
+										{ "auth_asym_id_1", string{ chain1 } },
+										{ "auth_comp_id_1", res1 },
+										{ "auth_seq_id_1",	seq1 },
+										{ "PDB_ins_code_1", iCode1 },
+										{ "label_alt_id_1", alt1 },
+										{ "auth_atom_id_2",	atom2 },
+										{ "auth_asym_id_2", string { chain2 } },
+										{ "auth_comp_id_2", res2 },
+										{ "auth_seq_id_2",	seq2 },
+										{ "PDB_ins_code_2", iCode2 },
+										{ "label_alt_id_2", alt2 },
+										{ "dist", distance }
+									});
+								}
+								break;
 							}
-							else if (VERBOSE)
-								Error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
-							break;
-						
-						case sDesc:
-							if (regex_match(s, m, rx3))
+							
+							case eCC:
 							{
-								desc = m[1].str();
-								pdbxAuthCompId = m[2].str();
-								pdbxAuthAsymId = m[3].str();
-								pdbxAuthSeqId = m[4].str();
-
-								state = sDesc2;
+								if (not headerSeen)
+									headerSeen = line == "ATM1  RES C  SSEQI   ATM2  RES C  SSEQI  SSYMOP   DISTANCE";
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									string atom1 = vS(13, 16);
+									string res1 = vS(19, 21);
+									char chain1 = vC(23);
+									int seq1 = vI(25, 29);
+									
+									string atom2 = vS(34, 37);
+									string res2 = vS(40, 42);
+									char chain2 = vC(44);
+									int seq2 = vI(46, 50);
+									
+									string symop = pdb2cifSymmetry(vS(54, 59));
+									
+									string distance = vF(63, 71);
+									
+									getCategory("pdbx_validate_symm_contact")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	1 },
+										{ "auth_atom_id_1",	atom1 },
+										{ "auth_asym_id_1", string{ chain1 } },
+										{ "auth_comp_id_1", res1 },
+										{ "auth_seq_id_1",	seq1 },
+	//									{ "PDB_ins_code_1", "" },
+	//									{ "label_alt_id_1", "" },
+										{ "site_symmetry_1", "1_555" },
+										{ "auth_atom_id_2",	atom2 },
+										{ "auth_asym_id_2", string { chain2 } },
+										{ "auth_comp_id_2", res2 },
+										{ "auth_seq_id_2",	seq2 },
+	//									{ "PDB_ins_code_2", "" },
+	//									{ "label_alt_id_2", "" },
+										{ "site_symmetry_2", symop },
+										{ "dist", distance }
+									});
+								}
+								break;
 							}
-							break;
-						
-						case sDesc2:
-							if (regex_match(s, m, rx1))
+							
+							case eCBL:
 							{
-								store();
+								if (not headerSeen)
+								{
+									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
+										throw runtime_error("Unexpected format in REMARK 500");
+									
+									headerSeen = line == "M RES CSSEQI ATM1   RES CSSEQI ATM2   DEVIATION";
+								}
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									int model = vI(11, 13);
+									string resNam1 = vS(15, 17);
+									string chainID1 { vC(19) };
+									int seqNum1 = vI(20, 23);
+									string iCode1 { vC(24) };
+									string alt1 = vS(30, 30);
+									string atm1 = vS(26, 29);
+	
+									string resNam2 = vS(33, 35);
+									string chainID2 { vC(37) };
+									int seqNum2 = vI(38, 41);
+									string iCode2 { vC(42) };
+									string alt2 = vS(48, 48);
+									string atm2 = vS(44, 47);
+									
+									string deviation = vF(51, 57);
+									
+									if (iCode1 == " ") iCode1.clear();
+									if (iCode2 == " ") iCode2.clear();
+									
+									getCategory("pdbx_validate_rmsd_bond")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	model ? model : 1 },
+										{ "auth_atom_id_1",	atm1 },
+										{ "auth_asym_id_1", chainID1 },
+										{ "auth_comp_id_1", resNam1 },
+										{ "auth_seq_id_1",	seqNum1 },
+										{ "PDB_ins_code_1",	iCode1 },
+										{ "label_alt_id_1", alt1 },
+										{ "auth_atom_id_2",	atm2 },
+										{ "auth_asym_id_2", chainID2 },
+										{ "auth_comp_id_2", resNam2 },
+										{ "auth_seq_id_2",	seqNum2 },
+										{ "PDB_ins_code_2",	iCode2 },
+										{ "label_alt_id_2", alt2 },
+										{ "bond_deviation",deviation }
+									});
+								}
 								
-								id = m[1].str();
-								state = sEvidence;
-								evidence.clear();
-								desc.clear();
+								break;
 							}
-							else
-								desc = desc + ' ' + s;
-							break;
+							
+							case eCBA:
+								if (not headerSeen)
+								{
+									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
+										throw runtime_error("Unexpected format in REMARK 500");
+									
+									headerSeen = line == "M RES CSSEQI ATM1   ATM2   ATM3";
+								}
+								else if (line.empty())
+									state = eStart;
+								else if (vS(64) == "DEGREES")
+								{
+									int model = vI(11, 13);
+									string resNam = vS(15, 17);
+									string chainID { vC(19) };
+									int seqNum = vI(20, 23);
+									string iCode { vC(24) };
+									
+									if (iCode == " ")
+										iCode.clear();
+									
+									string atoms[3] = { vS(27, 30), vS(34, 37), vS(41, 44) };
+									string deviation = vF(57, 62);
+									if (deviation == "*****")
+										deviation.clear();
+									
+									getCategory("pdbx_validate_rmsd_angle")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	model ? model : 1 },
+										{ "auth_atom_id_1",	atoms[0] },
+										{ "auth_asym_id_1", chainID },
+										{ "auth_comp_id_1", resNam },
+										{ "auth_seq_id_1",	seqNum },
+										{ "PDB_ins_code_1",	iCode },
+										{ "auth_atom_id_2",	atoms[1] },
+										{ "auth_asym_id_2", chainID },
+										{ "auth_comp_id_2", resNam },
+										{ "auth_seq_id_2",	seqNum },
+										{ "PDB_ins_code_2",	iCode },
+										{ "auth_atom_id_3",	atoms[2] },
+										{ "auth_asym_id_3", chainID },
+										{ "auth_comp_id_3", resNam },
+										{ "auth_seq_id_3",	seqNum },
+										{ "PDB_ins_code_3",	iCode },
+										{ "angle_deviation",deviation }
+									});
+								}
+								
+								break;
+							
+							case eTA:
+								if (not headerSeen)
+								{
+									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
+										throw runtime_error("Unexpected format in REMARK 500");
+									
+									headerSeen = line == "M RES CSSEQI        PSI       PHI";
+								}
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									int model = vI(11, 13);
+									string resNam = vS(15, 17);
+									string chainID { vC(19) };
+									int seqNum = vI(20, 23);
+									string iCode { vC(24) };
+									
+									if (iCode == " ")
+										iCode.clear();
+									
+									string psi = vF(27, 35);
+									string phi = vF(37, 45);
+									
+									getCategory("pdbx_validate_torsion")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	model ? model : 1 },
+										{ "auth_comp_id", 	resNam },
+										{ "auth_asym_id", 	chainID },
+										{ "auth_seq_id",	seqNum },
+										{ "PDB_ins_code",	iCode },
+										{ "phi",			phi },
+										{ "psi",			psi }
+									});
+								}
+								break;
+							
+							case eCTg:
+								if (not headerSeen)
+									headerSeen = line == "MODEL     OMEGA";
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									int model = vI(45, 48);
+	
+									string resNam1 = vS(12, 14);
+									string chainID1 { vC(16) };
+									int seqNum1 = vI(17, 21);
+									string iCode1 { vC(22) };
+									
+									if (iCode1 == " ")
+										iCode1.clear();
+									
+									string resNam2 = vS(27, 29);
+									string chainID2 { vC(31) };
+									int seqNum2 = vI(32, 36);
+									string iCode2 { vC(37) };
+									
+									if (iCode2 == " ")
+										iCode2.clear();
+									
+									string omega = vF(54, 60);
+									
+									getCategory("pdbx_validate_peptide_omega")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	model ? model : 1 },
+										{ "auth_comp_id_1",	resNam1 },
+										{ "auth_asym_id_1",	chainID1 },
+										{ "auth_seq_id_1",	seqNum1 },
+										{ "PDB_ins_code_1",	iCode1 },
+										{ "auth_comp_id_2",	resNam2 },
+										{ "auth_asym_id_2",	chainID2 },
+										{ "auth_seq_id_2",	seqNum2 },
+										{ "PDB_ins_code_2",	iCode2 },
+										{ "omega",			omega }
+									});
+								}
+								break;
+							
+							case ePG:
+								if (not headerSeen)
+									headerSeen = line == "M RES CSSEQI        RMS     TYPE";
+								else if (line.empty())
+									state = eStart;
+								else
+								{
+									int model = vI(11, 13);
+									string resNam = vS(15, 17);
+									string chainID { vC(19) };
+									int seqNum = vI(20, 23);
+									string iCode { vC(24) };
+									
+									if (iCode == " ")
+										iCode.clear();
+									
+									string rmsd = vF(32, 36);
+									string type = vS(41);
+									
+									getCategory("pdbx_validate_planes")->emplace({
+										{ "id",				to_string(++id) },
+										{ "PDB_model_num",	model ? model : 1 },
+										{ "auth_comp_id",	resNam },
+										{ "auth_asym_id",	chainID },
+										{ "auth_seq_id",	seqNum },
+										{ "PDB_ins_code",	iCode },
+										{ "rmsd",			rmsd },
+										{ "type",			type }
+									});
+								}
+								break;
+							
+							
+							default:
+								state = eStart;
+								break;
+						}
 					}
+					
+					break;
 				}
 				
-				if (not id.empty())
-					store();
-
-				break;
-			}
-			
-			case 999:
-			{
-				stringstream s;
-				GetNextRecord();
-				if (vS(12) == "SEQUENCE")
-					GetNextRecord();
-				
-				while (mRec->is("REMARK 999"))
+				case 610:
 				{
-					s << vS(12) << endl;
-					GetNextRecord();
+					bool headerSeen = false;
+					
+					for (; mRec->is("REMARK 610"); GetNextRecord())
+					{
+						if (not headerSeen)
+						{
+							string line = vS(12);
+							headerSeen = ba::contains(line, "RES C SSEQI");
+							continue;
+						}
+						
+						int modelNr = vI(12, 14);
+						if (modelNr == 0)
+							modelNr = 1;
+						string res = vS(16, 18);
+						char chain = vC(20);
+						int seq = vI(22, 25);
+						char iCode = vC(26);
+						
+						auto compound = libcif::Compound::create(res);
+						if (compound == nullptr)
+							continue;
+						
+						vector<string> atoms;
+						for (auto atom: compound->atoms())
+						{
+							if (atom.typeSymbol != libcif::H)
+								atoms.push_back(atom.id);
+						}
+						
+						mUnobs.push_back({modelNr, res, chain, seq, iCode, { atoms }});
+					}
+					
+					break;
 				}
 				
-				sequenceDetails = s.str();
-				break;
-			}
-
-			// these are skipped 
-
-			case 2:
-			case 290:
-			case 300:
-				GetNextRecord();
-				break;
-
-			default:
-			{
-				string skipped = mRec->mName;
-				
-				stringstream s;
+				case 800:
+				{
+					const regex rx1(R"(SITE_IDENTIFIER: (.+))"),
+						rx2(R"(EVIDENCE_CODE: (.+))"),
+						rx3(R"(SITE_DESCRIPTION: (binding site for residue ([[:alnum:]]{1,3}) ([[:alnum:]]) (\d+)|.+))", regex_constants::icase);
+					
+					string id, evidence, desc;
+					string pdbxAuthAsymId, pdbxAuthCompId, pdbxAuthSeqId, pdbxAuthInsCode;
+					smatch m;
+					
+					enum State { sStart, sId, sEvidence, sDesc, sDesc2 } state = sStart;
+					
+					auto store = [&]()
+					{
+						// Find the matching SITE record
+						auto site = FindRecord([id](PDBRecord& r) -> bool
+						{
+							return r.is("SITE  ") and r.vS(12, 14) == id;
+						});
+						
+						if (site == nullptr)
+							throw runtime_error("Invalid REMARK 800, no SITE record for id " + id);
+						
+						// next record, store what we have	
+						getCategory("struct_site")->emplace({
+							{ "id", id },
+							{ "details", desc },
+							{ "pdbx_auth_asym_id", pdbxAuthAsymId },
+							{ "pdbx_auth_comp_id", pdbxAuthCompId },
+							{ "pdbx_auth_seq_id", pdbxAuthSeqId },
+							{ "pdbx_num_residues", site->vI(16, 17) },
+							{ "pdbx_evidence_code", evidence }
+						});
+					};
+	
+					for ( ; mRec->is("REMARK 800"); GetNextRecord())
+					{
+						string s = mRec->vS(12);
+						if (s.empty())
+							continue;
+	
+						switch (state)
+						{
+							case sStart:
+	 							if (s == "SITE")
+	 								state = sId;
+								else if (VERBOSE)
+									throw runtime_error("Invalid REMARK 800 record, expected SITE");
+	 							break;
+							
+							case sId:
+								if (regex_match(s, m, rx1))
+								{
+									id = m[1].str();
+									state = sEvidence;
+								}
+								else if (VERBOSE)
+									throw runtime_error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
+								break;
+							
+							case sEvidence:
+								if (regex_match(s, m, rx2))
+								{
+									evidence = m[1].str();
+									state = sDesc;
+								}
+								else if (VERBOSE)
+									throw runtime_error("Invalid REMARK 800 record, expected SITE_IDENTIFIER");
+								break;
+							
+							case sDesc:
+								if (regex_match(s, m, rx3))
+								{
+									desc = m[1].str();
+									pdbxAuthCompId = m[2].str();
+									pdbxAuthAsymId = m[3].str();
+									pdbxAuthSeqId = m[4].str();
+	
+									state = sDesc2;
+								}
+								break;
+							
+							case sDesc2:
+								if (regex_match(s, m, rx1))
+								{
+									store();
+									
+									id = m[1].str();
+									state = sEvidence;
+									evidence.clear();
+									desc.clear();
+								}
+								else
+									desc = desc + ' ' + s;
+								break;
+						}
+					}
+					
+					if (not id.empty())
+						store();
+	
+					break;
+				}
 				
-				if (not mRec->vS(11).empty())
-					s << mRec->vS(11) << endl;
-				GetNextRecord();
+				case 999:
+				{
+					stringstream s;
+					GetNextRecord();
+					if (vS(12) == "SEQUENCE")
+						GetNextRecord();
+					
+					while (mRec->is("REMARK 999"))
+					{
+						s << vS(12) << endl;
+						GetNextRecord();
+					}
 					
-				while (mRec->is(skipped.c_str()))
+					sequenceDetails = s.str();
+					break;
+				}
+	
+				// these are skipped 
+	
+				case 2:
+				case 290:
+				case 300:
+					GetNextRecord();
+					break;
+	
+				default:
 				{
-					s << mRec->vS(11) << endl;
+					string skipped = mRec->mName;
+					
+					stringstream s;
+					
+					if (not mRec->vS(11).empty())
+						s << mRec->vS(11) << endl;
 					GetNextRecord();
+						
+					while (mRec->is(skipped.c_str()))
+					{
+						s << mRec->vS(11) << endl;
+						GetNextRecord();
+					}
+					
+					getCategory("pdbx_database_remark")->emplace({
+						{ "id", remarkNr },
+						{ "text", s.str() }
+					});
+					
+					break;
 				}
-				
-				getCategory("pdbx_database_remark")->emplace({
-					{ "id", remarkNr },
-					{ "text", s.str() }
-				});
-				
-				break;
 			}
 		}
+		catch (const exception& ex)
+		{
+			throw_with_nested(runtime_error("Error parsing REMARK " + to_string(remarkNr)));
+		}
 	}

 	if (not (compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty()))
@@ -2566,11 +2608,24 @@ void PDBFileParser::ParseRemark200()
 			{ "crystal_id", 1 }
 		});
 		
+		string collectionDate;
+		boost::system::error_code ec;
+		collectionDate = pdb2cifDate(rm200("DATE OF DATA COLLECTION", diffrnNr), ec);
+		if (ec)
+		{
+			if (VERBOSE)
+				cerr << ec.message() << " for pdbx_collection_date" << endl;
+			
+			// The date field can become truncated when multiple values are available		
+			if (diffrnNr != 1)
+				collectionDate.clear();
+		}
+				
 		getCategory("diffrn_detector")->emplace({
 			{ "diffrn_id", diffrnNr },
 			{ "detector", rm200("DETECTOR TYPE", diffrnNr) },
 			{ "type", rm200("DETECTOR MANUFACTURER", diffrnNr) },
-			{ "pdbx_collection_date", pdb2cifDate(rm200("DATE OF DATA COLLECTION", diffrnNr)) },
+			{ "pdbx_collection_date", collectionDate },
 			{ "details", rm200("OPTICS", diffrnNr) }
 		});
 		
@@ -2586,7 +2641,7 @@ void PDBFileParser::ParseRemark200()

 		vector<string> wavelengths;
 		string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr);
-		ba::split(wavelengths, wl, ba::is_any_of(", "), ba::token_compress_on);
+		ba::split(wavelengths, wl, ba::is_any_of(", -"), ba::token_compress_on);
 		
 		diffrnWaveLengths.insert(wavelengths.begin(), wavelengths.end());

@@ -2997,7 +3052,7 @@ void PDBFileParser::ParsePrimaryStructure()
 		else if (mRec->is("DBREF2"))					//	 1 -  6       Record name   "DBREF2"                                        
 		{                                               //	 8 - 11       IDcode        idCode        ID code of this datablock.            
 			if (vC(13) != cur.chainID)			        //	13            Character     chainID       Chain identifier.                 
-				Error("Chain ID's for DBREF1/DBREF2 records do not match");
+				throw runtime_error("Chain ID's for DBREF1/DBREF2 records do not match");
 			cur.dbAccession				= vS(19, 40);  //	19 - 40       LString       dbAccession   Sequence database accession code, 
                                                        //	                                          left justified.                   
 			cur.dbSeqBegin				= vI(46, 55);  //	46 - 55       Integer       seqBegin      Initial sequence number of the    
@@ -3096,25 +3151,32 @@ void PDBFileParser::ParseHeterogen()
 		GetNextRecord();
 	}
 	
-	while (mRec->is("HETNAM"))		//	 1 -  6       Record name   "HETNAM"                                                 
-	{								//	 9 - 10       Continuation  continuation    Allows concatenation of multiple records.
-		string hetID = vS(12, 14);	//	12 - 14       LString(3)    hetID           Het identifier, right-justified.         
-        string text = vS(16);		//	16 - 70       String        text            Chemical name.                           
-
-		mHetnams[hetID] = text;
-		InsertChemComp(hetID);
+	for (;;)
+	{
+		if (mRec->is("HETNAM"))		//	 1 -  6       Record name   "HETNAM"                                                 
+		{								//	 9 - 10       Continuation  continuation    Allows concatenation of multiple records.
+			string hetID = vS(12, 14);	//	12 - 14       LString(3)    hetID           Het identifier, right-justified.         
+	        string text = vS(16);		//	16 - 70       String        text            Chemical name.                           
+	
+			mHetnams[hetID] = text;
+			InsertChemComp(hetID);
+			
+			GetNextRecord();
+			continue;
+		}
+	
+		if (mRec->is("HETSYN"))		 //	 1 -  6       Record name   "HETSYN"                                                           
+		{                                //	 9 - 10       Continuation  continuation   Allows concatenation of multiple records.           
+	         string hetID = vS(12, 14); //	12 - 14       LString(3)    hetID          Het identifier, right-justified.                    
+	         string syn = vS(16);		 //	16 - 70       SList         hetSynonyms    List of synonyms.                                   
+	
+			mHetsyns[hetID] = syn;
+	
+			GetNextRecord();
+			continue;
+		}
 		
-		GetNextRecord();
-	}
-
-	while (mRec->is("HETSYN"))		 //	 1 -  6       Record name   "HETSYN"                                                           
-	{                                //	 9 - 10       Continuation  continuation   Allows concatenation of multiple records.           
-         string hetID = vS(12, 14); //	12 - 14       LString(3)    hetID          Het identifier, right-justified.                    
-         string syn = vS(16);		 //	16 - 70       SList         hetSynonyms    List of synonyms.                                   
-
-		mHetsyns[hetID] = syn;
-
-		GetNextRecord();
+		break;
 	}
 	
 	while (mRec->is("FORMUL"))			//	 1 -  6        Record name   "FORMUL"                          
@@ -3145,7 +3207,7 @@ void PDBFileParser::ConstructEntities()
 	{
 		if (r->is("MODEL "))
 		{
-			modelNr = vI(11, 14);
+			modelNr = r->vI(11, 14);
 			continue;
 		}

@@ -3245,6 +3307,8 @@ void PDBFileParser::ConstructEntities()
 		}
 	}

+	set<char> terminatedChains;
+
 	for (auto r = mData; r != nullptr; r = r->mNext)
 	{
 		if (r->is("ATOM  ") or r->is("HETATM"))
@@ -3295,7 +3359,8 @@ void PDBFileParser::ConstructEntities()
 				}
 			}
 			
-			if (r->is("HETATM"))
+			// There appears to be a program that writes out HETATM records as ATOM records....
+			if (r->is("HETATM") or terminatedChains.count(chainID))
 			{
 				if (isWater(resName))
 					mWaterHetId = resName;
@@ -3317,6 +3382,12 @@ void PDBFileParser::ConstructEntities()

 			continue;
 		}
+
+		if (r->is("TER   "))
+		{
+			char chainID		= r->vC(22);		//	22             Character     chainID      Chain identifier.
+			terminatedChains.insert(chainID);
+		}
 	}

 	// Create missing compounds
@@ -3371,32 +3442,38 @@ void PDBFileParser::ConstructEntities()
 		int seqNr = 1;
 		for (auto& res: chain.mSeqres)
 		{
-			string authMonId, authSeqNum;
-			if (res.mSeen)
-			{
-				authMonId = res.mMonId;
-				authSeqNum = to_string(res.mSeqNum);
-			}
-
 			mChainSeq2AsymSeq[make_tuple(chain.mDbref.chainID, res.mSeqNum, res.mIcode)] = make_tuple(asymId, seqNr, true);
 			
 			string seqId = to_string(seqNr);
 			++seqNr;
 			
-			cat->emplace({
-				{ "asym_id", asymId },
-				{ "entity_id", mMolID2EntityID[chain.mMolId] },
-				{ "seq_id", seqId },
-				{ "mon_id", res.mMonId },
-				{ "ndb_seq_num", seqId },
-				{ "pdb_seq_num", res.mSeqNum },
-				{ "auth_seq_num", authSeqNum },
-				{ "pdb_mon_id", authMonId },
-				{ "auth_mon_id", authMonId },
-				{ "pdb_strand_id", string{chain.mDbref.chainID} },
-				{ "pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : string{res.mIcode} },
-				{ "hetero", res.mAlts.empty() ? "n" : "y" }
-			});
+			set<string> monIds = { res.mMonId };
+			monIds.insert(res.mAlts.begin(), res.mAlts.end());
+			
+			for (string monId: monIds)
+			{
+				string authMonId, authSeqNum;
+				if (res.mSeen)
+				{
+					authMonId = monId;
+					authSeqNum = to_string(res.mSeqNum);
+				}
+
+				cat->emplace({
+					{ "asym_id", asymId },
+					{ "entity_id", mMolID2EntityID[chain.mMolId] },
+					{ "seq_id", seqId },
+					{ "mon_id", monId },
+					{ "ndb_seq_num", seqId },
+					{ "pdb_seq_num", res.mSeqNum },
+					{ "auth_seq_num", authSeqNum },
+					{ "pdb_mon_id", authMonId },
+					{ "auth_mon_id", authMonId },
+					{ "pdb_strand_id", string{chain.mDbref.chainID} },
+					{ "pdb_ins_code", (res.mIcode == ' ' or res.mIcode == 0) ? "." : string{res.mIcode} },
+					{ "hetero", res.mAlts.empty() ? "n" : "y" }
+				});
+			}
 		}
 	}
 	
@@ -3431,9 +3508,12 @@ void PDBFileParser::ConstructEntities()
 				{ "gene_src_common_name", cmp.mSource["ORGANISM_COMMON"] },
 				{ "pdbx_gene_src_gene", cmp.mSource["GENE"] },
 				{ "gene_src_strain", cmp.mSource["STRAIN"] },
+				{ "gene_src_tissue", cmp.mSource["TISSUE"] },
+				{ "gene_src_tissue_fraction", cmp.mSource["TISSUE_FRACTION"] },
 				{ "pdbx_gene_src_cell_line", cmp.mSource["CELL_LINE"] },
 				{ "pdbx_gene_src_organelle", cmp.mSource["ORGANELLE"] },
 				{ "pdbx_gene_src_cellular_location", cmp.mSource["CELLULAR_LOCATION"] },
+				{ "host_org_common_name", cmp.mSource["EXPRESSION_SYSTEM_COMMON"] },
 				{ "pdbx_gene_src_scientific_name", cmp.mSource["ORGANISM_SCIENTIFIC"] },
 				{ "pdbx_gene_src_ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
 				{ "pdbx_host_org_scientific_name", cmp.mSource["EXPRESSION_SYSTEM"] },
@@ -4613,9 +4693,21 @@ void PDBFileParser::ParseCrystallographic()
 		{ "Z_PDB", vF(67, 70) }                //	67 - 70       Integer       z              Z value.           
 	});
 	
+	string spageGroup, intTablesNr;
+	try
+	{
+		spageGroup = vS(56, 66);
+		clipper::Spacegroup sg(clipper::Spgr_descr{spageGroup});
+		intTablesNr = to_string(sg.spacegroup_number());
+	}
+	catch (...)
+	{
+	}
+
 	getCategory("symmetry")->emplace({
 		{ "entry_id", mStructureId },
-		{ "space_group_name_H-M", vS(56, 66) }
+		{ "space_group_name_H-M", spageGroup },
+		{ "Int_Tables_number", intTablesNr }
 	});

 	GetNextRecord();
@@ -4901,7 +4993,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
 			int u23 = vI(64, 70);				//	64 - 70       Integer       u[1][2]        U(2,3)
 			
 			if (vS(7, 11) + vS(77, 80) != check)
-				Error("ANISOU record should follow corresponding ATOM record");
+				throw runtime_error("ANISOU record should follow corresponding ATOM record");
 			
 			auto f = [](float f) -> string { return (boost::format("%6.4f") % f).str(); };
 			
@@ -4912,7 +5004,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
 				{ "pdbx_label_alt_id", altLoc != ' ' ? string { altLoc } : "." },
 				{ "pdbx_label_comp_id", resName },
 				{ "pdbx_label_asym_id", asymId },
-				{ "pdbx_label_seq_id", seqId },
+				{ "pdbx_label_seq_id", (isResseq and seqId > 0) ? to_string(seqId) : "." },
 				{ "U[1][1]", f(u11 / 10000.f) },
 				{ "U[2][2]", f(u22 / 10000.f) },
 				{ "U[3][3]", f(u33 / 10000.f) },
@@ -5028,12 +5120,9 @@ void PDBFileParser::Parse(istream& is, cif::File& result)
 		}
 		catch (const exception& ex)
 		{
-			cerr << "Error parsing REMARK 3: " << endl
-				 << ex.what() << endl;
+			throw_with_nested(runtime_error("Error parsing REMARK 3"));
 		}
 //		
-//		
-//		
 //		auto cat = getCategory("pdbx_refine_tls_group");
 //		for (Row r: *cat)
 //		{
@@ -5062,7 +5151,10 @@ void PDBFileParser::Parse(istream& is, cif::File& result)
 	}
 	catch (const exception& ex)
 	{
-		Error(ex.what());
+		if (mRec != nullptr)
+			throw_with_nested(runtime_error("Error parsing PDB at line " + to_string(mRec->mLineNr)));
+		else
+			throw;
 	}
 }

@@ -5175,36 +5267,75 @@ void PDBFileParser::PDBChain::AlignResToSeqRes()
 	x = highX;
 	y = highY;
 	
-	while (x >= 0 and y >= 0)
+	try
 	{
-		switch (tb(x, y))
+		while (x >= 0 and y >= 0)
 		{
-			case -1:
-				throw runtime_error("A residue found in the ATOM records (" + ry[y].mMonId + 
-					" @ " + string{mDbref.chainID} + ":" + to_string(ry[y].mSeqNum) +
-					") was not found in the SEQRES records");
-				--y;
-				break;
+			switch (tb(x, y))
+			{
+				case -1:
+					throw runtime_error("A residue found in the ATOM records (" + ry[y].mMonId + 
+						" @ " + string{mDbref.chainID} + ":" + to_string(ry[y].mSeqNum) +
+						((ry[y].mIcode == ' ' or ry[y].mIcode == 0) ? "" : string{ ry[y].mIcode })+
+						") was not found in the SEQRES records");
+					--y;
+					break;
+				
+				case 1:
+					if (VERBOSE > 3)
+						cerr << "Missing residue in ATOM records: " << rx[x].mMonId << " at " << rx[x].mSeqNum << endl;
+	
+					--x;
+					break;
+				
+				case 0:
+					if (VERBOSE > 3 and rx[x].mMonId != ry[y].mMonId)
+						cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonId << '/' << ry[y].mMonId << ')' << endl;
+					else if (VERBOSE > 4)
+						cerr << rx[x].mMonId << " -> " << ry[y].mMonId << " (" << ry[y].mSeqNum << ')' << endl;
+	
+					rx[x].mSeqNum = ry[y].mSeqNum;
+					rx[x].mIcode = ry[y].mIcode;
+	
+					--x;
+					--y;
+			}
+		}
+	}
+	catch (const exception& ex)
+	{
+		if (VERBOSE)
+		{
+			std::vector<pair<string,string>> alignment;
 			
-			case 1:
-				if (VERBOSE > 3)
-					cerr << "Missing residue in ATOM records: " << rx[x].mMonId << " at " << rx[x].mSeqNum << endl;
-
-				--x;
-				break;
+			for (x = highX, y = highY; x >= 0 and y >= 0; )
+			{
+				switch (tb(x, y))
+				{
+					case -1:
+						alignment.push_back(make_pair("...", ry[y].mMonId));
+						--y;
+						break;
+					
+					case 1:
+						alignment.push_back(make_pair(rx[x].mMonId, "..."));
+						--x;
+						break;
+					
+					case 0:
+						alignment.push_back(make_pair(rx[x].mMonId, ry[y].mMonId));
+						--x;
+						--y;
+						break;
+				}
+			}
 			
-			case 0:
-				if (VERBOSE > 3 and rx[x].mMonId != ry[y].mMonId)
-					cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonId << '/' << ry[y].mMonId << ')' << endl;
-				else if (VERBOSE > 4)
-					cerr << rx[x].mMonId << " -> " << ry[y].mMonId << " (" << ry[y].mSeqNum << ')' << endl;
-
-				rx[x].mSeqNum = ry[y].mSeqNum;
-				rx[x].mIcode = ry[y].mIcode;
-
-				--x;
-				--y;
+			reverse(alignment.begin(), alignment.end());
+			for (auto a: alignment)
+				cerr << "  " << a.first << " -- " << a.second << endl;
 		}
+		
+		throw;
 	}
 	
 	// assign numbers to the residues that don't have them yet
@@ -5248,15 +5379,7 @@ void ReadPDBFile(istream& pdbFile, cif::File& cifFile)

 	cifFile.loadDictionary("mmcif_pdbx");

-	try
-	{
-		p.Parse(pdbFile, cifFile);
-	}
-	catch (const exception& ex)
-	{
-		cerr << "Error parsing PDB file" << endl;
-		throw;
-	}
+	p.Parse(pdbFile, cifFile);
 	
 	cifFile.validate();
 }
--- a/src/PDB2CifRemark3.cpp
+++ b/src/PDB2CifRemark3.cpp
@@ -978,7 +978,10 @@ float Remark3Parser::parse()
 	}

 	if (not remarks.empty() and not iequals(remarks, "NULL"))
-		mDb["refine"].front()["details"] = remarks;
+	{
+		if (not mDb["refine"].empty())
+			mDb["refine"].front()["details"] = remarks;
+	}

 	float score = float(lineCount - dropped) / lineCount;


--- a/src/TlsParser.cpp
+++ b/src/TlsParser.cpp
+/* 
+	Created by: Maarten L. Hekkelman
+	Date: dinsdag 07 november, 2017
+
+	Copyright 2017 NKI AVL
+	
+	Permission is hereby granted, free of charge, to any person obtaining 
+	a copy of this software and associated documentation files (the 
+	"Software"), to deal in the Software without restriction, including 
+	without limitation the rights to use, copy, modify, merge, publish, 
+	distribute, sublicense, and/or sell copies of the Software, and to 
+	permit persons to whom the Software is furnished to do so, subject to 
+	the following conditions:
+	
+	The above copyright notice and this permission notice shall be 
+	included in all copies or substantial portions of the Software.
+	
+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+	IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
+	CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
+	TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
+	SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+#include "cif++/Config.h"
+
+#include <termios.h>
+#include <sys/ioctl.h>
+
+#include <iostream>
+#include <iomanip>
+
+#include <boost/program_options.hpp>
+#include <boost/algorithm/string.hpp>
+#include <boost/filesystem/path.hpp>
+
+#include "cif++/CifUtils.h"
+#include "cif++/Structure.h"
+#include "cif++/TlsParser.h"
+
+using namespace std;
+namespace po = boost::program_options;
+namespace ba = boost::algorithm;
+namespace fs = boost::filesystem;
+namespace c = libcif;
+
+namespace cif
+{
+
+static const char* kRedOn = "\033[37;1;41m";
+static const char* kRedOff = "\033[0m";
+
+const int
+	kResidueNrWildcard = numeric_limits<int>::min(),
+	kNoSeqNum = numeric_limits<int>::max() - 1;
+
+using namespace std;
+
+// --------------------------------------------------------------------
+// We parse selection statements and create a selection expression tree
+// which is then interpreted by setting the selected flag for the
+// residues. After that, the selected ranges are collected and printed.
+
+struct TLSResidue
+{
+	string	chainID;
+	int		seqNr;
+	char	iCode;
+	string	name;
+	bool	selected;
+	
+	string	asymID;
+	int		seqID;
+	
+	bool operator==(const TLSResidue& rhs) const
+	{
+		return chainID == rhs.chainID and
+			seqNr == rhs.seqNr and
+			iCode == rhs.iCode and
+			iequals(name, rhs.name) and
+			selected == rhs.selected;
+	}
+};
+
+void DumpSelection(const vector<TLSResidue>& selected, int indentLevel)
+{
+	string indent(indentLevel * 2, ' ');
+	
+	auto i = selected.begin();
+	bool first = true;
+
+	// First print in PDB space	
+	while (i != selected.end())
+	{
+		auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
+		if (b == selected.end())
+			break;
+		
+		if (first)
+			cout << indent << "PDB:" << endl;
+		first = false;
+		
+		auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.chainID != b->chainID or not s.selected; });
+		
+		cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << endl;
+		i = e;
+	}
+	
+	// Then in mmCIF space
+	
+	if (not first)
+		cout << indent << "mmCIF:" << endl;
+	
+	i = selected.begin();
+	while (i != selected.end())
+	{
+		auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
+		if (b == selected.end())
+			break;
+		
+		auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.asymID != b->asymID or not s.selected; });
+	
+		string asymID = b->asymID;
+		int from = b->seqID, to = from;
+
+		for (auto j = b + 1; j != e; ++j)
+		{
+			if (j->seqID == to + 1)
+				to = j->seqID;
+			else if (j->seqID != to)		// probably an insertion code
+			{
+				if (from == kNoSeqNum or to == kNoSeqNum)
+					cout << indent << " >> " << asymID << endl;
+				else
+					cout << indent << " >> " << asymID << ' ' << from << ':' << to << endl;
+				asymID = b->asymID;
+				from = to = b->seqID;
+			}
+		}
+
+		if (from == kNoSeqNum or to == kNoSeqNum)
+			cout << indent << " >> " << asymID << endl;
+		else
+			cout << indent << " >> " << asymID << ' ' << from << ':' << to << endl;
+
+		i = e;
+	}
+	
+	if (first)
+	{
+		if (isatty(STDOUT_FILENO))
+			cout << indent << kRedOn << "Empty selection" << kRedOff << endl;
+		else
+			cout << indent << kRedOn << "Empty selection" << kRedOff << endl;
+	}
+}
+
+vector<tuple<string,int,int>> TLSSelection::GetRanges(Datablock& db, bool pdbNamespace) const
+{
+	vector<TLSResidue> selected;
+
+	// Collect the residues from poly seq scheme...		
+	for (auto r: db["pdbx_poly_seq_scheme"])
+	{
+		string chain, seqNr, iCode, name;
+
+		string asymID;
+		int seqID;
+		
+		if (pdbNamespace)
+			cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_comp_id", "asym_id", "seq_id");
+		else
+		{
+			cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");
+			asymID = chain;
+			seqID = stoi(seqNr);
+		}
+		
+		if (seqNr.empty())
+			continue;
+
+		if (iCode.length() > 1)
+			throw runtime_error("invalid iCode");
+		
+		selected.push_back({chain, stoi(seqNr), iCode[0], name, false, asymID, seqID});
+	}
+
+	// ... those from the nonpoly scheme
+	for (auto r: db["pdbx_nonpoly_scheme"])
+	{
+		string chain, seqNr, iCode, name, asymID;
+
+		if (pdbNamespace)
+		{
+			cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
+			if (seqNr.empty())
+				continue;
+		}
+		else
+		{
+			cif::tie(chain, name) = r.get("asym_id", "mon_id");
+			asymID = chain;
+			seqNr = "0";
+		}
+		
+		if (iequals(name, "HOH") or iequals(name, "H2O"))
+			continue;
+		
+		if (iCode.length() > 1)
+			throw runtime_error("invalid iCode");
+		
+		selected.push_back({chain, stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum});
+	}
+	
+	// selected might consist of multiple ranges
+	// output per chain
+
+	stable_sort(selected.begin(), selected.end(), [](auto& a, auto& b) -> bool
+		{
+			int d = a.chainID.compare(b.chainID);
+			if (d == 0)
+				d = a.seqNr - b.seqNr;
+			return d < 0;
+		});
+	
+	CollectResidues(db, selected);
+	
+	vector<tuple<string,int,int>> result;
+
+	auto i = selected.begin();
+	
+	while (i != selected.end())
+	{
+		auto b = find_if(i, selected.end(), [](auto s) -> bool { return s.selected; });
+		if (b == selected.end())
+			break;
+		
+		auto e = find_if(b, selected.end(), [b](auto s) -> bool { return s.asymID != b->asymID or not s.selected; });
+
+		// return ranges with strict increasing sequence numbers.
+		// So when there's a gap in the sequence we split the range.
+		// Beware of iCodes though
+		result.push_back(make_tuple(b->asymID, b->seqID, b->seqID));
+		for (auto j = b + 1; j != e; ++j)
+		{
+			if (j->seqID == get<2>(result.back()) + 1)
+				get<2>(result.back()) = j->seqID;
+			else if (j->seqID != get<2>(result.back()))		// probably an insertion code
+				result.push_back(make_tuple(b->asymID, j->seqID, j->seqID));
+		}
+
+		i = e;
+	}
+	
+	return result;
+}
+
+struct TLSSelectionNot : public TLSSelection
+{
+	TLSSelectionNot(TLSSelectionPtr selection)
+		: selection(selection.release()) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		selection->CollectResidues(db, residues, indentLevel + 1);
+		
+		for (auto& r: residues)
+			r.selected = not r.selected;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "NOT" << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	TLSSelectionPtr selection;
+};
+
+struct TLSSelectionAll : public TLSSelection
+{
+	TLSSelectionAll() {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		for (auto& r: residues)
+			r.selected = true;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "ALL" << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+};
+
+struct TLSSelectionChain : public TLSSelectionAll
+{
+	TLSSelectionChain(const string& chainID)
+		: m_chain(chainID) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		bool allChains = m_chain == "*";
+		
+		for (auto& r: residues)
+			r.selected = allChains or r.chainID == m_chain;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "CHAIN " << m_chain << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	string	m_chain;
+};
+
+struct TLSSelectionResID : public TLSSelectionAll
+{
+	TLSSelectionResID(int seqNr, char iCode)
+		: m_seq_nr(seqNr), m_icode(iCode) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		for (auto& r: residues)
+			r.selected = r.seqNr == m_seq_nr and r.iCode == m_icode;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << (m_icode ? string { m_icode} : "") << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	int		m_seq_nr;
+	char	m_icode;
+};
+
+struct TLSSelectionRangeSeq : public TLSSelectionAll
+{
+	TLSSelectionRangeSeq(int first, int last)
+		: m_first(first), m_last(last) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		for (auto& r: residues)
+		{
+			r.selected = ((r.seqNr >= m_first or m_first == kResidueNrWildcard) and
+						  (r.seqNr <= m_last or m_last == kResidueNrWildcard));
+		}
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Range " << m_first << ':' << m_last << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	int		m_first, m_last;
+};
+
+struct TLSSelectionRangeID : public TLSSelectionAll
+{
+	TLSSelectionRangeID(int first, int last, char icodeFirst = 0, char icodeLast = 0)
+		: m_first(first), m_last(last), m_icode_first(icodeFirst), m_icode_last(icodeLast) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		// need to do this per chain
+		set<string> chains;
+		for (auto& r: residues)
+			chains.insert(r.chainID);
+		
+		for (string chain: chains)
+		{
+			auto f = find_if(residues.begin(), residues.end(),
+				[=](auto r) -> bool {
+					return r.chainID == chain and r.seqNr == m_first and r.iCode == m_icode_first;
+				});
+			
+			auto l = find_if(residues.begin(), residues.end(),
+				[=](auto r) -> bool {
+					return r.chainID == chain and r.seqNr == m_last and r.iCode == m_icode_last;
+				});
+			
+			if (f != residues.end() and l != residues.end() and f <= l)
+			{
+				++l;
+				
+				for (; f != l; ++f)
+					f->selected = true;
+			}
+		}
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Through " << m_first << ':' << m_last << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	int		m_first, m_last;
+	char	m_icode_first, m_icode_last;
+};
+
+struct TLSSelectionUnion : public TLSSelection
+{
+	TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
+		: lhs(lhs.release()), rhs(rhs.release()) {}
+	
+	TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
+		: lhs(lhs.release()), rhs(rhs.release()) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		auto a = residues;
+		for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
+		
+		auto b = residues;
+		for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
+		
+		lhs->CollectResidues(db, a, indentLevel + 1);
+		rhs->CollectResidues(db, b, indentLevel + 1);
+		
+		for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
+			ri->selected = ai->selected or bi->selected;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Union" << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+	
+	TLSSelectionPtr	lhs;
+	TLSSelectionPtr	rhs;
+};
+
+struct TLSSelectionIntersection : public TLSSelection
+{
+	TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
+		: lhs(lhs.release()), rhs(rhs.release()) {}
+	
+	TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
+		: lhs(lhs.release()), rhs(rhs.release()) {}
+	
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		auto a = residues;
+		for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
+		
+		auto b = residues;
+		for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
+		
+		lhs->CollectResidues(db, a, indentLevel + 1);
+		rhs->CollectResidues(db, b, indentLevel + 1);
+		
+		for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
+			ri->selected = ai->selected and bi->selected;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Intersection" << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+	
+	TLSSelectionPtr	lhs;
+	TLSSelectionPtr	rhs;
+};
+
+struct TLSSelectionByName : public TLSSelectionAll
+{
+  public:
+	TLSSelectionByName(const string& resname)
+		: m_name(resname) {}
+
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		for (auto& r: residues)
+			r.selected = r.name == m_name;
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Name " << m_name << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	string	m_name;
+};
+
+struct TLSSelectionByElement : public TLSSelectionAll
+{
+  public:
+	TLSSelectionByElement(const string& element)
+		: m_element(element) {}
+
+	virtual void CollectResidues(Datablock& db, vector<TLSResidue>& residues, int indentLevel) const
+	{
+		// rationale... We want to select residues only. So we select
+		// residues that have just a single atom of type m_element.
+		// And we assume these have as residue name... m_element.
+		// ... Right?
+		
+		for (auto& r: residues)
+			r.selected = iequals(r.name, m_element);
+
+		if (VERBOSE)
+		{
+			cout << string(indentLevel * 2, ' ') << "Element " << m_element << endl;
+			DumpSelection(residues, indentLevel);
+		}
+	} 
+
+	string	m_element;
+};
+
+// --------------------------------------------------------------------
+
+class TLSSelectionParserImpl
+{
+  public:
+	TLSSelectionParserImpl(const string& selection)
+		: m_selection(selection), m_p(m_selection.begin()), m_end(m_selection.end()) {}
+	
+	virtual TLSSelectionPtr Parse() = 0;
+	
+  protected:
+
+	virtual int GetNextToken() = 0;
+	virtual void Match(int token);
+	virtual string ToString(int token) = 0;
+
+	string				m_selection;
+	string::iterator	m_p, m_end;
+	int					m_lookahead;
+	string				m_token;
+};
+
+
+void TLSSelectionParserImpl::Match(int token)
+{
+	if (m_lookahead == token)
+		m_lookahead = GetNextToken();
+	else
+	{
+		string expected;
+		if (token >= 256)
+			expected = ToString(token);
+		else
+			expected = { char(token) };
+		
+		string found;
+		if (m_lookahead >= 256)
+			found = ToString(m_lookahead) + " (" + m_token + ')';
+		else
+			found = { char(m_lookahead) };
+	
+		throw runtime_error("Expected " + expected + " but found " + found);
+	}
+}
+
+// --------------------------------------------------------------------
+
+class TLSSelectionParserImplPhenix : public TLSSelectionParserImpl
+{
+  public:
+	TLSSelectionParserImplPhenix(const string& selection)
+		: TLSSelectionParserImpl(selection)
+	{
+		m_lookahead = GetNextToken();
+	}
+	
+	virtual TLSSelectionPtr Parse();
+
+  private:
+
+	TLSSelectionPtr ParseAtomSelection();
+	TLSSelectionPtr	ParseTerm();
+	TLSSelectionPtr	ParseFactor();
+
+	enum TOKEN {
+		pt_NONE		= 0,
+		pt_IDENT	= 256,
+		pt_STRING,
+		pt_NUMBER,
+		pt_RESID,
+		pt_EOLN,
+		pt_KW_ALL,
+		pt_KW_CHAIN,
+		pt_KW_RESSEQ,
+		pt_KW_RESID,
+		pt_KW_ICODE,
+		pt_KW_RESNAME,
+		pt_KW_ELEMENT,
+		pt_KW_AND,
+		pt_KW_OR,
+		pt_KW_NOT,
+		pt_KW_PDB,
+		pt_KW_ENTRY,
+		pt_KW_THROUGH
+	};
+
+	virtual int GetNextToken();
+	virtual string ToString(int token);
+	
+	int m_value_i;
+	string m_value_s;
+	char m_icode;
+};
+
+int TLSSelectionParserImplPhenix::GetNextToken()
+{
+	int result = pt_NONE;
+	enum STATE {
+		st_START,
+		st_RESID	= 200,
+		st_NUM		= 300,
+		st_IDENT	= 400,
+		st_QUOTED	= 500,
+		st_DQUOTED	= 550,
+		st_OTHER	= 600
+	};
+	int state = st_START;
+	
+	m_value_i = 0;
+	m_icode = 0;
+	m_value_s.clear();
+	auto s = m_p;
+	
+	auto start = state;
+	m_token.clear();
+	
+	auto restart = [&]()
+	{
+		switch (start)
+		{
+			case st_START:	state = start = st_RESID;	break;
+			case st_RESID:	state = start = st_NUM;		break;
+			case st_NUM:	state = start = st_IDENT;	break;
+			case st_IDENT:	state = start = st_QUOTED;	break;
+			case st_QUOTED:	state = start = st_DQUOTED;	break;
+			case st_DQUOTED:state = start = st_OTHER;	break;
+		}
+		m_token.clear();
+		m_p = s;
+	};
+	
+	auto retract = [&]()
+	{
+		--m_p;
+		m_token.pop_back();
+	};
+	
+	while (result == pt_NONE)
+	{
+		char ch = *m_p++;
+		if (m_p > m_end)
+			ch = 0;
+		else
+			m_token += ch;
+		
+		switch (state)
+		{
+			// start block
+			case st_START:
+				if (ch == 0)
+					result = pt_EOLN;
+				else if (isspace(ch))
+				{
+					m_token.clear();
+					++s;
+				}
+				else
+					restart();
+				break;
+
+			// RESID block
+			case st_RESID:
+				if (ch == '-')
+					state = st_RESID + 1;
+				else if (isdigit(ch))
+				{
+					m_value_i = (ch - '0');
+					state = st_RESID + 2;
+				}
+				else
+					restart();
+				break;
+			
+			case st_RESID + 1:
+				if (isdigit(ch))
+				{
+					m_value_i = -(ch - '0');
+					state = st_RESID + 2;
+				}
+				else
+					restart();
+				break;
+			
+			case st_RESID + 2:
+				if (isdigit(ch))
+					m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
+				else if (isalpha(ch))
+				{
+					m_icode = ch;
+					state = st_RESID + 3;
+				}
+				else
+					restart();
+				break;
+			
+			case st_RESID + 3:
+				if (isalnum(ch))
+					restart();
+				else
+				{
+					retract();
+					result = pt_RESID;
+				}
+				break;
+			
+			// NUM block
+			
+			case st_NUM:
+				if (ch == '-')
+					state = st_NUM + 1;
+				else if (isdigit(ch))
+				{
+					m_value_i = ch - '0';
+					state = st_NUM + 2;
+				}
+				else
+					restart();
+				break;
+			
+			case st_NUM + 1:
+				if (isdigit(ch))
+				{
+					m_value_i = -(ch - '0');
+					state = st_NUM + 2;
+				}
+				else
+					restart();
+				break;
+			
+			case st_NUM + 2:
+				if (isdigit(ch))
+					m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
+				else if (not isalpha(ch))
+				{
+					result = pt_NUMBER;
+					retract();
+				}
+				else
+					restart();
+				break;
+			
+			// IDENT block
+			
+			case st_IDENT:
+				if (isalnum(ch))
+				{
+					m_value_s = { ch };
+					state = st_IDENT + 1;
+				}
+				else
+					restart();
+				break;
+			
+			case st_IDENT + 1:
+				if (isalnum(ch) or ch == '\'')
+					m_value_s += ch;
+				else
+				{
+					--m_p;
+					result = pt_IDENT;
+				}
+				break;
+			
+			// QUOTED block
+			
+			case st_QUOTED:
+				if (ch == '\'')
+				{
+					m_value_s.clear();
+					state = st_QUOTED + 1;
+				}
+				else
+					restart();
+				break;
+			
+			case st_QUOTED + 1:
+				if (ch == '\'')
+					result = pt_STRING;
+				else if (ch == 0)
+					throw runtime_error("Unexpected end of selection, missing quote character?");
+				else
+					m_value_s += ch;
+				break;
+			
+			// QUOTED block
+			
+			case st_DQUOTED:
+				if (ch == '\"')
+				{
+					m_value_s.clear();
+					state = st_DQUOTED + 1;
+				}
+				else
+					restart();
+				break;
+			
+			case st_DQUOTED + 1:
+				if (ch == '\"')
+					result = pt_STRING;
+				else if (ch == 0)
+					throw runtime_error("Unexpected end of selection, missing quote character?");
+				else
+					m_value_s += ch;
+				break;
+			
+			// OTHER block
+			case st_OTHER:
+				result = ch;
+				break;
+		}
+	}
+	
+	if (result == pt_IDENT)
+	{
+		if (iequals(m_value_s, "CHAIN"))
+			result = pt_KW_CHAIN;
+		else if (iequals(m_value_s, "ALL"))
+			result = pt_KW_ALL;
+		else if (iequals(m_value_s, "AND"))
+			result = pt_KW_AND;
+		else if (iequals(m_value_s, "OR"))
+			result = pt_KW_OR;
+		else if (iequals(m_value_s, "NOT"))
+			result = pt_KW_NOT;
+		else if (iequals(m_value_s, "RESSEQ"))
+			result = pt_KW_RESSEQ;
+		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
+			result = pt_KW_RESID;
+		else if (iequals(m_value_s, "RESNAME"))
+			result = pt_KW_RESNAME;
+		else if (iequals(m_value_s, "ELEMENT"))
+			result = pt_KW_ELEMENT;
+		else if (iequals(m_value_s, "PDB"))
+			result = pt_KW_PDB;
+		else if (iequals(m_value_s, "ENTRY"))
+			result = pt_KW_ENTRY;
+		else if (iequals(m_value_s, "THROUGH"))
+			result = pt_KW_THROUGH;
+	}
+		
+	return result;
+}
+
+string TLSSelectionParserImplPhenix::ToString(int token)
+{
+	switch (token)
+	{
+		case pt_IDENT:		return "identifier";
+		case pt_STRING:		return "string";
+		case pt_NUMBER:		return "number";
+		case pt_RESID:		return "resid";
+		case pt_EOLN:		return "end of line";
+
+		case pt_KW_ALL:		return "ALL";
+		case pt_KW_CHAIN:	return "CHAIN";
+		case pt_KW_RESSEQ:	return "RESSEQ";
+		case pt_KW_RESID:	return "RESID";
+		case pt_KW_RESNAME:	return "RESNAME";
+		case pt_KW_ELEMENT:	return "ELEMENT";
+		case pt_KW_AND:		return "AND";
+		case pt_KW_OR:		return "OR";
+		case pt_KW_NOT:		return "NOT";
+		case pt_KW_PDB:		return "PDB";
+		case pt_KW_ENTRY:	return "ENTRY";
+		case pt_KW_THROUGH:	return "THROUGH";
+
+		default:			return "character";
+	}
+}
+
+TLSSelectionPtr TLSSelectionParserImplPhenix::Parse()
+{
+	if (m_lookahead == pt_KW_PDB)
+	{
+		Match(pt_KW_PDB);
+//		Match(pt_KW_ENTRY);
+		
+		throw runtime_error("Unimplemented PDB ENTRY specification");
+	}
+
+	TLSSelectionPtr result = ParseAtomSelection();
+
+	bool extraParenthesis = false;
+
+	if (m_lookahead == ')')
+	{
+		extraParenthesis = true;
+		m_lookahead = GetNextToken();
+	}
+
+	Match(pt_EOLN);
+	
+	if (extraParenthesis)
+		cerr << "WARNING: too many closing parenthesis in TLS selection statement" << endl;
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplPhenix::ParseAtomSelection()
+{
+	TLSSelectionPtr result = ParseTerm();
+	
+	while (m_lookahead == pt_KW_OR)
+	{
+		Match(pt_KW_OR);
+		result.reset(new TLSSelectionUnion(result, ParseTerm()));
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplPhenix::ParseTerm()
+{
+	TLSSelectionPtr result = ParseFactor();
+	
+	while (m_lookahead == pt_KW_AND)
+	{
+		Match(pt_KW_AND);
+		result.reset(new TLSSelectionIntersection(result, ParseFactor()));
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplPhenix::ParseFactor()
+{
+	TLSSelectionPtr result;
+
+	switch (m_lookahead)
+	{
+		case '(':
+			Match('(');
+			result = ParseAtomSelection();
+			if (m_lookahead == pt_EOLN)
+				cerr << "WARNING: missing closing parenthesis in TLS selection statement" << endl;
+			else
+				Match(')');
+			break;
+		
+		case pt_KW_NOT:
+			Match(pt_KW_NOT);
+			result.reset(new TLSSelectionNot(ParseAtomSelection()));
+			break;
+		
+		case pt_KW_CHAIN:
+		{
+			Match(pt_KW_CHAIN);
+			
+			string chainID = m_value_s;
+			if (m_lookahead == pt_NUMBER)	// sigh
+			{
+				chainID = to_string(m_value_i);
+				Match(pt_NUMBER);
+			}
+			else
+				Match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
+			
+			result.reset(new TLSSelectionChain(chainID));
+			break;
+		}
+		
+		case pt_KW_RESNAME:
+		{
+			Match(pt_KW_RESNAME);
+			string name = m_value_s;
+			Match(pt_IDENT);
+			result.reset(new TLSSelectionByName(name));
+			break;
+		}
+		
+		case pt_KW_ELEMENT:
+		{
+			Match(pt_KW_ELEMENT);
+			string element = m_value_s;
+			Match(pt_IDENT);
+			result.reset(new TLSSelectionByElement(element));
+			break;
+		}
+		
+		case pt_KW_RESSEQ:
+		{
+			Match(pt_KW_RESSEQ);
+			
+			int from = m_value_i;
+			Match(pt_NUMBER);
+			
+			int to = from;
+			if (m_lookahead == ':')
+			{
+				Match(':');
+				to = m_value_i;
+				Match(pt_NUMBER);
+			}
+			
+			result.reset(new TLSSelectionRangeSeq(from, to));
+			break;
+		}
+		
+		case pt_KW_RESID:
+		{
+			Match(pt_KW_RESID);
+			
+			int from, to;
+			char icode_from = 0, icode_to = 0;
+			bool through = false;
+			
+			from = to = m_value_i;
+
+			if (m_lookahead == pt_NUMBER)
+				Match(pt_NUMBER);
+			else
+			{
+				icode_from = m_icode;
+				Match(pt_RESID);
+			}
+
+			if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
+			{
+				through = m_lookahead == pt_KW_THROUGH;
+
+				Match(m_lookahead);
+
+				to = m_value_i;
+				if (m_lookahead == pt_NUMBER)
+					Match(pt_NUMBER);
+				else
+				{
+					icode_to = m_icode;
+					Match(pt_RESID);
+				}
+
+				if (through)
+					result.reset(new TLSSelectionRangeID(from, to, icode_from, icode_to));
+				else
+				{
+					if (VERBOSE and (icode_from or icode_to))
+						cerr << "Warning, ignoring insertion codes" << endl;
+					
+					result.reset(new TLSSelectionRangeSeq(from, to));
+				}
+			}
+			else
+				result.reset(new TLSSelectionResID(from, icode_from));
+			
+			break;
+		}
+		
+		case pt_KW_ALL:
+			Match(pt_KW_ALL);
+			result.reset(new TLSSelectionAll());
+			break;
+		
+		default:
+			throw runtime_error("Unexpected token " + ToString(m_lookahead) + " (" + m_token + ')');
+	}
+	
+	return result;
+}
+
+// --------------------------------------------------------------------
+
+class TLSSelectionParserImplBuster : public TLSSelectionParserImpl
+{
+  public:
+	TLSSelectionParserImplBuster(const string& selection);
+	
+	virtual TLSSelectionPtr Parse();
+	
+  protected:
+	
+	enum TOKEN {
+		bt_NONE	= 0,
+		bt_IDENT	= 256,
+		bt_NUMBER,
+		bt_EOLN,
+	};
+	
+	virtual int GetNextToken();
+	virtual string ToString(int token);
+		
+	TLSSelectionPtr ParseGroup();
+	tuple<string,int> ParseAtom();
+
+	TLSSelectionPtr ParseOldGroup();
+	
+	int m_value_i;
+	string m_value_s;
+	bool m_parsing_old_style = false;
+};
+
+TLSSelectionParserImplBuster::TLSSelectionParserImplBuster(const string& selection)
+	: TLSSelectionParserImpl(selection)
+{
+	m_lookahead = GetNextToken();
+}
+
+int TLSSelectionParserImplBuster::GetNextToken()
+{
+	int result = bt_NONE;
+	enum STATE { st_START, st_NEGATE, st_NUM, st_IDENT } state = st_START;
+	
+	m_value_i = 0;
+	m_value_s.clear();
+	bool negative = false;
+	
+	while (result == bt_NONE)
+	{
+		char ch = *m_p++;
+		if (m_p > m_end)
+			ch = 0;
+		
+		switch (state)
+		{
+			case st_START:
+				if (ch == 0)
+					result = bt_EOLN;
+				else if (isspace(ch))
+					continue;
+				else if (isdigit(ch))
+				{
+					m_value_i = ch - '0';
+					state = st_NUM;
+				}
+				else if (isalpha(ch))
+				{
+					m_value_s = { ch };
+					state = st_IDENT;
+				}
+				else if (ch == '-')
+				{
+					state = st_NEGATE;
+				}
+				else
+					result = ch;
+				break;
+			
+			case st_NEGATE:
+				if (isdigit(ch))
+				{
+					m_value_i = ch - '0';
+					state = st_NUM;
+					negative = true;
+				}
+				else
+				{
+					--m_p;
+					result = '-';
+				}
+				break;
+			
+			case st_NUM:
+				if (isdigit(ch))
+					m_value_i = 10 * m_value_i + (ch - '0');
+				else
+				{
+					if (negative)
+						m_value_i = -m_value_i;
+
+					result = bt_NUMBER;
+					--m_p;
+				}
+				break;
+			
+			case st_IDENT:
+				if (isalnum(ch))
+					m_value_s += ch;
+				else
+				{
+					--m_p;
+					result = bt_IDENT;
+				}
+				break;
+		}
+	}
+	
+	return result;
+}
+
+string TLSSelectionParserImplBuster::ToString(int token)
+{
+	switch (token)
+	{
+		case bt_IDENT:	return "identifier (" + m_value_s + ')';
+		case bt_NUMBER:	return "number (" + to_string(m_value_i) + ')';
+		case bt_EOLN:	return "end of line";
+
+		default:
+			assert(false);
+			return "unknown token";
+	}
+}
+
+TLSSelectionPtr TLSSelectionParserImplBuster::ParseGroup()
+{
+	TLSSelectionPtr result;
+	
+	auto add = [&result](const string& chainID, int from, int to)
+	{
+		TLSSelectionPtr sc(new TLSSelectionChain(chainID));
+		TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
+		TLSSelectionPtr s(new TLSSelectionIntersection(sc, sr));
+		
+		if (result == nullptr)
+			result.reset(s.release());
+		else
+			result.reset(new TLSSelectionUnion{result, s });
+	};
+	
+	Match('{');
+
+	do
+	{
+		string chain1;
+		int seqNr1;
+		std::tie(chain1, seqNr1) = ParseAtom();
+		
+		if (m_lookahead == '-')
+		{
+			string chain2;
+			int seqNr2 = seqNr1;
+
+			Match('-');
+			
+			if (m_lookahead == bt_NUMBER)
+			{
+				seqNr2 = m_value_i;
+				Match(bt_NUMBER);
+			}
+			else
+			{
+				std::tie(chain2, seqNr2) = ParseAtom();
+				if (chain1 != chain2)
+				{
+					cerr << "Warning, ranges over multiple chains detected" << endl;
+					
+					TLSSelectionPtr sc1(new TLSSelectionChain(chain1));
+					TLSSelectionPtr sr1(new TLSSelectionRangeSeq(seqNr1, kResidueNrWildcard));
+					TLSSelectionPtr s1(new TLSSelectionIntersection(sc1, sr1));
+
+					TLSSelectionPtr sc2(new TLSSelectionChain(chain2));
+					TLSSelectionPtr sr2(new TLSSelectionRangeSeq(kResidueNrWildcard, seqNr2));
+					TLSSelectionPtr s2(new TLSSelectionIntersection(sc2, sr2));
+					
+					TLSSelectionPtr s(new TLSSelectionUnion(s1, s2));
+					
+					if (result == nullptr)
+						result.reset(s.release());
+					else
+						result.reset(new TLSSelectionUnion{result, s });
+					
+					chain1.clear();
+				}
+			}
+
+			if (not chain1.empty())
+				add(chain1, seqNr1, seqNr2);
+		}
+		else
+			add(chain1, seqNr1, seqNr1);
+	}
+	while (m_lookahead != '}');
+	
+	Match('}');
+	
+	return result;
+}
+
+tuple<string,int> TLSSelectionParserImplBuster::ParseAtom()
+{
+	string chain = m_value_s;
+	int seqNr = kResidueNrWildcard;
+	
+	if (m_lookahead == '*')
+		Match('*');
+	else
+		Match(bt_IDENT);
+	
+	Match('|');
+	
+	if (m_lookahead == '*')
+		Match('*');
+	else
+	{
+		seqNr = m_value_i;
+		Match(bt_NUMBER);
+		
+		if (m_lookahead == ':')
+		{
+			Match(':');
+			string atom = m_value_s;
+			
+			if (VERBOSE)
+				cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << endl;
+			
+			Match(bt_IDENT);
+		}
+	}
+	
+	return make_tuple(chain, seqNr);
+}
+
+TLSSelectionPtr TLSSelectionParserImplBuster::Parse()
+{
+	TLSSelectionPtr result = ParseGroup();
+	Match(bt_EOLN);
+	return result;
+}
+
+// --------------------------------------------------------------------
+
+class TLSSelectionParserImplBusterOld : public TLSSelectionParserImpl
+{
+  public:
+	TLSSelectionParserImplBusterOld(const string& selection)
+		: TLSSelectionParserImpl(selection)
+	{
+		m_lookahead = GetNextToken();
+	}
+	
+	virtual TLSSelectionPtr Parse();
+
+  private:
+
+	TLSSelectionPtr ParseAtomSelection();
+	TLSSelectionPtr	ParseTerm();
+	TLSSelectionPtr	ParseFactor();
+	
+	TLSSelectionPtr ParseResid();
+	TLSSelectionPtr ParseChainResid();
+
+	enum TOKEN {
+		pt_NONE		= 0,
+		pt_IDENT	= 256,
+		pt_CHAINRESID,
+		pt_STRING,
+		pt_NUMBER,
+		pt_RANGE,
+		pt_EOLN,
+		
+		pt_KW_ALL,
+		pt_KW_CHAIN,
+		pt_KW_RESSEQ,
+		pt_KW_RESID,
+		pt_KW_RESNAME,
+		pt_KW_ELEMENT,
+		pt_KW_AND,
+		pt_KW_OR,
+		pt_KW_NOT,
+		pt_KW_PDB,
+		pt_KW_ENTRY,
+		pt_KW_THROUGH
+	};
+
+	virtual int GetNextToken();
+	virtual string ToString(int token);
+	
+	int m_value_i;
+	string m_value_s;
+	int m_value_r[2];
+};
+
+int TLSSelectionParserImplBusterOld::GetNextToken()
+{
+	int result = pt_NONE;
+	enum STATE { st_START, st_NEGATE, st_NUM, st_RANGE, st_IDENT_1, st_IDENT, st_CHAINRESID, st_QUOTED_1, st_QUOTED_2 } state = st_START;
+	
+	m_value_i = 0;
+	m_value_s.clear();
+	
+	bool negative = false;
+	
+	while (result == pt_NONE)
+	{
+		char ch = *m_p++;
+		if (m_p > m_end)
+			ch = 0;
+		
+		switch (state)
+		{
+			case st_START:
+				if (ch == 0)
+					result = pt_EOLN;
+				else if (isspace(ch))
+					continue;
+				else if (isdigit(ch))
+				{
+					m_value_i = ch - '0';
+					state = st_NUM;
+				}
+				else if (isalpha(ch))
+				{
+					m_value_s = { ch };
+					state = st_IDENT_1;
+				}
+				else if (ch == '-')
+				{
+					state = st_NEGATE;
+				}
+				else if (ch == '\'')
+				{
+					state = st_QUOTED_1;
+				}
+				else
+					result = ch;
+				break;
+			
+			case st_NEGATE:
+				if (isdigit(ch))
+				{
+					m_value_i = ch - '0';
+					state = st_NUM;
+					negative = true;
+				}
+				else
+				{
+					--m_p;
+					result = '-';
+				}
+				break;
+			
+			case st_NUM:
+				if (isdigit(ch))
+					m_value_i = 10 * m_value_i + (ch - '0');
+				else if (ch == '-' or ch == ':')
+				{
+					if (negative)
+						m_value_i = -m_value_i;
+
+					m_value_r[0] = m_value_i;
+					m_value_r[1] = 0;
+					state = st_RANGE;
+				}
+				else
+				{
+					if (negative)
+						m_value_i = -m_value_i;
+
+					result = pt_NUMBER;
+					--m_p;
+				}
+				break;
+			
+			case st_RANGE:		// TODO: question, is "-2--1" a valid range? We do not support that, yet
+				if (isdigit(ch))
+					m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
+				else if (m_value_r[1] != 0)
+				{
+					result = pt_RANGE;
+					--m_p;
+				}
+				else
+				{
+					--m_p;
+					--m_p;
+					result = pt_NUMBER;
+				}
+				break;
+			
+			case st_IDENT_1:
+				if (isalpha(ch))
+				{
+					m_value_s += ch;
+					state = st_IDENT;
+				}
+				else if (isdigit(ch))
+				{
+					m_value_i = (ch - '0');
+					state = st_CHAINRESID;
+				}
+				else
+				{
+					--m_p;
+					result = pt_IDENT;
+				}
+				break;
+			
+			case st_CHAINRESID:
+				if (isalpha(ch))
+				{
+					m_value_s += to_string(m_value_i);
+					m_value_s += ch;
+					state = st_IDENT;
+				}
+				else if (isdigit(ch))
+					m_value_i = 10 * m_value_i + (ch - '0');
+				else
+				{
+					--m_p;
+					result = pt_CHAINRESID;
+				}
+				break;
+			
+			case st_IDENT:
+				if (isalnum(ch))
+					m_value_s += ch;
+				else
+				{
+					--m_p;
+					result = pt_IDENT;
+				}
+				break;
+			
+			case st_QUOTED_1:
+				if (ch == '\'')
+				{
+					--m_p;
+					result = '\'';
+				}
+				else
+				{
+					m_value_s = { ch };
+					state = st_QUOTED_2;
+				}
+				break;
+			
+			case st_QUOTED_2:
+				if (ch == '\'')
+					result = pt_STRING;
+				else if (ch == 0)
+					throw runtime_error("Unexpected end of selection, missing quote character?");
+				else
+					m_value_s += ch;
+				break;
+				
+		}
+	}
+	
+	if (result == pt_IDENT)
+	{
+		if (iequals(m_value_s, "CHAIN"))
+			result = pt_KW_CHAIN;
+		else if (iequals(m_value_s, "ALL"))
+			result = pt_KW_ALL;
+		else if (iequals(m_value_s, "AND"))
+			result = pt_KW_AND;
+		else if (iequals(m_value_s, "OR"))
+			result = pt_KW_OR;
+		else if (iequals(m_value_s, "NOT"))
+			result = pt_KW_NOT;
+		else if (iequals(m_value_s, "RESSEQ"))
+			result = pt_KW_RESSEQ;
+		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
+			result = pt_KW_RESID;
+		else if (iequals(m_value_s, "RESNAME"))
+			result = pt_KW_RESNAME;
+		else if (iequals(m_value_s, "PDB"))
+			result = pt_KW_PDB;
+		else if (iequals(m_value_s, "ENTRY"))
+			result = pt_KW_ENTRY;
+		else if (iequals(m_value_s, "THROUGH"))
+			result = pt_KW_THROUGH;
+	}
+	
+	return result;
+}
+
+string TLSSelectionParserImplBusterOld::ToString(int token)
+{
+	switch (token)
+	{
+		case pt_IDENT:	return "identifier (" + m_value_s + ')';
+		case pt_STRING:	return "string (" + m_value_s + ')';
+		case pt_NUMBER:	return "number (" + to_string(m_value_i) + ')';
+		case pt_RANGE:	return "range (" + to_string(m_value_r[0]) + ':' + to_string(m_value_r[1]) + ')'; 
+		case pt_EOLN:	return "end of line";
+
+		case pt_KW_ALL:		return "ALL";
+		case pt_KW_CHAIN:	return "CHAIN";
+		case pt_KW_RESSEQ:	return "RESSEQ";
+		case pt_KW_RESID:	return "RESID";
+		case pt_KW_RESNAME:	return "RESNAME";
+		case pt_KW_ELEMENT:	return "ELEMENT";
+		case pt_KW_AND:		return "AND";
+		case pt_KW_OR:		return "OR";
+		case pt_KW_NOT:		return "NOT";
+		case pt_KW_PDB:		return "PDB";
+		case pt_KW_ENTRY:	return "ENTRY";
+		case pt_KW_THROUGH:	return "THROUGH";
+		default:
+			assert(false);
+			return "unknown token";
+	}
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::Parse()
+{
+	if (m_lookahead == pt_KW_PDB)
+	{
+		Match(pt_KW_PDB);
+//		Match(pt_KW_ENTRY);
+		
+		throw runtime_error("Unimplemented PDB ENTRY specification");
+	}
+
+	TLSSelectionPtr result = ParseAtomSelection();
+
+	Match(pt_EOLN);
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseAtomSelection()
+{
+	TLSSelectionPtr result = ParseTerm();
+	
+	while (m_lookahead == pt_KW_OR)
+	{
+		Match(pt_KW_OR);
+		result.reset(new TLSSelectionUnion(result, ParseTerm()));
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseTerm()
+{
+	TLSSelectionPtr result = ParseFactor();
+	
+	while (m_lookahead == pt_KW_AND)
+	{
+		Match(pt_KW_AND);
+		result.reset(new TLSSelectionIntersection(result, ParseFactor()));
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseFactor()
+{
+	TLSSelectionPtr result;
+
+	switch (m_lookahead)
+	{
+		case '(':
+			Match('(');
+			result = ParseAtomSelection();
+			Match(')');
+			break;
+		
+		case pt_KW_NOT:
+			Match(pt_KW_NOT);
+			result.reset(new TLSSelectionNot(ParseAtomSelection()));
+			break;
+		
+		case pt_KW_CHAIN:
+		{
+			Match(pt_KW_CHAIN);
+
+			string chainID = m_value_s;
+			if (m_lookahead == pt_NUMBER)	// sigh
+			{
+				chainID = to_string(m_value_i);
+				Match(pt_NUMBER);
+			}
+			else
+				Match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
+			
+			result.reset(new TLSSelectionChain(chainID));
+			break;
+		}
+		
+		case pt_KW_RESNAME:
+		{
+			Match(pt_KW_RESNAME);
+			string name = m_value_s;
+			Match(pt_IDENT);
+			result.reset(new TLSSelectionByName(name));
+			break;
+		}
+		
+		case pt_KW_RESSEQ:
+			Match(pt_KW_RESSEQ);
+			result = ParseResid();
+			break;
+	
+		case pt_KW_RESID:
+			Match(pt_KW_RESID);
+			result = ParseResid();
+			break;
+		
+		case pt_KW_ALL:
+			Match(pt_KW_ALL);
+			result.reset(new TLSSelectionAll());
+			break;
+		
+		case pt_CHAINRESID:
+			result = ParseChainResid();
+			break;
+		
+		default:
+			throw runtime_error("Unexpected token " + ToString(m_lookahead));
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseResid()
+{
+	TLSSelectionPtr result;
+	
+	for (;;)
+	{
+		int from, to;
+				
+		if (m_lookahead == pt_RANGE)
+		{
+			from = m_value_r[0];
+			to = m_value_r[1];
+			Match(pt_RANGE);
+		}
+		else
+		{
+			from = m_value_i;
+			Match(pt_NUMBER);
+			
+			to = from;
+			if (m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH)
+			{
+				Match(m_lookahead);
+				to = m_value_i;
+				Match(pt_NUMBER);
+			}
+		}
+		
+		TLSSelectionPtr range(new TLSSelectionRangeSeq(from, to));
+		
+		if (result)
+			result.reset(new TLSSelectionUnion(result, range));
+		else
+			result.reset(range.release());
+		
+		if (m_lookahead == ',')
+		{
+			Match(',');
+			continue;
+		}
+		
+		break;
+	}
+	
+	return result;
+}
+
+TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseChainResid()
+{
+	TLSSelectionPtr result;
+	
+	for (;;)
+	{
+		int from, to;
+		
+		from = to = m_value_i;
+		string chainID = m_value_s;
+		
+		Match(pt_CHAINRESID);
+
+		if (m_lookahead == '-')
+		{
+			Match(m_lookahead);
+			to = m_value_i;
+			
+			if (m_value_s != chainID)
+				throw runtime_error("Cannot have two different chainIDs in a range selection");
+			
+			Match(pt_CHAINRESID);
+		}
+		
+		TLSSelectionPtr sc(new TLSSelectionChain(chainID));
+		TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
+		TLSSelectionPtr range(new TLSSelectionIntersection(sc, sr));
+		
+		if (result)
+			result.reset(new TLSSelectionUnion(result, range));
+		else
+			result.reset(range.release());
+		
+		if (m_lookahead == ',')
+		{
+			Match(',');
+			continue;
+		}
+		
+		break;
+	}
+	
+	return result;
+}
+
+// --------------------------------------------------------------------
+
+class TLSSelectionParserBase
+{
+  public:
+	virtual TLSSelectionPtr Parse(const string& selection) const = 0;
+	virtual ~TLSSelectionParserBase() {}
+};
+
+template<typename IMPL>
+class TLSSelectionParser
+{
+  public:
+	virtual TLSSelectionPtr Parse(const string& selection) const
+	{
+		TLSSelectionPtr result;
+		
+		try
+		{
+			IMPL p(selection);
+			result = p.Parse();
+		}
+		catch (const exception& ex)
+		{
+			cerr << "ParseError: " << ex.what() << endl;
+		}
+		
+		return result;
+	}
+};
+
+// --------------------------------------------------------------------
+
+
+TLSSelectionPtr ParseSelectionDetails(const string& program, const string& selection)
+{
+	TLSSelectionParser<TLSSelectionParserImplPhenix> phenix;
+	TLSSelectionParser<TLSSelectionParserImplBuster> buster;
+	TLSSelectionParser<TLSSelectionParserImplBusterOld> busterOld;
+
+	TLSSelectionPtr result;
+
+	if (ba::icontains(program, "buster"))
+	{
+		result = buster.Parse(selection);
+
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to old BUSTER" << endl;
+			result = busterOld.Parse(selection);
+		}
+		
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to PHENIX" << endl;
+			result = phenix.Parse(selection);
+		}
+	}
+	else if (ba::icontains(program, "phenix"))
+	{
+		result = phenix.Parse(selection);
+
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to BUSTER" << endl;
+			result = buster.Parse(selection);
+		}
+
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to old BUSTER" << endl;
+			result = busterOld.Parse(selection);
+		}
+	}
+	else
+	{
+		if (VERBOSE)
+			cerr << "No known program specified, trying PHENIX" << endl;
+
+		result = phenix.Parse(selection);
+
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to BUSTER" << endl;
+			result = buster.Parse(selection);
+		}
+
+		if (not result)
+		{
+			if (VERBOSE)
+				cerr << "Falling back to old BUSTER" << endl;
+			result = busterOld.Parse(selection);
+		}
+	}
+	
+	return result;
+}
+
+}