parser just started working again, a bit

24fa80ba · Maarten L. Hekkelman · 3999d792 · 24fa80ba · 24fa80ba · 24fa80ba
Commit 24fa80ba authored Aug 02, 2022 by Maarten L. Hekkelman
9 changed files
--- a/include/cif++/v2/category.hpp
+++ b/include/cif++/v2/category.hpp
@@ -432,22 +432,190 @@ class category_t

 		if (result == m_columns.size())
 		{
-			const ValidateItem *itemValidator = nullptr;
+			const ValidateItem *item_validator = nullptr;

 			// if (mCatValidator != nullptr)
 			// {
-			// 	itemValidator = mCatValidator->getValidatorForItem(column_name);
-			// 	if (itemValidator == nullptr)
-			// 		mValidator->reportError("tag " + std::string(column_name) + " not allowed in Category " + mName, false);
+			// 	item_validator = mCatValidator->getValidatorForItem(column_name);
+			// 	if (item_validator == nullptr)
+			// 		m_validator->reportError("tag " + std::string(column_name) + " not allowed in Category " + mName, false);
 			// }

-			m_columns.emplace_back(column_name, itemValidator);
+			m_columns.emplace_back(column_name, item_validator);
 		}

 		return result;
 	}

  private:
+	void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true) // replace the item stored for `column` in `row` by `value`; updateLinked and validate are only used by the disabled code below
+	{
+		auto &col = m_columns[column]; // only referenced by the disabled validation/cascade code
+
+		const char *oldValue = nullptr;
+		for (auto iv = row->m_head; iv != nullptr; iv = iv->m_next) // find the value currently stored for this column, if any
+		{
+			assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next)); // guard against a cycle of one or two nodes
+
+			if (iv->m_column_ix == column)
+			{
+				oldValue = iv->c_str();
+				break;
+			}
+		}
+
+		if (oldValue != nullptr and value == oldValue) // no need to update
+			return;
+
+		std::string oldStrValue = oldValue ? oldValue : ""; // copy taken before the old item is deleted; only used by the disabled cascade code
+
+		// // check the value
+		// if (col.m_validator and validate)
+		// 	(*col.m_validator)(value);
+
+		// If the field is part of the Key for this Category, remove it from the index
+		// before updating
+
+		bool reinsert = false; // stays false as long as the index code below is disabled
+
+		// if (updateLinked and // an update of an Item's value
+		// 	cat->mIndex != nullptr and cat->keyFieldsByIndex().count(column))
+		// {
+		// 	reinsert = cat->mIndex->find(mData);
+		// 	if (reinsert)
+		// 		cat->mIndex->erase(mData);
+		// }
+
+		// first remove old value with cix
+
+		if (row->m_head == nullptr)
+			; // nothing to do
+		else if (row->m_head->m_column_ix == column) // the old item is the head of the list
+		{
+			auto iv = row->m_head;
+			row->m_head = iv->m_next;
+			iv->m_next = nullptr; // detach before deleting
+			delete_item(iv);
+		}
+		else
+		{
+			for (auto iv = row->m_head; iv->m_next != nullptr; iv = iv->m_next) // look one node ahead so the predecessor can be relinked
+			{
+				if (iv->m_next->m_column_ix != column)
+					continue;
+
+				auto nv = iv->m_next;
+				iv->m_next = nv->m_next;
+				nv->m_next = nullptr; // detach before deleting
+				delete_item(nv);
+
+				break;
+			}
+		}
+
+		if (not value.empty()) // an empty value is represented by having no item for the column
+		{
+			auto nv = create_item(column, value);
+
+			if (row->m_head == nullptr)
+				row->m_head = nv;
+			else
+			{
+				auto iv = row->m_head;
+				while (iv->m_next != nullptr) // walk to the tail, the new item is appended
+					iv = iv->m_next;
+				iv->m_next = nv;
+			}
+		}
+
+		// if (reinsert)
+		// 	cat->mIndex->insert(mData);
+
+		// // see if we need to update any child categories that depend on this value
+		// auto iv = col.m_validator;
+		// if (not skipUpdateLinked and iv != nullptr and mCascade)
+		// {
+		// 	for (auto &&[childCat, linked] : cat->mChildLinks)
+		// 	{
+		// 		if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
+		// 			continue;
+
+		// 		Condition cond;
+		// 		std::string childTag;
+
+		// 		for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
+		// 		{
+		// 			std::string pk = linked->mParentKeys[ix];
+		// 			std::string ck = linked->mChildKeys[ix];
+
+		// 			// TODO add code to *NOT* test mandatory fields for Empty
+
+		// 			if (pk == iv->mTag)
+		// 			{
+		// 				childTag = ck;
+		// 				cond = std::move(cond) && Key(ck) == oldStrValue;
+		// 			}
+		// 			else
+		// 			{
+		// 				const char *pk_value = (*this)[pk].c_str();
+		// 				if (*pk_value == 0)
+		// 					cond = std::move(cond) && Key(ck) == Empty();
+		// 				else
+		// 					cond = std::move(cond) && ((Key(ck) == pk_value) or Key(ck) == Empty());
+		// 			}
+		// 		}
+
+		// 		auto rows = childCat->find(std::move(cond));
+		// 		if (rows.empty())
+		// 			continue;
+
+		// 		// if (cif::VERBOSE > 2)
+		// 		// {
+		// 		// 	std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
+		// 		// 			  << cond << std::endl;
+		// 		// }
+
+		// 		// Now, suppose there are already rows in child that conform to the new value,
+		// 		// we then skip this rename
+
+		// 		Condition cond_n;
+
+		// 		for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
+		// 		{
+		// 			std::string pk = linked->mParentKeys[ix];
+		// 			std::string ck = linked->mChildKeys[ix];
+
+		// 			// TODO add code to *NOT* test mandatory fields for Empty
+
+		// 			if (pk == iv->mTag)
+		// 				cond_n = std::move(cond_n) && Key(ck) == value;
+		// 			else
+		// 			{
+		// 				const char *pk_value = (*this)[pk].c_str();
+		// 				if (*pk_value == 0)
+		// 					cond_n = std::move(cond_n) && Key(ck) == Empty();
+		// 				else
+		// 					cond_n = std::move(cond_n) && ((Key(ck) == pk_value) or Key(ck) == Empty());
+		// 			}
+		// 		}
+
+		// 		auto rows_n = childCat->find(std::move(cond_n));
+		// 		if (not rows_n.empty())
+		// 		{
+		// 			if (cif::VERBOSE > 0)
+		// 				std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
+
+		// 			continue;
+		// 		}
+
+		// 		for (auto &cr : rows)
+		// 			cr.assign(childTag, value, false);
+		// 	}
+		// }
+	}
+
+
+  private:
 	using char_allocator_type = typename std::allocator_traits<Alloc>::template rebind_alloc<char>;
 	using char_allocator_traits = std::allocator_traits<char_allocator_type>;


--- a/include/cif++/v2/datablock.hpp
+++ b/include/cif++/v2/datablock.hpp
@@ -34,17 +34,22 @@ namespace cif::v2
 // --------------------------------------------------------------------

 template <
-	typename Category = category,
-	typename Alloc = std::allocator<Category>>
-class datablock_t : public std::list<Category, Alloc>
+	typename Alloc = std::allocator<void>,
+	typename Category = category_t<Alloc>>
+class datablock_t
 {
  public:
 	using category_type = Category;
-	using base_type = std::list<category_type, Alloc>;
 	using allocator_type = Alloc;

-	datablock_t(const std::string &name, const allocator_type &alloc = allocator_type())
-		: base_type(alloc)
+	using category_allocator_type = typename std::allocator_traits<Alloc>::template rebind_alloc<category_type>;
+	using category_type_list = std::list<category_type, category_allocator_type>;
+
+	using iterator = typename category_type_list::iterator; // dependent type: `typename` is required before C++20
+	using const_iterator = typename category_type_list::const_iterator; // read-only counterpart of iterator
+
+	datablock_t(std::string_view name, const allocator_type &alloc = allocator_type()) // create an empty datablock called `name`
+		: m_categories(alloc) // alloc is handed to the category list constructor
 		, m_name(name)
 	{
 	}
@@ -53,19 +58,19 @@ class datablock_t : public std::list<Category, Alloc>

 	datablock_t(datablock_t &&) = default;

-	template <typename Alloc2>
-	datablock_t(const datablock_t &db, const Alloc2 &a)
-		: base_type(db, a)
-		, m_name(db.m_name)
-	{
-	}
+	// template <typename Alloc2>
+	// datablock_t(const datablock_t &db, const Alloc2 &a)
+	// 	: m_categories(db, a)
+	// 	, m_name(db.m_name)
+	// {
+	// }

-	template <typename Alloc2>
-	datablock_t(datablock_t &&db, const Alloc2 &a)
-		: base_type(std::move(db), a)
-		, m_name(db.m_name)
-	{
-	}
+	// template <typename Alloc2>
+	// datablock_t(datablock_t &&db, const Alloc2 &a)
+	// 	: base_type(std::move(db), a)
+	// 	, m_name(db.m_name)
+	// {
+	// }

 	datablock_t &operator=(const datablock_t &) = default;
 	datablock_t &operator=(datablock_t &&) = default;
@@ -78,19 +83,57 @@ class datablock_t : public std::list<Category, Alloc>

 	category_type &operator[](std::string_view name)
 	{
-		auto i = std::find_if(this->begin(), this->end(), [name](const category_type &c)
+		auto i = std::find_if(m_categories.begin(), m_categories.end(), [name](const category_type &c)
 			{ return iequals(c.name(), name); });
-		if (i == this->end())
-			i = this->emplace(name);
-		return *i;
+		
+		if (i != m_categories.end())
+			return *i;
+
+		m_categories.emplace_back(name);
+		return m_categories.back();
 	}

 	const category_type &operator[](std::string_view name) const
 	{
 		static const category_type s_empty;
-		auto i = std::find_if(this->begin(), this->end(), [name](const category_type &c)
+		auto i = std::find_if(m_categories.begin(), m_categories.end(), [name](const category_type &c)
 			{ return iequals(c.name(), name); });
-		return i == this->end() ? s_empty : *i;
+		return i == m_categories.end() ? s_empty : *i;
+	}
+
+	// Make sure a category called `name` exists and is the first one in the list.
+	// An existing category (names compare case-insensitively) is moved to the
+	// front, otherwise a new one is created there. Returns an iterator to the
+	// front of the list and a flag telling whether the category was created.
+	std::tuple<iterator, bool> emplace(std::string_view name)
+	{
+		auto pos = std::find_if(m_categories.begin(), m_categories.end(), [name](const category_type &c)
+			{ return iequals(name, c.name()); });
+
+		const bool is_new = (pos == m_categories.end());
+
+		if (is_new)
+		{
+			m_categories.emplace(m_categories.begin(), name);
+			// m_categories.emplace(begin(), *this, std::string(name), mValidator);
+
+			// for (auto &cat : mCategories)
+			// 	cat.updateLinks();
+		}
+		else if (pos != m_categories.begin())
+			m_categories.splice(m_categories.begin(), m_categories, pos); // relink the existing node to the front
+
+		return std::make_tuple(m_categories.begin(), is_new);
+	}

 	void write(std::ostream &os) const
@@ -104,7 +147,7 @@ class datablock_t : public std::list<Category, Alloc>
 		// and if it exists, _AND_ we have a Validator, write out the
 		// audit_conform record.

-		for (auto &cat : *this)
+		for (auto &cat : m_categories)
 		{
 			if (cat.name() != "entry")
 				continue;
@@ -122,7 +165,7 @@ class datablock_t : public std::list<Category, Alloc>
 			break;
 		}

-		for (auto &cat : *this)
+		for (auto &cat : m_categories)
 		{
 			if (cat.name() != "entry" and cat.name() != "audit_conform")
 				cat.write(os);
@@ -136,6 +179,7 @@ class datablock_t : public std::list<Category, Alloc>
 	}

  private:
+	category_type_list m_categories;
 	std::string m_name;
 };


--- a/include/cif++/v2/file.hpp
+++ b/include/cif++/v2/file.hpp
@@ -27,6 +27,7 @@
 #pragma once

 #include "datablock.hpp"
+#include "parser.hpp"

 namespace cif::v2
 {
@@ -34,25 +35,121 @@ namespace cif::v2
 // --------------------------------------------------------------------

 template <
-	typename Datablock = datablock,
-	typename Alloc = std::allocator<Datablock>>
-class file_t : public std::list<Datablock, Alloc>
+	typename Alloc = std::allocator<void>,
+	typename Datablock = datablock_t<Alloc>,
+	typename Category = typename Datablock::category_type>
+class file_t
 {
  public:
-	using value_type = Datablock;
-	using base_type = std::list<value_type, Alloc>;
 	using allocator_type = Alloc;

+	using datablock_type = Datablock;
+	using category_type = typename datablock_type::category_type;
+
+	using datablock_allocator_type = typename std::allocator_traits<Alloc>::template rebind_alloc<datablock_type>;
+	using datablock_list = std::list<datablock_type, datablock_allocator_type>;
+
+	using value_type = typename datablock_list::value_type; // dependent types: `typename` is required before C++20
+	using reference = typename datablock_list::reference;
+	using pointer = typename datablock_list::pointer;
+
+	using iterator = typename datablock_list::iterator;
+	using const_iterator = typename datablock_list::const_iterator;
+
+	using parser_type = parser_t<file_t, datablock_type, category_type>;
+
 	file_t() = default;
 
+	// Construct an empty file whose datablock list uses allocator `a`.
+	// There is deliberately no default argument here: together with the
+	// defaulted constructor above it would make `file_t f;` ambiguous.
+	file_t(const allocator_type &a)
+		: m_datablocks(a)
+	{
+	}
+
+	// Construct a file by parsing the contents of `is`, see load().
 	file_t(std::istream &is, const allocator_type &alloc = allocator_type())
+		: m_datablocks(alloc)
 	{
+		load(is);
 	}

 	file_t(const file_t &) = default;
 	file_t(file_t &&) = default;
 	file_t &operator=(const file_t &) = default;
 	file_t &operator=(file_t &&) = default;
+
+	// Return the datablock called `name` (compared case-insensitively).
+	// When there is none yet, an empty one is appended and returned.
+	datablock_type &operator[](std::string_view name)
+	{
+		for (auto i = m_datablocks.begin(); i != m_datablocks.end(); ++i)
+		{
+			const datablock_type &db = *i;
+			if (iequals(db.name(), name))
+				return *i;
+		}
+
+		return m_datablocks.emplace_back(name);
+	}
+
+	const datablock_type &operator[](std::string_view name) const // read-only lookup, never inserts
+	{
+		static const datablock_type s_empty; // shared placeholder returned when no datablock matches
+		auto i = std::find_if(m_datablocks.begin(), m_datablocks.end(), [name](const datablock_type &c)
+			{ return iequals(c.name(), name); }); // names compare case-insensitively
+		return i == m_datablocks.end() ? s_empty : *i;
+	}
+
+	// Make sure a datablock called `name` exists and is the first one in the
+	// list. An existing datablock (names compare case-insensitively) is moved
+	// to the front, otherwise a new one is created there. Returns an iterator
+	// to the front of the list and a flag telling whether it was created.
+	std::tuple<iterator, bool> emplace(std::string_view name)
+	{
+		auto pos = std::find_if(m_datablocks.begin(), m_datablocks.end(), [name](const datablock_type &db)
+			{ return iequals(name, db.name()); });
+
+		const bool is_new = (pos == m_datablocks.end());
+
+		if (is_new)
+			m_datablocks.emplace(m_datablocks.begin(), name);
+		else if (pos != m_datablocks.begin())
+			m_datablocks.splice(m_datablocks.begin(), m_datablocks, pos); // relink the existing node to the front
+
+		return std::make_tuple(m_datablocks.begin(), is_new);
+	}
+
+	bool empty() const { return m_datablocks.empty(); } // true when the file holds no datablocks
+	size_t size() const { return m_datablocks.size(); } // number of datablocks
+
+	reference front() { return m_datablocks.front(); } // forwards to the list; must not be called on an empty file
+	reference back() { return m_datablocks.back(); } // forwards to the list; must not be called on an empty file
+
+
+
+	void load(std::istream &is) // parse the content of `is` with parser_type, which is given a reference to this file
+	{
+		// auto saved = mValidator;
+		// setValidator(nullptr);
+
+		parser_type p(is, *this);
+		p.parseFile(); // validation, see the disabled code around this call, is not done yet
+
+		// if (saved != nullptr)
+		// {
+		// 	setValidator(saved);
+		// 	(void)isValid();
+		// }
+	}
+
+  private:
+	datablock_list m_datablocks;
 };

 using file = file_t<>;

--- a/include/cif++/v2/item.hpp
+++ b/include/cif++/v2/item.hpp
@@ -66,7 +66,7 @@ class item
 		auto r = cif::to_chars(m_buffer, m_buffer + sizeof(m_buffer) - 1, value, cif::chars_format::fixed, precision);
 		if (r.ec != std::errc())
 			throw std::runtime_error("Could not format number");
-		
+
 		assert(r.ptr >= m_buffer and r.ptr < m_buffer + sizeof(m_buffer));
 		*r.ptr = 0;
 		m_value = std::string_view(m_buffer, r.ptr - m_buffer);
@@ -94,7 +94,7 @@ class item
 		auto r = std::to_chars(m_buffer, m_buffer + sizeof(m_buffer) - 1, value);
 		if (r.ec != std::errc())
 			throw std::runtime_error("Could not format number");
-		
+
 		assert(r.ptr >= m_buffer and r.ptr < m_buffer + sizeof(m_buffer));
 		*r.ptr = 0;
 		m_value = std::string_view(m_buffer, r.ptr - m_buffer);
@@ -134,44 +134,52 @@ class item
  private:
 	std::string_view m_name;
 	std::string_view m_value;
-	char m_buffer[64];		// TODO: optimize this magic number, might be too large
+	char m_buffer[64]; // TODO: optimize this magic number, might be too large
 };

 // --------------------------------------------------------------------
 // Transient object to access stored data

-template <typename Row>
+template <typename RowHandle>
 struct item_handle
 {
-	using row_type = Row;
+	using row_handle_type = RowHandle;

  public:
 	// conversion helper class
 	template <typename T, typename = void>
 	struct item_value_as;

-	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
+	template <typename T>
 	item_handle &operator=(const T &value)
 	{
-		this->operator=(std::to_string(value));
+		item v{"", value};
+		m_row_handle.assign(m_column, v.value(), false);
 		return *this;
 	}

-	template <typename T>
-	item_handle &operator=(const std::optional<T> &value)
-	{
-		if (value)
-			this->operator=(*value);
-		else
-			this->operator=("?");
-		return *this;
-	}
+	// template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
+	// item_handle &operator=(const T &value)
+	// {
+	// 	this->operator=(std::to_string(value));
+	// 	return *this;
+	// }

-	item_handle &operator=(const std::string &value)
-	{
-		m_row.assign(m_column, value, false);
-		return *this;
-	}
+	// template <typename T>
+	// item_handle &operator=(const std::optional<T> &value)
+	// {
+	// 	if (value)
+	// 		this->operator=(*value);
+	// 	else
+	// 		this->operator=("?");
+	// 	return *this;
+	// }
+
+	// item_handle &operator=(std::string_view value)
+	// {
+	// 	m_row_handle.assign(m_column, value, false);
+	// 	return *this;
+	// }

 	template <typename... Ts>
 	void os(const Ts &...v)
@@ -227,7 +235,7 @@ struct item_handle

 	// const char *c_str() const
 	// {
-	// 	for (auto iv = m_row.m_head; iv != nullptr; iv = iv->m_next)
+	// 	for (auto iv = m_row_handle.m_head; iv != nullptr; iv = iv->m_next)
 	// 	{
 	// 		if (iv->m_column_ix == m_column)
 	// 			return iv->m_text;
@@ -238,7 +246,7 @@ struct item_handle

 	std::string_view text() const
 	{
-		for (auto iv = m_row.m_head; iv != nullptr; iv = iv->m_next)
+		for (auto iv = m_row_handle.m_row->m_head; iv != nullptr; iv = iv->m_next)
 		{
 			if (iv->m_column_ix == m_column)
 				return iv->text();
@@ -250,15 +258,15 @@ struct item_handle
 	// bool operator!=(const std::string &s) const { return s != c_str(); }
 	// bool operator==(const std::string &s) const { return s == c_str(); }

-	item_handle(uint16_t column, row_type &row)
+	item_handle(uint16_t column, row_handle_type &row)
 		: m_column(column)
-		, m_row(row)
+		, m_row_handle(row)
 	{
 	}

  private:
 	uint16_t m_column;
-	row_type &m_row;
+	row_handle_type &m_row_handle;
 	// bool mConst = false;

 	static constexpr const char *s_empty_result = "";

--- a/include/cif++/v2/iterator.hpp
+++ b/include/cif++/v2/iterator.hpp
@@ -55,6 +55,8 @@ class iterator_impl
 	using pointer = std::conditional_t<N == 0, row_handle_type, value_type *>;
 	using reference = std::conditional_t<N == 0, row_handle_type, value_type &>;

+	iterator_impl() = default;
+
 	iterator_impl(const iterator_impl &rhs) = default;

 	template<typename C2, typename... T2s>
@@ -188,8 +190,8 @@ class iterator_impl
 		return {};
 	}

-	category_type *m_category;
-	row_type *m_current;
+	category_type *m_category = nullptr;
+	row_type *m_current = nullptr;
 	value_type m_value;
 	std::array<size_t, N> m_column_ix;
 };

--- a/include/cif++/v2/parser.hpp
+++ b/include/cif++/v2/parser.hpp
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <map>
+#include <stack>
+
+namespace cif::v2
+{
+
+// --------------------------------------------------------------------
+
+// Exception type carrying a message prefixed with the line number it refers to.
+class parse_error : public std::runtime_error
+{
+  public:
+	parse_error(uint32_t line_nr, const std::string &message)
+		: std::runtime_error(format(line_nr, message))
+	{
+	}
+
+  private:
+	// Compose the text stored in the exception
+	static std::string format(uint32_t line_nr, const std::string &message)
+	{
+		std::string text = "parse error at line ";
+		text += std::to_string(line_nr);
+		text += ": ";
+		text += message;
+		return text;
+	}
+};
+
+// --------------------------------------------------------------------
+
+class sac_parser
+{
+  public:
+	using DatablockIndex = std::map<std::string, std::size_t>;
+
+	// Construct a parser reading from `is` and read the first token right away.
+	sac_parser(std::istream &is)
+		: mData(is)
+	{
+		// the scanner state has to be set up before the first token is read
+		m_bol = true;
+		m_line_nr = 1;
+		m_validate = true;
+
+		// prime the one-token lookahead
+		m_lookahead = get_next_token();
+	}
+
+	virtual ~sac_parser() = default;
+
+	enum CharTraitsMask : uint8_t // bit flags, combined per character in kCharTraitsTable
+	{
+		kOrdinaryMask = 1 << 0, // tested by is_ordinary
+		kNonBlankMask = 1 << 1, // tested by is_non_blank
+		kTextLeadMask = 1 << 2, // tested by is_text_lead
+		kAnyPrintMask = 1 << 3 // tested by is_any_print
+	};
+
+	// True when ch separates tokens: one of the ASCII white space characters
+	// (space, \t, \n, \v, \f, \r) or '#', which starts a comment.
+	// The test is written out instead of calling std::isspace since that
+	// function is not constexpr and is undefined for negative values other
+	// than EOF; for EOF this returns false.
+	static constexpr bool is_white(int ch)
+	{
+		return ch == ' ' or (ch >= '\t' and ch <= '\r') or ch == '#';
+	}
+
+	// True when the table entry for ch has the 'ordinary' bit set
+	static constexpr bool is_ordinary(int ch)
+	{
+		if (ch < 0x20 or ch > 0x7f)
+			return false;
+
+		return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
+	}
+
+	// True when the table entry for ch has the 'non blank' bit set, the space itself is excluded
+	static constexpr bool is_non_blank(int ch)
+	{
+		if (ch <= 0x20 or ch > 0x7f)
+			return false;
+
+		return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
+	}
+
+	// True when the table entry for ch has the 'text lead' bit set
+	static constexpr bool is_text_lead(int ch)
+	{
+		if (ch < 0x20 or ch > 0x7f)
+			return false;
+
+		return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
+	}
+
+	// True for a tab and for each character whose table entry has the 'any print' bit set
+	static constexpr bool is_any_print(int ch)
+	{
+		if (ch == '\t')
+			return true;
+
+		if (ch < 0x20 or ch > 0x7f)
+			return false;
+
+		return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
+	}
+
+	// Returns true when the nul-terminated string s can be used as a value
+	// without quoting: it has to start with an ordinary character, all other
+	// characters must be non-blank and it must not look like a reserved word.
+	// An empty string is never an unquoted string.
+	static bool is_unquoted_string(const char *s)
+	{
+		bool result = is_ordinary(*s);
+
+		for (auto p = s + 1; result and *p != 0; ++p)
+			result = is_non_blank(*p);
+
+		// but be careful it does not contain e.g. stop_
+		if (result)
+		{
+			// The part after the underscore may be empty: the bare words loop_,
+			// stop_ and global_ are complete keywords for the tokenizer and thus
+			// must be rejected here as well.
+			static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.*)", std::regex_constants::icase);
+			result = not std::regex_match(s, reservedRx);
+		}
+
+		return result;
+	}
+
+  protected:
+	static constexpr uint8_t kCharTraitsTable[128] = { // CharTraitsMask bits per character, indexed by ch - 0x20; only 96 entries are listed, the remainder is zero
+		//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f		(low nibble of ch, high nibble at end of row)
+		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
+		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, //	3
+		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	4
+		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, //	5
+		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	6
+		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  //	7
+	};
+
+	enum class CIFToken // kinds of token returned by get_next_token
+	{
+		Unknown, // no token recognised yet, used as loop condition in get_next_token
+
+		Eof,
+
+		DATA, // data_<name>
+		LOOP, // loop_
+		GLOBAL, // global_
+		SAVE, // save_<name>
+		STOP, // stop_
+		Tag, // starts with an underscore
+		Value // anything else, see CIFValue for the kind of value
+	};
+
+	// Returns a printable name for token, used in diagnostics.
+	// A value that is not one of the enumerators yields a fallback text
+	// instead of flowing off the end of the function.
+	static constexpr const char *get_token_name(CIFToken token)
+	{
+		switch (token)
+		{
+			case CIFToken::Unknown: return "Unknown";
+			case CIFToken::Eof: return "Eof";
+			case CIFToken::DATA: return "DATA";
+			case CIFToken::LOOP: return "LOOP";
+			case CIFToken::GLOBAL: return "GLOBAL";
+			case CIFToken::SAVE: return "SAVE";
+			case CIFToken::STOP: return "STOP";
+			case CIFToken::Tag: return "Tag";
+			case CIFToken::Value: return "Value";
+		}
+
+		// no default label above, so the compiler can still warn about a missing enumerator
+		return "Invalid token";
+	}
+
+	enum class CIFValue // kind of value stored in mTokenType by get_next_token
+	{
+		Int,
+		Float,
+		Numeric,
+		String, // set for quoted strings
+		TextField, // set for text fields delimited by a semicolon at the start of a line
+		Inapplicable, // set for the value '.'
+		Unknown // initial type of each token, also the type of the value '?'
+	};
+
+	// Returns a printable name for type, used in diagnostics.
+	// A value that is not one of the enumerators yields a fallback text
+	// instead of flowing off the end of the function.
+	static constexpr const char *get_value_name(CIFValue type)
+	{
+		switch (type)
+		{
+			case CIFValue::Int: return "Int";
+			case CIFValue::Float: return "Float";
+			case CIFValue::Numeric: return "Numeric";
+			case CIFValue::String: return "String";
+			case CIFValue::TextField: return "TextField";
+			case CIFValue::Inapplicable: return "Inapplicable";
+			case CIFValue::Unknown: return "Unknown";
+		}
+
+		// no default label above, so the compiler can still warn about a missing enumerator
+		return "Invalid value type";
+	}
+
+	// getNextChar takes a char from the pushback buffer, or if that is empty
+	// from the istream. This function also does carriage/linefeed
+	// translation, appends the char to mTokenValue and keeps m_line_nr up to date.
+	int getNextChar()
+	{
+		int result;
+
+		if (mBuffer.empty())
+			result = mData.get();
+		else
+		{
+			result = mBuffer.top();
+			mBuffer.pop();
+		}
+
+		// very simple CR/LF translation into LF
+		if (result == '\r')
+		{
+			int lookahead = mData.get(); // NOTE(review): always reads the stream, also when the CR itself came from mBuffer
+			if (lookahead != '\n')
+				mBuffer.push(lookahead); // a lone CR counts as a newline, keep the char that followed it
+			result = '\n';
+		}
+
+		mTokenValue += static_cast<char>(result); // done for EOF as well, retract() pops what is recorded here
+
+		if (result == '\n')
+			++m_line_nr;
+
+		if (VERBOSE >= 6) // trace each char read
+		{
+			std::cerr << "getNextChar => ";
+			if (iscntrl(result) or not isprint(result))
+				std::cerr << int(result) << std::endl; // not printable, show the numeric value
+			else
+				std::cerr << char(result) << std::endl;
+		}
+
+		return result;
+	}
+
+	void retract() // undo the last getNextChar: move the last char of mTokenValue back to the pushback buffer
+	{
+		assert(not mTokenValue.empty());
+
+		char ch = mTokenValue.back();
+		if (ch == '\n')
+			--m_line_nr; // the newline is counted again when it is read the next time
+
+		mBuffer.push(ch); // NOTE(review): an EOF recorded by getNextChar is pushed as a char here - confirm this maps back to EOF for the element type of mBuffer
+		mTokenValue.pop_back();
+	}
+
+	// Give up on the current attempt to recognise a token: push back all
+	// characters read so far and return the state to try next. The order is
+	// Start -> Float -> Int -> Value, any other value for start is an error.
+	int restart(int start)
+	{
+		while (not mTokenValue.empty())
+			retract();
+
+		int next_state = 0;
+
+		if (start == State::Start)
+			next_state = State::Float;
+		else if (start == State::Float)
+			next_state = State::Int;
+		else if (start == State::Int)
+			next_state = State::Value;
+		else
+			error("Invalid state in SacParser");
+
+		m_bol = false;
+
+		return next_state;
+	}
+
+	// Scan the next token from the input using a hand written state machine.
+	// On return mTokenValue contains the text of the token (without quotes,
+	// text field delimiters or the data_/save_ prefix) and mTokenType the kind
+	// of value in case a CIFToken::Value is returned.
+	// A token that might be a number is tried as Float, then as Int and
+	// finally as a plain Value, see restart().
+	// Problems are reported through error() and warning().
+	CIFToken get_next_token()
+	{
+		const auto kEOF = std::char_traits<char>::eof();
+
+		CIFToken result = CIFToken::Unknown;
+		int quoteChar = 0;
+		int state = State::Start, start = State::Start;
+		m_bol = false;
+
+		mTokenValue.clear();
+		mTokenType = CIFValue::Unknown;
+
+		while (result == CIFToken::Unknown)
+		{
+			auto ch = getNextChar();
+
+			switch (state)
+			{
+				case State::Start:
+					if (ch == kEOF)
+						result = CIFToken::Eof;
+					else if (ch == '\n')
+					{
+						m_bol = true;
+						state = State::White;
+					}
+					else if (ch == ' ' or ch == '\t')
+						state = State::White;
+					else if (ch == '#')
+						state = State::Comment;
+					else if (ch == '_')
+						state = State::Tag;
+					else if (ch == ';' and m_bol)
+						state = State::TextField;
+					else if (ch == '\'' or ch == '"')
+					{
+						quoteChar = ch;
+						state = State::QuotedString;
+					}
+					else
+						state = start = restart(start);
+					break;
+
+				case State::White:
+					if (ch == kEOF)
+						result = CIFToken::Eof;
+					else if (not isspace(ch))
+					{
+						state = State::Start;
+						retract();
+						mTokenValue.clear();
+					}
+					else
+						m_bol = (ch == '\n');
+					break;
+
+				case State::Comment:
+					if (ch == '\n')
+					{
+						state = State::Start;
+						m_bol = true;
+						mTokenValue.clear();
+					}
+					else if (ch == kEOF)
+						result = CIFToken::Eof;
+					else if (not is_any_print(ch))
+						error("invalid character in comment");
+					break;
+
+				case State::TextField:
+					if (ch == '\n')
+						state = State::TextField + 1;
+					else if (ch == kEOF)
+						error("unterminated textfield");
+					else if (not is_any_print(ch))
+						warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
+					break;
+
+				// at the start of a line inside a text field
+				case State::TextField + 1:
+					if (is_text_lead(ch) or ch == ' ' or ch == '\t')
+						state = State::TextField;
+					else if (ch == ';')
+					{
+						// strip the leading ';' and the trailing "\n;"
+						assert(mTokenValue.length() >= 2);
+						mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
+						mTokenType = CIFValue::TextField;
+						result = CIFToken::Value;
+					}
+					else if (ch == kEOF)
+						error("unterminated textfield");
+					else if (ch != '\n')
+						error("invalid character in text field");
+					break;
+
+				case State::QuotedString:
+					if (ch == kEOF)
+						error("unterminated quoted string");
+					else if (ch == quoteChar)
+						state = State::QuotedStringQuote;
+					else if (not is_any_print(ch))
+						warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + '\'');
+					break;
+
+				// seen a quote character that might close the string
+				case State::QuotedStringQuote:
+					// The string is complete when white space follows the quote.
+					// The end of the input terminates it just as well, this is
+					// the same rule the numeric states below use.
+					if (is_white(ch) or ch == kEOF)
+					{
+						retract();
+						result = CIFToken::Value;
+						mTokenType = CIFValue::String;
+
+						if (mTokenValue.length() < 2)
+							error("Invalid quoted string token");
+
+						// strip the quotes
+						mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
+					}
+					else if (ch == quoteChar)
+						;
+					else if (is_any_print(ch))
+						state = State::QuotedString; // the quote was part of the value
+					else
+						error("invalid character in quoted string");
+					break;
+
+				case State::Tag:
+					if (not is_non_blank(ch))
+					{
+						retract();
+						result = CIFToken::Tag;
+					}
+					break;
+
+				case State::Float:
+					if (ch == '+' or ch == '-')
+					{
+						state = State::Float + 1;
+					}
+					else if (isdigit(ch))
+						state = State::Float + 1;
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Float + 1:
+					//				if (ch == '(')	// numeric???
+					//					mState = State::NumericSuffix;
+					//				else
+					if (ch == '.')
+						state = State::Float + 2;
+					else if (tolower(ch) == 'e')
+						state = State::Float + 3;
+					else if (is_white(ch) or ch == kEOF)
+					{
+						retract();
+						result = CIFToken::Value;
+						mTokenType = CIFValue::Int;
+					}
+					else
+						state = start = restart(start);
+					break;
+
+				// parsed '.'
+				case State::Float + 2:
+					if (tolower(ch) == 'e')
+						state = State::Float + 3;
+					else if (is_white(ch) or ch == kEOF)
+					{
+						retract();
+						result = CIFToken::Value;
+						mTokenType = CIFValue::Float;
+					}
+					else
+						state = start = restart(start);
+					break;
+
+				// parsed 'e'
+				case State::Float + 3:
+					if (ch == '-' or ch == '+')
+						state = State::Float + 4;
+					else if (isdigit(ch))
+						state = State::Float + 5;
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Float + 4:
+					if (isdigit(ch))
+						state = State::Float + 5;
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Float + 5:
+					if (is_white(ch) or ch == kEOF)
+					{
+						retract();
+						result = CIFToken::Value;
+						mTokenType = CIFValue::Float;
+					}
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Int:
+					if (isdigit(ch) or ch == '+' or ch == '-')
+						state = State::Int + 1;
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Int + 1:
+					if (is_white(ch) or ch == kEOF)
+					{
+						retract();
+						result = CIFToken::Value;
+						mTokenType = CIFValue::Int;
+					}
+					else
+						state = start = restart(start);
+					break;
+
+				case State::Value:
+					if (ch == '_')
+					{
+						// an underscore might complete one of the reserved words
+						std::string s = toLowerCopy(mTokenValue);
+
+						if (s == "global_")
+							result = CIFToken::GLOBAL;
+						else if (s == "stop_")
+							result = CIFToken::STOP;
+						else if (s == "loop_")
+							result = CIFToken::LOOP;
+						else if (s == "data_")
+						{
+							state = State::DATA;
+							continue;
+						}
+						else if (s == "save_")
+						{
+							state = State::SAVE;
+							continue;
+						}
+					}
+
+					if (result == CIFToken::Unknown and not is_non_blank(ch))
+					{
+						retract();
+						result = CIFToken::Value;
+
+						if (mTokenValue == ".")
+							mTokenType = CIFValue::Inapplicable;
+						else if (mTokenValue == "?")
+						{
+							mTokenType = CIFValue::Unknown;
+							mTokenValue.clear();
+						}
+					}
+					break;
+
+				case State::DATA:
+				case State::SAVE:
+					if (not is_non_blank(ch))
+					{
+						retract();
+
+						if (state == State::DATA)
+							result = CIFToken::DATA;
+						else
+							result = CIFToken::SAVE;
+
+						// drop the data_ or save_ prefix, what remains is the name
+						mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
+					}
+					break;
+
+				default:
+					assert(false);
+					error("Invalid state in get_next_token");
+					break;
+			}
+		}
+
+		if (VERBOSE >= 5)
+		{
+			std::cerr << get_token_name(result);
+			if (mTokenType != CIFValue::Unknown)
+				std::cerr << ' ' << get_value_name(mTokenType);
+			if (result != CIFToken::Eof)
+				std::cerr << " " << std::quoted(mTokenValue);
+			std::cerr << std::endl;
+		}
+
+		return result;
+	}
+
+	void match(CIFToken token)
+	{
+		if (m_lookahead != token)
+			error(std::string("Unexpected token, expected ") + get_token_name(token) + " but found " + get_token_name(m_lookahead));
+
+		m_lookahead = get_next_token();
+	}
+
+  public:
+	// Parse only the datablock called `datablock`.
+	//
+	// The stream buffer is scanned directly for the text data_<datablock>
+	// followed by white space, skipping comments, quoted strings and text
+	// fields. When found, produceDatablock is called, the lookahead token is
+	// read and parseDataBlock does the actual parsing.
+	//
+	// Returns true when the datablock was found.
+	//
+	// NOTE(review): a leading 'D' is accepted but the remainder of the
+	// header is compared case sensitively.
+	bool parseSingleDatablock(const std::string &datablock)
+	{
+		// first locate the start, as fast as we can
+		auto &sb = *mData.rdbuf();
+
+		enum
+		{
+			start,
+			comment,
+			string,
+			string_quote,
+			qstring,
+			data
+		} state = start;
+
+		int quote = 0;
+		bool bol = true;
+		std::string dblk = "data_" + datablock;
+		std::string::size_type si = 0;
+		bool found = false;
+
+		for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
+		{
+			switch (state)
+			{
+				case start:
+					switch (ch)
+					{
+						case '#': state = comment; break;
+						case 'd':
+						case 'D':
+							state = data;
+							si = 1;
+							break;
+						case '\'':
+						case '"':
+							state = string;
+							quote = ch;
+							break;
+						case ';':
+							if (bol)
+								state = qstring;
+							break;
+					}
+					break;
+
+				case comment:
+					if (ch == '\n')
+						state = start;
+					break;
+
+				case string:
+					if (ch == quote)
+						state = string_quote;
+					break;
+
+				case string_quote:
+					if (std::isspace(ch))
+						state = start;
+					else
+						state = string;
+					break;
+
+				case qstring:
+					if (ch == ';' and bol)
+						state = start;
+					break;
+
+				case data:
+					if (si == dblk.length())
+					{
+						// the whole name matched, it must end here
+						if (isspace(ch))
+							found = true;
+						else
+							state = start;
+					}
+					else if (dblk[si++] != ch)
+						state = start;
+					break;
+			}
+
+			// Stop right here, before the loop reads another character: the
+			// one following the white space belongs to the first token of
+			// the datablock.
+			if (found)
+				break;
+
+			bol = (ch == '\n');
+		}
+
+		if (found)
+		{
+			produceDatablock(datablock);
+			m_lookahead = get_next_token();
+			parseDataBlock();
+		}
+
+		return found;
+	}
+
+	/// Scan the whole stream and record, for every "data_<name>" header that
+	/// is followed by white space, the stream position right after that
+	/// white space character.
+	///
+	/// Comments, quoted strings and text fields are skipped so that a
+	/// "data_" inside one of those is not indexed.
+	///
+	/// @return  Index mapping data block names to stream positions
+	DatablockIndex indexDatablocks()
+	{
+		DatablockIndex index;
+
+		// first locate the start, as fast as we can
+		auto &sb = *mData.rdbuf();
+
+		enum
+		{
+			start,
+			comment,
+			string,
+			string_quote,
+			qstring,
+			data,
+			data_name
+		} state = start;
+
+		int quote = 0;
+		bool bol = true;
+		const char dblk[] = "data_";
+		std::string::size_type si = 0;
+		std::string datablock;
+
+		for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
+		{
+			switch (state)
+			{
+				case start:
+					switch (ch)
+					{
+						case '#': state = comment; break;
+						case 'd':
+						case 'D':
+							state = data;
+							si = 1;
+							break;
+						case '\'':
+						case '"':
+							state = string;
+							quote = ch;
+							break;
+						case ';':
+							if (bol)
+								state = qstring;
+							break;
+					}
+					break;
+
+				case comment:
+					if (ch == '\n')
+						state = start;
+					break;
+
+				case string:
+					if (ch == quote)
+						state = string_quote;
+					break;
+
+				case string_quote:
+					if (std::isspace(ch))
+						state = start;
+					else
+						state = string;
+					break;
+
+				case qstring:
+					if (ch == ';' and bol)
+						state = start;
+					break;
+
+				case data:
+					if (dblk[si] == 0)
+					{
+						// The whole "data_" prefix was seen, a name has to follow.
+						// Never compare against the terminating zero: a NUL byte
+						// in the input would match it and the next comparison
+						// would read beyond the end of dblk.
+						if (is_non_blank(ch))
+						{
+							datablock = {static_cast<char>(ch)};
+							state = data_name;
+						}
+						else
+							state = start;
+					}
+					else if (dblk[si++] != ch)
+						state = start;
+					break;
+
+				case data_name:
+					if (is_non_blank(ch))
+						datablock.push_back(static_cast<char>(ch));
+					else if (isspace(ch))
+					{
+						if (not datablock.empty())
+							index[datablock] = mData.tellg();
+
+						state = start;
+					}
+					else
+						state = start;
+					break;
+			}
+
+			bol = (ch == '\n');
+		}
+
+		return index;
+	}
+
+	/// Parse data block @a datablock using a position stored in @a index.
+	///
+	/// @return  false when @a index has no entry for @a datablock, true after
+	///          seeking to the stored position and parsing the block
+	bool parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
+	{
+		const auto entry = index.find(datablock);
+		if (entry == index.end())
+			return false;
+
+		mData.seekg(entry->second);
+
+		produceDatablock(datablock);
+		m_lookahead = get_next_token();
+		parseDataBlock();
+
+		return true;
+	}
+
+	/// Parse everything up to the end of the input: global_ sections and
+	/// data blocks. Any other token at this level is reported as an error.
+	void parseFile()
+	{
+		while (m_lookahead != CIFToken::Eof)
+		{
+			if (m_lookahead == CIFToken::GLOBAL)
+			{
+				parseGlobal();
+				continue;
+			}
+
+			if (m_lookahead != CIFToken::DATA)
+			{
+				error("This file does not seem to be an mmCIF file");
+				continue;
+			}
+
+			// the value of the DATA token is the name of the data block
+			produceDatablock(mTokenValue);
+
+			match(CIFToken::DATA);
+			parseDataBlock();
+		}
+	}
+
+  protected:
+	/// Parse a global_ section: the keyword followed by any number of
+	/// tag/value pairs. Nothing is produced for these pairs.
+	void parseGlobal()
+	{
+		match(CIFToken::GLOBAL);
+
+		for (;;)
+		{
+			if (m_lookahead != CIFToken::Tag)
+				break;
+
+			match(CIFToken::Tag);
+			match(CIFToken::Value);
+		}
+	}
+
+	/// Parse the content of a data block: loop_ constructs, single tag/value
+	/// pairs and save frames, until a token is found that is none of these.
+	///
+	/// For a loop_ all tags have to belong to the same category; one row is
+	/// produced for each complete set of values. Consecutive tag/value pairs
+	/// of the same category end up in a single row.
+	void parseDataBlock()
+	{
+		std::string cat;
+
+		while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
+		{
+			switch (m_lookahead)
+			{
+				case CIFToken::LOOP:
+				{
+					cat.clear(); // should start a new category
+
+					match(CIFToken::LOOP);
+
+					std::vector<std::string> tags;
+
+					while (m_lookahead == CIFToken::Tag)
+					{
+						std::string catName, itemName;
+						std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+						if (cat.empty())
+						{
+							produceCategory(catName);
+							cat = catName;
+						}
+						else if (not iequals(cat, catName))
+							error("inconsistent categories in loop_");
+
+						tags.push_back(itemName);
+
+						match(CIFToken::Tag);
+					}
+
+					// Without tags the loop below would never consume a token
+					// and would keep producing empty rows forever.
+					if (tags.empty() and m_lookahead == CIFToken::Value)
+						error("loop_ should contain at least one tag before the first value");
+
+					while (m_lookahead == CIFToken::Value)
+					{
+						produceRow();
+
+						for (const auto &tag : tags)
+						{
+							produceItem(cat, tag, mTokenValue);
+							match(CIFToken::Value);
+						}
+					}
+
+					cat.clear();
+					break;
+				}
+
+				case CIFToken::Tag:
+				{
+					std::string catName, itemName;
+					std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+					// a tag of another category starts a new category and row
+					if (not iequals(cat, catName))
+					{
+						produceCategory(catName);
+						cat = catName;
+						produceRow();
+					}
+
+					match(CIFToken::Tag);
+
+					produceItem(cat, itemName, mTokenValue);
+
+					match(CIFToken::Value);
+					break;
+				}
+
+				case CIFToken::SAVE:
+					parseSaveFrame();
+					break;
+
+				default:
+					assert(false);
+					break;
+			}
+		}
+	}
+
+	/// Called by parseDataBlock() when a SAVE token is found. This default
+	/// implementation rejects save frames by calling error().
+	virtual void parseSaveFrame()
+	{
+		const std::string message("A regular CIF file should not contain a save frame");
+		error(message);
+	}
+
+	/// Report a fatal problem: throws a parse_error constructed from the
+	/// current line number and @a msg.
+	void error(const std::string &msg)
+	{
+		parse_error failure(m_line_nr, msg);
+		throw failure;
+	}
+
+	/// Write a non-fatal message, prefixed with the current line number, to
+	/// std::cerr.
+	void warning(const std::string &msg)
+	{
+		// note the space after "line", without it the number is glued to the word
+		std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
+	}
+
+	// production methods, these are pure virtual here
+
+	virtual void produceDatablock(const std::string &name) = 0; // called with the name of each data block that is about to be parsed
+	virtual void produceCategory(const std::string &name) = 0; // called when parseDataBlock() encounters a tag of a new category
+	virtual void produceRow() = 0; // called before the items of a new row are produced
+	virtual void produceItem(const std::string &category, const std::string &item, const std::string &value) = 0; // called for each value, with the category and item name it belongs to
+
+  protected:
+	enum State // states of the tokenizer state machine in get_next_token
+	{
+		Start,
+		White,
+		Comment,
+		QuestionMark,
+		Dot,
+		QuotedString,
+		QuotedStringQuote,
+		UnquotedString,
+		Tag,
+		TextField,
+		Float = 100, // NOTE(review): the explicit values presumably leave room for numbered sub-states — confirm in get_next_token
+		Int = 110,
+		Value = 300, // plain value; this state also recognises the reserved words global_, stop_, loop_, data_ and save_
+		DATA, // reading the name that follows data_
+		SAVE // reading the name that follows save_
+	};
+
+	std::istream &mData; // the stream that is being parsed
+
+	// Parser state
+	bool m_validate;
+	uint32_t m_line_nr; // line number reported by error() and warning()
+	bool m_bol;
+	CIFToken m_lookahead; // the current token, replaced by match()
+	std::string mTokenValue; // text of the current token
+	CIFValue mTokenType; // kind of value of the current token
+	std::stack<int> mBuffer; // NOTE(review): presumably holds characters that were pushed back by the tokenizer — confirm
+};
+
+// --------------------------------------------------------------------
+
+/// Parser that stores the parsed data in a @a File object.
+///
+/// The production methods of sac_parser are implemented by adding data
+/// blocks to the file, categories to the current data block and rows and
+/// items to the current category.
+template <
+	typename File,
+	typename Datablock,
+	typename Category>
+class parser_t : public sac_parser
+{
+  public:
+	using file_type = File;
+	using datablock_type = Datablock;
+	using category_type = Category;
+	// 'typename' is required for dependent type names before C++20
+	using row_handle_type = typename category_type::reference;
+
+	/// Create a parser reading from @a is and storing its result in @a file
+	parser_t(std::istream &is, file_type &file)
+		: sac_parser(is)
+		, m_file(file)
+	{
+	}
+
+	/// Add data block @a name to the file and make it the current one
+	void produceDatablock(const std::string &name) override
+	{
+		std::tie(m_datablock, std::ignore) = m_file.emplace(name);
+	}
+
+	/// Add category @a name to the current data block and make it the current one
+	void produceCategory(const std::string &name) override
+	{
+		if (VERBOSE >= 4)
+			std::cerr << "producing category " << name << std::endl;
+
+		std::tie(m_category, std::ignore) = m_datablock->emplace(name);
+	}
+
+	/// Add an empty row to the current category and make it the current row
+	void produceRow() override
+	{
+		if (VERBOSE >= 4)
+			std::cerr << "producing row for category " << m_category->name() << std::endl;
+
+		m_category->emplace({});
+		m_row = m_category->back();
+		// m_row.lineNr(m_line_nr);
+	}
+
+	/// Store @a value as item @a item in the current row; @a category has to
+	/// be the name of the current category.
+	void produceItem(const std::string &category, const std::string &item, const std::string &value) override
+	{
+		if (VERBOSE >= 4)
+			std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
+
+		if (not iequals(category, m_category->name()))
+			error("inconsistent categories in loop_");
+
+		// use the value that was passed in instead of peeking at the tokenizer state
+		m_row[item] = value;
+	}
+
+  protected:
+	file_type &m_file;
+	typename file_type::iterator m_datablock;
+	typename datablock_type::iterator m_category;
+	row_handle_type m_row;
+};
+
+// class Parser : public SacParser
+// {
+//   public:
+// 	Parser(std::istream &is, File &f, bool init = true);
+
+// 	virtual void produceDatablock(const std::string &name);
+// 	virtual void produceCategory(const std::string &name);
+// 	virtual void produceRow();
+// 	virtual void produceItem(const std::string &category, const std::string &item, const std::string &value);
+
+//   protected:
+// 	File &mFile;
+// 	Datablock *mDataBlock;
+// 	Datablock::iterator m_category;
+// 	Row mRow;
+// };
+
+// // --------------------------------------------------------------------
+
+// class DictParser : public Parser
+// {
+//   public:
+// 	DictParser(Validator &validator, std::istream &is);
+// 	~DictParser();
+
+// 	void loadDictionary();
+
+//   private:
+// 	virtual void parseSaveFrame();
+
+// 	bool collectItemTypes();
+// 	void linkItems();
+
+// 	Validator &mValidator;
+// 	File mFile;
+// 	struct DictParserDataImpl *mImpl;
+// 	bool mCollectedItemTypes = false;
+// };
+
+} // namespace cif::v2
--- a/include/cif++/v2/row.hpp
+++ b/include/cif++/v2/row.hpp
@@ -120,9 +120,14 @@ class row_handle
 	using category_type = Category;
 	using row_type = std::conditional_t<std::is_const_v<category_type>, const typename category_type::row, typename category_type::row>;

+	using item_handle_type = item_handle<row_handle>;
+
 	template <typename>
 	friend class row_handle;

+	template <typename>
+	friend class item_handle;
+
 	row_handle() = default;

 	row_handle(const row_handle &) = default;
@@ -149,24 +154,24 @@ class row_handle
 		return m_cat != nullptr and m_row != nullptr;
 	}

-	item_handle<row_type> operator[](uint32_t column_ix)
+	item_handle_type operator[](uint32_t column_ix)
 	{
-		return item_handle<row_type>(column_ix, *m_row);
+		return item_handle_type(column_ix, *this);
 	}

-	const item_handle<const row_type> operator[](uint32_t column_ix) const
+	const item_handle_type operator[](uint32_t column_ix) const
 	{
-		return item_handle<const row_type>(column_ix, *m_row);
+		return item_handle_type(column_ix, const_cast<row_handle &>(*this));
 	}

-	item_handle<row_type> operator[](std::string_view column_name)
+	item_handle_type operator[](std::string_view column_name)
 	{
-		return item_handle<row_type>(get_column_ix(column_name), *m_row);
+		return item_handle_type(add_column(column_name), *this);
 	}

-	const item_handle<const row_type> operator[](std::string_view column_name) const
+	const item_handle_type operator[](std::string_view column_name) const
 	{
-		return item_handle<const row_type>(get_column_ix(column_name), *m_row);
+		// same const_cast as the const overload taking a column index: the
+		// item handle is constructed from a non-const row_handle there
+		return item_handle_type(get_column_ix(column_name), const_cast<row_handle &>(*this));
 	}

 	template <typename... Ts, size_t N>
@@ -186,12 +191,85 @@ class row_handle
 		return detail::get_row_result<category_type, C...>(*this, {get_column_ix(columns)...});
 	}

+	/// Assign all items in @a values to this row, one at a time, each with
+	/// updateLinked set to true.
+	void assign(const std::vector<item> &values)
+	{
+		// std::map<std::string, std::tuple<size_t, std::string, std::string>> changed;
+
+		for (auto entry = values.begin(); entry != values.end(); ++entry)
+		{
+			assign(*entry, true);
+
+			// auto columnIx = cat->add_column(value.name());
+			// auto &col = cat->m_columns[columnIx];
+			// std::string tag = col.mValidator ? col.mValidator->mTag : std::to_string(columnIx);
+
+			// changed[tag] = std::make_tuple(columnIx, operator[](columnIx).c_str(), value.value());
+
+			// assign(columnIx, value.value(), true);
+		}
+
+		// // see if we need to update any child categories that depend on these values
+		// // auto iv = col.mValidator;
+		// if (mCascade)
+		// {
+		// 	for (auto &&[childCat, linked] : cat->mChildLinks)
+		// 	{
+		// 		Condition cond;
+		// 		std::string childTag;
+
+		// 		std::vector<Item> newValues;
+
+		// 		for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
+		// 		{
+		// 			std::string pk = linked->mParentKeys[ix];
+		// 			std::string ck = linked->mChildKeys[ix];
+
+		// 			if (changed.count(pk) > 0)
+		// 			{
+		// 				childTag = ck;
+		// 				cond = std::move(cond) && (Key(ck) == std::get<1>(changed[pk]));
+		// 				newValues.emplace_back(ck, std::get<2>(changed[pk]));
+		// 			}
+		// 			else
+		// 			{
+		// 				const char *value = (*this)[pk].c_str();
+		// 				cond = std::move(cond) && (Key(ck) == value);
+		// 			}
+		// 		}
+
+		// 		auto rows = childCat->find(std::move(cond));
+		// 		for (auto &cr : rows)
+		// 			cr.assign(newValues);
+		// 	}
+		// }
+	}
+
+	/// Assign @a value to the column called @a name; the column is added to
+	/// the category first and the work is done by the index based overload.
+	void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
+	{
+		const auto column_ix = m_cat->add_column(name);
+		assign(column_ix, value, updateLinked, validate);
+	}
+
+	/// Assign @a value to the column with index @a column by forwarding all
+	/// arguments, together with the row, to the category's update_value().
+	void assign(size_t column, std::string_view value, bool updateLinked, bool validate = true)
+	{
+		auto &owner = *m_cat;
+		owner.update_value(m_row, column, value, updateLinked, validate);
+	}
+
  private:
-	uint32_t get_column_ix(std::string_view name) const
+	uint16_t get_column_ix(std::string_view name) const // index of column @a name, as returned by the category's get_column_ix()
 	{
 		return m_cat->get_column_ix(name);
 	}

+	/// Forward to the category's add_column() and return the resulting
+	/// column index.
+	uint16_t add_column(std::string_view name)
+	{
+		const uint16_t column_ix = m_cat->add_column(name);
+		return column_ix;
+	}
+
+	/// Assign the value of item @a i to the column named after the item.
+	void assign(const item &i, bool updateLinked)
+	{
+		const auto &column_name = i.name();
+		const auto &column_value = i.value();
+
+		assign(column_name, column_value, updateLinked);
+	}
+
 	category_type *m_cat = nullptr;
 	row_type *m_row = nullptr;
 };

--- a/src/parser.cpp
+++ b/src/parser.cpp
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <set>
+
+#include <cif++/v2/parser.hpp>
+
+// extern int VERBOSE;
+
+namespace cif::v2
+{
+
+const uint32_t kMaxLineLength = 132; // NOTE(review): presumably the maximum length of a line in a CIF file — confirm where it is used
+// Per-character classification values. Only 96 of the 128 entries are listed, the remaining ones are zero-initialised. NOTE(review): the rows are labelled 2..7, presumably the high nibble of the character code (0x20..0x7f) — confirm against the helpers that index this table
+const uint8_t kCharTraitsTable[128] = {
+	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
+	14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, //	3
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	4
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, //	5
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	6
+	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  //	7
+};
+
+// --------------------------------------------------------------------
+
+// Build the exception text from the line number and the message.
+parse_error::parse_error(uint32_t lineNr, const std::string &message)
+	: std::runtime_error(std::string("parse error at line ").append(std::to_string(lineNr)).append(": ").append(message))
+{
+}
+
+// --------------------------------------------------------------------
+
+const char *SacParser::kTokenName[] = { // printable token names, indexed with a CIFToken value
+	"unknown",
+	"EOF",
+	"DATA",
+	"LOOP",
+	"GLOBAL",
+	"SAVE",
+	"STOP",
+	"Tag",
+	"Value"};
+
+const char *SacParser::kValueName[] = { // printable names for the value kinds, indexed with mTokenType in the verbose output of getNextToken
+	"Int",
+	"Float",
+	"Numeric",
+	"String",
+	"TextField",
+	"Inapplicable",
+	"Unknown"};
+
+// --------------------------------------------------------------------
+
+// Return true if the zero terminated string s starts with an ordinary
+// character, contains only non blank characters after that and does not
+// match the pattern for reserved words.
+bool isUnquotedString(const char *s)
+{
+	if (not isOrdinary(*s))
+		return false;
+
+	for (const char *p = s + 1; *p != 0; ++p)
+	{
+		if (not isNonBlank(*p))
+			return false;
+	}
+
+	// but be careful it does not contain e.g. stop_
+	static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
+
+	return not std::regex_match(s, reservedRx);
+}
+
+// --------------------------------------------------------------------
+
+// Create a parser reading from is. When init is true the first token is
+// read right away, otherwise no input is consumed by the constructor.
+SacParser::SacParser(std::istream &is, bool init)
+	: mData(is)
+{
+	mValidate = true;
+	mLineNr = 1;
+	mBol = true;
+
+	// give these a defined value, also when init is false
+	mLookahead = eCIFTokenUnknown;
+	mTokenType = eCIFValueUnknown;
+
+	if (init)
+		mLookahead = getNextToken();
+}
+
+// Report a fatal problem by throwing a parse_error for the current line.
+void SacParser::error(const std::string &msg)
+{
+	parse_error failure(mLineNr, msg);
+	throw failure;
+}
+
+// getNextChar takes a char from the buffer, or if it is empty
+// from the istream. This function also does carriage/linefeed
+// translation.
+// Return the next character, taken from the buffer of pushed back
+// characters if there are any and from the stream otherwise. The character
+// is appended to mTokenValue and the line number is updated.
+int SacParser::getNextChar()
+{
+	int result;
+
+	if (not mBuffer.empty())
+	{
+		result = mBuffer.top();
+		mBuffer.pop();
+	}
+	else
+		result = mData.get();
+
+	// very simple CR/LF translation into LF
+	if (result == '\r')
+	{
+		const int peeked = mData.get();
+		if (peeked != '\n')
+			mBuffer.push(peeked);
+		result = '\n';
+	}
+
+	mTokenValue.push_back(static_cast<char>(result));
+
+	if (result == '\n')
+		++mLineNr;
+
+	if (VERBOSE >= 6)
+	{
+		std::cerr << "getNextChar => ";
+
+		const bool printable = not iscntrl(result) and isprint(result);
+		if (printable)
+			std::cerr << char(result) << std::endl;
+		else
+			std::cerr << int(result) << std::endl;
+	}
+
+	return result;
+}
+
+// Undo the last getNextChar: the last character of mTokenValue is removed
+// and pushed onto the buffer, the line number is adjusted for a newline.
+void SacParser::retract()
+{
+	assert(not mTokenValue.empty());
+
+	const char last = mTokenValue.back();
+	mTokenValue.pop_back();
+
+	if (last == '\n')
+		--mLineNr;
+
+	mBuffer.push(last);
+}
+
+
+// Push back everything read for the current token and return the state to
+// try next: float after start, int after float and value after int. Any
+// other start state is an error.
+int SacParser::restart(int start)
+{
+	while (not mTokenValue.empty())
+		retract();
+
+	int next = 0;
+
+	if (start == eStateStart)
+		next = eStateFloat;
+	else if (start == eStateFloat)
+		next = eStateInt;
+	else if (start == eStateInt)
+		next = eStateValue;
+	else
+		error("Invalid state in SacParser");
+
+	mBol = false;
+
+	return next;
+}
+
+// Consume the lookahead token, which has to be equal to t, and read the
+// next one. A mismatch is reported by calling error().
+void SacParser::match(SacParser::CIFToken t)
+{
+	if (mLookahead != t)
+	{
+		std::string message("Unexpected token, expected ");
+		message.append(kTokenName[t]).append(" but found ").append(kTokenName[mLookahead]);
+		error(message);
+	}
+
+	mLookahead = getNextToken();
+}
+
+SacParser::CIFToken SacParser::getNextToken() // read the next token; its text is left in mTokenValue and, for values, its kind in mTokenType
+{
+	const auto kEOF = std::char_traits<char>::eof();
+
+	CIFToken result = eCIFTokenUnknown;
+	int quoteChar = 0;
+	int state = eStateStart, start = eStateStart; // start remembers which interpretation is being tried, restart() moves it on to the next one
+	mBol = false;
+
+	mTokenValue.clear();
+	mTokenType = eCIFValueUnknown;
+
+	while (result == eCIFTokenUnknown)
+	{
+		auto ch = getNextChar();
+
+		switch (state)
+		{
+			case eStateStart:
+				if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (ch == '\n')
+				{
+					mBol = true;
+					state = eStateWhite;
+				}
+				else if (ch == ' ' or ch == '\t')
+					state = eStateWhite;
+				else if (ch == '#')
+					state = eStateComment;
+				else if (ch == '_')
+					state = eStateTag;
+				else if (ch == ';' and mBol)
+					state = eStateTextField;
+				else if (ch == '\'' or ch == '"')
+				{
+					quoteChar = ch;
+					state = eStateQuotedString;
+				}
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateWhite:
+				if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (not isspace(ch))
+				{
+					state = eStateStart;
+					retract();
+					mTokenValue.clear(); // white space is not part of any token
+				}
+				else
+					mBol = (ch == '\n');
+				break;
+
+			case eStateComment:
+				if (ch == '\n')
+				{
+					state = eStateStart;
+					mBol = true;
+					mTokenValue.clear();
+				}
+				else if (ch == kEOF)
+					result = eCIFTokenEOF;
+				else if (not isAnyPrint(ch))
+					error("invalid character in comment");
+				break;
+
+			case eStateTextField:
+				if (ch == '\n')
+					state = eStateTextField + 1; // +1: a newline was just seen inside the text field
+				else if (ch == kEOF)
+					error("unterminated textfield");
+				else if (not isAnyPrint(ch))
+					//					error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
+					std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
+				break;
+
+			case eStateTextField + 1:
+				if (isTextLead(ch) or ch == ' ' or ch == '\t')
+					state = eStateTextField;
+				else if (ch == ';')
+				{
+					assert(mTokenValue.length() >= 2);
+					mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3); // drop the first character and the last two (the newline and closing ';')
+					mTokenType = eCIFValueTextField;
+					result = eCIFTokenValue;
+				}
+				else if (ch == kEOF)
+					error("unterminated textfield");
+				else if (ch != '\n')
+					error("invalid character in text field");
+				break;
+
+			case eStateQuotedString:
+				if (ch == kEOF)
+					error("unterminated quoted string");
+				else if (ch == quoteChar)
+					state = eStateQuotedStringQuote;
+				else if (not isAnyPrint(ch))
+					std::cerr << "invalid character in quoted string '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
+					// error("invalid character in quoted string");
+				break;
+
+			case eStateQuotedStringQuote:
+				if (isWhite(ch))
+				{
+					retract();
+					result = eCIFTokenValue;
+					mTokenType = eCIFValueString;
+
+					if (mTokenValue.length() < 2)
+						error("Invalid quoted string token");
+						
+					mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2); // strip the surrounding quote characters
+				}
+				else if (ch == quoteChar)
+					; // another quote character: stay in this state
+				else if (isAnyPrint(ch))
+					state = eStateQuotedString;
+				else if (ch == kEOF)
+					error("unterminated quoted string");
+				else
+					error("invalid character in quoted string");
+				break;
+
+			case eStateTag:
+				if (not isNonBlank(ch))
+				{
+					retract();
+					result = eCIFTokenTag;
+				}
+				break;
+
+			case eStateFloat:
+				if (ch == '+' or ch == '-')
+				{
+					state = eStateFloat + 1;
+				}
+				else if (isdigit(ch))
+					state = eStateFloat + 1;
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateFloat + 1:
+				//				if (ch == '(')	// numeric???
+				//					mState = eStateNumericSuffix;
+				//				else
+				if (ch == '.')
+					state = eStateFloat + 2;
+				else if (tolower(ch) == 'e')
+					state = eStateFloat + 3;
+				else if (isWhite(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					mTokenType = eCIFValueInt;
+				}
+				else
+					state = start = restart(start);
+				break;
+
+			// parsed '.'
+			case eStateFloat + 2:
+				if (tolower(ch) == 'e')
+					state = eStateFloat + 3;
+				else if (isWhite(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					mTokenType = eCIFValueFloat;
+				}
+				else
+					state = start = restart(start);
+				break;
+
+			// parsed 'e'
+			case eStateFloat + 3:
+				if (ch == '-' or ch == '+')
+					state = eStateFloat + 4;
+				else if (isdigit(ch))
+					state = eStateFloat + 5;
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateFloat + 4:
+				if (isdigit(ch))
+					state = eStateFloat + 5;
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateFloat + 5:
+				if (isWhite(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					mTokenType = eCIFValueFloat;
+				}
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateInt:
+				if (isdigit(ch) or ch == '+' or ch == '-')
+					state = eStateInt + 1;
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateInt + 1:
+				if (isWhite(ch) or ch == kEOF)
+				{
+					retract();
+					result = eCIFTokenValue;
+					mTokenType = eCIFValueInt;
+				}
+				else
+					state = start = restart(start);
+				break;
+
+			case eStateValue:
+				if (ch == '_')
+				{
+					std::string s = toLowerCopy(mTokenValue);
+
+					if (s == "global_")
+						result = eCIFTokenGLOBAL;
+					else if (s == "stop_")
+						result = eCIFTokenSTOP;
+					else if (s == "loop_")
+						result = eCIFTokenLOOP;
+					else if (s == "data_")
+					{
+						state = eStateDATA;
+						continue;
+					}
+					else if (s == "save_")
+					{
+						state = eStateSAVE;
+						continue;
+					}
+				}
+
+				if (result == eCIFTokenUnknown and not isNonBlank(ch))
+				{
+					retract();
+					result = eCIFTokenValue;
+
+					if (mTokenValue == ".")
+						mTokenType = eCIFValueInapplicable;
+					else if (mTokenValue == "?")
+					{
+						mTokenType = eCIFValueUnknown;
+						mTokenValue.clear();
+					}
+				}
+				break;
+
+			case eStateDATA:
+			case eStateSAVE:
+				if (not isNonBlank(ch))
+				{
+					retract();
+
+					if (state == eStateDATA)
+						result = eCIFTokenDATA;
+					else
+						result = eCIFTokenSAVE;
+
+					mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5); // drop the "data_" or "save_" prefix, leaving the name
+				}
+				break;
+
+			default:
+				assert(false);
+				error("Invalid state in getNextToken");
+				break;
+		}
+	}
+
+	if (VERBOSE >= 5)
+	{
+		std::cerr << kTokenName[result];
+		if (mTokenType != eCIFValueUnknown)
+			std::cerr << ' ' << kValueName[mTokenType];
+		if (result != eCIFTokenEOF)
+			std::cerr << " '" << mTokenValue << '\'';
+		std::cerr << std::endl;
+	}
+
+	return result;
+}
+
+
+// Scan the whole stream and record, for every "data_<name>" header that is
+// followed by white space, the stream position right after that white space
+// character. Comments, quoted strings and text fields are skipped so that a
+// "data_" inside one of those is not indexed.
+DatablockIndex SacParser::indexDatablocks()
+{
+	DatablockIndex index;
+
+	// first locate the start, as fast as we can
+	auto &sb = *mData.rdbuf();
+
+	enum
+	{
+		start,
+		comment,
+		string,
+		string_quote,
+		qstring,
+		data,
+		data_name
+	} state = start;
+
+	int quote = 0;
+	bool bol = true;
+	const char dblk[] = "data_";
+	std::string::size_type si = 0;
+	std::string datablock;
+
+	for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
+	{
+		switch (state)
+		{
+			case start:
+				switch (ch)
+				{
+					case '#': state = comment; break;
+					case 'd':
+					case 'D':
+						state = data;
+						si = 1;
+						break;
+					case '\'':
+					case '"':
+						state = string;
+						quote = ch;
+						break;
+					case ';':
+						if (bol)
+							state = qstring;
+						break;
+				}
+				break;
+
+			case comment:
+				if (ch == '\n')
+					state = start;
+				break;
+
+			case string:
+				if (ch == quote)
+					state = string_quote;
+				break;
+
+			case string_quote:
+				if (std::isspace(ch))
+					state = start;
+				else
+					state = string;
+				break;
+
+			case qstring:
+				if (ch == ';' and bol)
+					state = start;
+				break;
+
+			case data:
+				if (dblk[si] == 0)
+				{
+					// The whole "data_" prefix was seen, a name has to follow.
+					// Never compare against the terminating zero: a NUL byte in
+					// the input would match it and the next comparison would
+					// read beyond the end of dblk.
+					if (isNonBlank(ch))
+					{
+						datablock = {static_cast<char>(ch)};
+						state = data_name;
+					}
+					else
+						state = start;
+				}
+				else if (dblk[si++] != ch)
+					state = start;
+				break;
+
+			case data_name:
+				if (isNonBlank(ch))
+					datablock.push_back(static_cast<char>(ch));
+				else if (isspace(ch))
+				{
+					if (not datablock.empty())
+						index[datablock] = mData.tellg();
+
+					state = start;
+				}
+				else
+					state = start;
+				break;
+		}
+
+		bol = (ch == '\n');
+	}
+
+	return index;
+}
+
+// Locate the header of data block datablock by scanning the raw stream
+// buffer and, when it is found, parse just that data block. Returns true
+// if the header was found. The scan only tracks comments, quoted strings
+// and text fields so that a "data_" inside one of those is not taken for a
+// block header.
+bool SacParser::parseSingleDatablock(const std::string &datablock)
+{
+	// first locate the start, as fast as we can
+	auto &sb = *mData.rdbuf();
+
+	enum
+	{
+		start,
+		comment,
+		string,
+		string_quote,
+		qstring,
+		data
+	} state = start;
+
+	int quote = 0;
+	bool bol = true;
+	const std::string dblk = "data_" + datablock;
+	std::string::size_type si = 0;
+	bool found = false;
+
+	// The next character is fetched inside the loop body on purpose: fetching
+	// it in a for-increment would swallow one more character after the header
+	// was found, and that character may well be the leading '_' of the first
+	// tag in the data block.
+	while (not found)
+	{
+		const auto ch = sb.sbumpc();
+		if (ch == std::streambuf::traits_type::eof())
+			break;
+
+		switch (state)
+		{
+			case start:
+				switch (ch)
+				{
+					case '#': state = comment; break;
+					case 'd':
+					case 'D':
+						state = data;
+						si = 1;
+						break;
+					case '\'':
+					case '"':
+						state = string;
+						quote = ch;
+						break;
+					case ';':
+						if (bol)
+							state = qstring;
+						break;
+				}
+				break;
+
+			case comment:
+				if (ch == '\n')
+					state = start;
+				break;
+
+			case string:
+				if (ch == quote)
+					state = string_quote;
+				break;
+
+			case string_quote:
+				if (std::isspace(ch))
+					state = start;
+				else
+					state = string;
+				break;
+
+			case qstring:
+				if (ch == ';' and bol)
+					state = start;
+				break;
+
+			case data:
+				if (si >= dblk.length())
+				{
+					// The complete header was matched, it has to be followed by
+					// white space. Checking the length first avoids comparing
+					// (and stepping) past the end of dblk when the input
+					// contains a NUL byte.
+					if (isspace(ch))
+						found = true;
+					else
+						state = start;
+				}
+				else if (dblk[si++] != ch)
+					state = start;
+				break;
+		}
+
+		bol = (ch == '\n');
+	}
+
+	if (found)
+	{
+		produceDatablock(datablock);
+		mLookahead = getNextToken();
+		parseDataBlock();
+	}
+
+	return found;
+}
+
+// Parse data block datablock using a position stored in index. Returns
+// false when index has no entry for datablock, true after seeking to the
+// stored position and parsing the block.
+bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
+{
+	const auto entry = index.find(datablock);
+	if (entry == index.end())
+		return false;
+
+	mData.seekg(entry->second);
+
+	produceDatablock(datablock);
+	mLookahead = getNextToken();
+	parseDataBlock();
+
+	return true;
+}
+
+// Parse everything up to the end of the input: global_ sections and data
+// blocks. Any other token at this level is reported as an error.
+void SacParser::parseFile()
+{
+	while (mLookahead != eCIFTokenEOF)
+	{
+		if (mLookahead == eCIFTokenGLOBAL)
+		{
+			parseGlobal();
+			continue;
+		}
+
+		if (mLookahead != eCIFTokenDATA)
+		{
+			error("This file does not seem to be an mmCIF file");
+			continue;
+		}
+
+		// the value of the DATA token is the name of the data block
+		produceDatablock(mTokenValue);
+
+		match(eCIFTokenDATA);
+		parseDataBlock();
+	}
+}
+
+// Parse a global_ section: the keyword followed by any number of tag/value
+// pairs. Nothing is produced for these pairs.
+void SacParser::parseGlobal()
+{
+	match(eCIFTokenGLOBAL);
+
+	for (;;)
+	{
+		if (mLookahead != eCIFTokenTag)
+			break;
+
+		match(eCIFTokenTag);
+		match(eCIFTokenValue);
+	}
+}
+
+// Parse the content of a data block: loop_ constructs, single tag/value
+// pairs and save frames, until a token is found that is none of these.
+//
+// For a loop_ all tags have to belong to the same category; one row is
+// produced for each complete set of values. Consecutive tag/value pairs of
+// the same category end up in a single row.
+void SacParser::parseDataBlock()
+{
+	std::string cat;
+
+	while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE)
+	{
+		switch (mLookahead)
+		{
+			case eCIFTokenLOOP:
+			{
+				cat.clear(); // should start a new category
+
+				match(eCIFTokenLOOP);
+
+				std::vector<std::string> tags;
+
+				while (mLookahead == eCIFTokenTag)
+				{
+					std::string catName, itemName;
+					std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+					if (cat.empty())
+					{
+						produceCategory(catName);
+						cat = catName;
+					}
+					else if (not iequals(cat, catName))
+						error("inconsistent categories in loop_");
+
+					tags.push_back(itemName);
+
+					match(eCIFTokenTag);
+				}
+
+				// Without tags the loop below would never consume a token and
+				// would keep producing empty rows forever.
+				if (tags.empty() and mLookahead == eCIFTokenValue)
+					error("loop_ should contain at least one tag before the first value");
+
+				while (mLookahead == eCIFTokenValue)
+				{
+					produceRow();
+
+					for (const auto &tag : tags)
+					{
+						produceItem(cat, tag, mTokenValue);
+						match(eCIFTokenValue);
+					}
+				}
+
+				cat.clear();
+				break;
+			}
+
+			case eCIFTokenTag:
+			{
+				std::string catName, itemName;
+				std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+				// a tag of another category starts a new category and row
+				if (not iequals(cat, catName))
+				{
+					produceCategory(catName);
+					cat = catName;
+					produceRow();
+				}
+
+				match(eCIFTokenTag);
+
+				produceItem(cat, itemName, mTokenValue);
+
+				match(eCIFTokenValue);
+				break;
+			}
+
+			case eCIFTokenSAVE:
+				parseSaveFrame();
+				break;
+
+			default:
+				assert(false);
+				break;
+		}
+	}
+}
+
+// Called by parseDataBlock() when a SAVE token is found; this
+// implementation rejects save frames by calling error().
+void SacParser::parseSaveFrame()
+{
+	const std::string message("A regular CIF file should not contain a save frame");
+	error(message);
+}
+
+// --------------------------------------------------------------------
+
+Parser::Parser(std::istream &is, File &f, bool init) // is and init are passed on to SacParser, f is the file that receives the data blocks
+	: SacParser(is, init)
+	, mFile(f)
+	, mDataBlock(nullptr) // set by produceDatablock
+{
+}
+
+// Create a new data block called name, make it the current one and append
+// it to the file.
+void Parser::produceDatablock(const std::string &name)
+{
+	auto *block = new Datablock(name);
+
+	mDataBlock = block;
+	mFile.append(block);
+}
+
+// Make the category called name in the current data block the current
+// category, using the data block's emplace().
+void Parser::produceCategory(const std::string &name)
+{
+	const bool verbose = VERBOSE >= 4;
+	if (verbose)
+	{
+		std::cerr << "producing category " << name << std::endl;
+	}
+
+	auto &block = *mDataBlock;
+	std::tie(mCat, std::ignore) = block.emplace(name);
+}
+
+// Add an empty row to the current category, make it the current row and
+// store the current line number in it.
+void Parser::produceRow()
+{
+	auto &category = *mCat;
+
+	if (VERBOSE >= 4)
+	{
+		std::cerr << "producing row for category " << category.name() << std::endl;
+	}
+
+	category.emplace({});
+	mRow = category.back();
+	mRow.lineNr(mLineNr);
+}
+
+/// Stores @a value in field @a item of the current row.
+///
+/// @param category  Category the item is expected to belong to; error() is
+///                  called when it differs (case-insensitively) from the
+///                  current category.
+/// @param item      Name of the field within the current row.
+/// @param value     Value to store.
+void Parser::produceItem(const std::string &category, const std::string &item, const std::string &value)
+{
+	if (VERBOSE >= 4)
+		std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
+
+	if (not iequals(category, mCat->name()))
+		error("inconsistent categories in loop_");
+
+	// Store the value that was handed in (and logged above) rather than
+	// silently reading the tokenizer state again.
+	mRow[item] = value;
+}
+
+// --------------------------------------------------------------------
+
+/// Scratch storage used while a dictionary is being parsed: filled by
+/// DictParser::parseSaveFrame(), consumed by loadDictionary() and linkItems().
+struct DictParserDataImpl
+{
+	// temporary values for constructing dictionaries
+	// one entry per category save frame
+	std::vector<ValidateCategory> mCategoryValidators;
+	// item validators, keyed by the name of the category they belong to
+	std::map<std::string, std::vector<ValidateItem>> mItemValidators;
+	// (child tag, parent tag) pairs read from the item_linked category of item save frames
+	std::set<std::tuple<std::string, std::string>> mLinkedItems;
+};
+
+/// Creates a dictionary parser reading from @a is; the validators it finds
+/// are added to @a validator by loadDictionary().
+///
+/// NOTE(review): mFile is handed to the Parser base before the members of
+/// this class are initialised. That is fine as long as the base only stores
+/// the reference, which is all Parser's constructor does -- confirm mFile is
+/// a DictParser member.
+DictParser::DictParser(Validator &validator, std::istream &is)
+	: Parser(is, mFile)
+	, mValidator{validator}
+	, mImpl{new DictParserDataImpl} // released in the destructor
+{
+}
+
+/// Releases the scratch storage allocated in the constructor.
+DictParser::~DictParser()
+{
+	delete mImpl;
+}
+
+/// Parses one `save_` frame of a dictionary and records the validator
+/// information it describes.
+///
+/// The content of the frame is first collected in a temporary Datablock. A
+/// frame whose name does not start with '_' is handled as a category
+/// definition and yields a ValidateCategory. Any other frame is handled as an
+/// item definition: it adds or updates ValidateItem entries and records the
+/// child/parent pairs of its `item_linked` category. Everything is stored in
+/// mImpl; loadDictionary() hands it over to the validator later on.
+///
+/// error() is called for an empty frame name, a `loop_` mixing categories, a
+/// `loop_` with values but no tags, and malformed `_item.name` values.
+void DictParser::parseSaveFrame()
+{
+	// item definitions refer to type validators, so these have to exist first
+	if (not mCollectedItemTypes)
+		mCollectedItemTypes = collectItemTypes();
+
+	std::string saveFrameName = mTokenValue;
+
+	if (saveFrameName.empty())
+		error("Invalid save frame, should contain more than just 'save_' here");
+
+	bool isCategorySaveFrame = mTokenValue[0] != '_';
+
+	Datablock dict(mTokenValue);
+	Datablock::iterator cat = dict.end();
+
+	match(eCIFTokenSAVE);
+	while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag)
+	{
+		if (mLookahead == eCIFTokenLOOP)
+		{
+			cat = dict.end(); // should start a new category
+
+			match(eCIFTokenLOOP);
+
+			std::vector<std::string> tags;
+			while (mLookahead == eCIFTokenTag)
+			{
+				std::string catName, itemName;
+				std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+				if (cat == dict.end())
+					std::tie(cat, std::ignore) = dict.emplace(catName);
+				else if (not iequals(cat->name(), catName))
+					error("inconsistent categories in loop_");
+
+				tags.push_back(itemName);
+				match(eCIFTokenTag);
+			}
+
+			// Without tags there is no category to store values in; report it
+			// instead of dereferencing dict.end() in the loop below.
+			if (tags.empty() and mLookahead == eCIFTokenValue)
+				error("loop_ without tags");
+
+			while (mLookahead == eCIFTokenValue)
+			{
+				cat->emplace({});
+				auto row = cat->back();
+
+				// one value per tag, in the order the tags were declared
+				for (const auto &tag : tags)
+				{
+					row[tag] = mTokenValue;
+					match(eCIFTokenValue);
+				}
+			}
+
+			cat = dict.end();
+		}
+		else
+		{
+			std::string catName, itemName;
+			std::tie(catName, itemName) = splitTagName(mTokenValue);
+
+			if (cat == dict.end() or not iequals(cat->name(), catName))
+				std::tie(cat, std::ignore) = dict.emplace(catName);
+
+			match(eCIFTokenTag);
+
+			// tag/value pairs outside a loop_ all go into a single row
+			if (cat->empty())
+				cat->emplace({});
+			cat->back()[itemName] = mTokenValue;
+
+			match(eCIFTokenValue);
+		}
+	}
+
+	match(eCIFTokenSAVE);
+
+	if (isCategorySaveFrame)
+	{
+		std::string category;
+		cif::tie(category) = dict["category"].front().get("id");
+
+		std::vector<std::string> keys;
+		for (auto k : dict["category_key"])
+			keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>())));
+
+		iset groups;
+		for (auto g : dict["category_group"])
+			groups.insert(g["id"].as<std::string>());
+
+		mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
+	}
+	else
+	{
+		// if the type code is missing, this must be a pointer, just skip it
+		std::string typeCode;
+		cif::tie(typeCode) = dict["item_type"].front().get("code");
+
+		const ValidateType *tv = nullptr;
+		if (not(typeCode.empty() or typeCode == "?"))
+			tv = mValidator.getValidatorForType(typeCode);
+
+		iset ess;
+		for (auto e : dict["item_enumeration"])
+			ess.insert(e["value"].as<std::string>());
+
+		std::string defaultValue;
+		cif::tie(defaultValue) = dict["item_default"].front().get("value");
+		bool defaultIsNull = false;
+		if (defaultValue.empty())
+		{
+			// Categories are stored without the leading underscore of the tag
+			// (see splitTagName above), so the lookup must use "item_default".
+			for (auto &r : dict["item_default"])
+			{
+				defaultIsNull = r["value"].is_null();
+				break;
+			}
+		}
+
+		// collect the dict from our dataBlock and construct validators
+		for (auto i : dict["item"])
+		{
+			std::string tagName, category, mandatory;
+
+			cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
+
+			std::string catName, itemName;
+			std::tie(catName, itemName) = splitTagName(tagName);
+
+			if (catName.empty() or itemName.empty())
+				error("Invalid tag name in _item.name " + tagName);
+
+			if (not iequals(category, catName) and not(category.empty() or category == "?"))
+				error("specified category id does not match the implicit category name for tag '" + tagName + '\'');
+			else
+				category = catName;
+
+			const bool isMandatory = iequals(mandatory, "yes");
+
+			auto &ivs = mImpl->mItemValidators[category];
+
+			auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
+			if (vi == ivs.end())
+				ivs.push_back(ValidateItem{itemName, isMandatory, tv, ess, defaultValue, defaultIsNull});
+			else
+			{
+				// need to update the itemValidator?
+				if (vi->mMandatory != isMandatory)
+				{
+					// the definition in the item's own save frame wins
+					const bool isOwnFrame = iequals(tagName, saveFrameName);
+
+					if (VERBOSE > 2)
+					{
+						std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
+
+						if (isOwnFrame)
+							std::cerr << "choosing " << mandatory << std::endl;
+						else
+							std::cerr << "choosing " << (vi->mMandatory ? "Y" : "N") << std::endl;
+					}
+
+					if (isOwnFrame)
+						vi->mMandatory = isMandatory;
+				}
+
+				if (vi->mType != nullptr and tv != nullptr and vi->mType != tv)
+				{
+					if (VERBOSE > 1)
+						std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
+				}
+
+				// an already known type is never replaced
+				if (vi->mType == nullptr)
+					vi->mType = tv;
+
+				vi->mEnums.insert(ess.begin(), ess.end());
+
+				// anything else yet?
+				// ...
+			}
+		}
+
+		// remember the parent/child links declared in this frame, linkItems() resolves them
+		for (auto i : dict["item_linked"])
+		{
+			std::string childTagName, parentTagName;
+
+			cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name");
+
+			mImpl->mLinkedItems.emplace(childTagName, parentTagName);
+		}
+	}
+}
+
+/// Builds the parent/child link validators and registers them with mValidator.
+///
+/// Links are taken from the `pdbx_item_linked_group_list` category of the
+/// current datablock. Only when that category is empty are the `item_linked`
+/// pairs collected by parseSaveFrame() used instead, with link group id 0.
+/// Pairs sharing parent category, child category and group id end up in the
+/// same ValidateLink. Afterwards every item validator that still has no type
+/// is reported on std::cerr. error() is called when there is no datablock or
+/// when a linked item has no validator.
+void DictParser::linkItems()
+{
+	if (not mDataBlock)
+		error("no datablock");
+
+	auto &dict = *mDataBlock;
+
+	// links are identified by a parent category, a child category and a group ID
+	using key_type = std::tuple<std::string, std::string, int>;
+
+	// per link: the parent keys and, at matching positions, the child keys
+	using key_lists = std::tuple<std::vector<std::string>, std::vector<std::string>>;
+
+	std::map<key_type, size_t> linkIndex; // link key -> position in linkKeys
+	std::vector<key_lists> linkKeys;
+
+	// Looks up the validators of both items and records the (parent, child)
+	// key pair in the link they belong to, creating the link on first use.
+	auto registerLink = [&](const std::string &child, const std::string &parent, int groupID)
+	{
+		auto civ = mValidator.getValidatorForItem(child);
+		if (civ == nullptr)
+			error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
+
+		auto piv = mValidator.getValidatorForItem(parent);
+		if (piv == nullptr)
+			error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
+
+		key_type key{piv->mCategory->mName, civ->mCategory->mName, groupID};
+
+		auto slot = linkIndex.find(key);
+		if (slot == linkIndex.end())
+		{
+			slot = linkIndex.emplace(key, linkKeys.size()).first;
+			linkKeys.emplace_back();
+		}
+
+		auto &[pkeys, ckeys] = linkKeys.at(slot->second);
+
+		const std::string &pk = piv->mTag;
+		const std::string &ck = civ->mTag;
+
+		// each (parent, child) pair is stored only once per link
+		bool known = false;
+		for (size_t i = 0; i < pkeys.size() and not known; ++i)
+			known = pkeys[i] == pk and ckeys[i] == ck;
+
+		if (not known)
+		{
+			pkeys.push_back(pk);
+			ckeys.push_back(ck);
+		}
+	};
+
+	auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
+
+	for (auto gl : linkedGroupList)
+	{
+		std::string child, parent;
+		int link_group_id;
+		cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
+
+		registerLink(child, parent, link_group_id);
+	}
+
+	// Only process inline linked items if the linked group list is absent
+	if (linkedGroupList.empty())
+	{
+		// links recorded in the item save frames, these have no group id
+		for (const auto &[child, parent] : mImpl->mLinkedItems)
+			registerLink(child, parent, 0);
+	}
+
+	auto &linkedGroup = dict["pdbx_item_linked_group"];
+
+	// now store the links in the validator
+	for (const auto &[key, position] : linkIndex)
+	{
+		ValidateLink link = {};
+		std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = key;
+		std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[position];
+
+		// look up the label, the first matching row is used
+		for (auto r : linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
+		{
+			link.mLinkGroupLabel = r["label"].as<std::string>();
+			break;
+		}
+
+		mValidator.addLinkValidator(std::move(link));
+	}
+
+	// finally report every item validator that has no item type
+	for (auto &cv : mValidator.mCategoryValidators)
+	{
+		for (auto &iv : cv.mItemValidators)
+		{
+			if (iv.mType != nullptr or cif::VERBOSE < 0)
+				continue;
+
+			std::cerr << "Missing item_type for " << iv.mTag << std::endl;
+		}
+	}
+}
+
+/// Parses the complete dictionary from the input and fills mValidator with
+/// the category, item and link validators it defines, followed by the
+/// dictionary title and version.
+///
+/// While parsing, mDataBlock points at a temporary datablock owned by this
+/// function. Its previous value is restored on every way out of the function,
+/// exceptions included, so it is never left dangling. Exceptions raised while
+/// parsing are rethrown after an optional message on std::cerr; error() is
+/// called for item validators whose category is not defined.
+void DictParser::loadDictionary()
+{
+	std::unique_ptr<Datablock> dict;
+
+	// Puts the original mDataBlock back when this scope is left. Declared
+	// after dict so that it runs before the temporary datablock is destroyed.
+	struct DatablockRestorer
+	{
+		Datablock *&mSlot;
+		Datablock *mSaved;
+
+		~DatablockRestorer() { mSlot = mSaved; }
+	} restoreDatablock{mDataBlock, mDataBlock};
+
+	try
+	{
+		while (mLookahead != eCIFTokenEOF)
+		{
+			switch (mLookahead)
+			{
+				case eCIFTokenGLOBAL:
+					parseGlobal();
+					break;
+
+				default:
+				{
+					dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
+					mDataBlock = dict.get();
+
+					match(eCIFTokenDATA);
+					parseDataBlock();
+					break;
+				}
+			}
+		}
+	}
+	catch (const std::exception &)
+	{
+		if (cif::VERBOSE >= 0)
+			std::cerr << "Error parsing dictionary" << std::endl;
+		throw;
+	}
+
+	// store all validators
+	for (auto &ic : mImpl->mCategoryValidators)
+		mValidator.addCategoryValidator(std::move(ic));
+	mImpl->mCategoryValidators.clear();
+
+	for (auto &iv : mImpl->mItemValidators)
+	{
+		auto cv = mValidator.getValidatorForCategory(iv.first);
+		if (cv == nullptr)
+			error("Undefined category '" + iv.first + "'");
+
+		for (auto &v : iv.second)
+			const_cast<ValidateCategory *>(cv)->addItemValidator(std::move(v));
+	}
+
+	// resolve the links; this also reports item validators without a type
+	if (dict)
+		linkItems();
+
+	// store meta information; there is nothing to read when no datablock is available
+	if (mDataBlock != nullptr)
+	{
+		Datablock::iterator info;
+		bool n;
+		std::tie(info, n) = mDataBlock->emplace("dictionary");
+
+		// NOTE(review): if the flag returned by emplace() means "category was
+		// newly created", this branch reads from an empty category and the
+		// condition is probably inverted -- confirm against Datablock::emplace.
+		if (n)
+		{
+			auto r = info->front();
+			mValidator.dictName(r["title"].as<std::string>());
+			mValidator.dictVersion(r["version"].as<std::string>());
+		}
+	}
+
+	mImpl->mItemValidators.clear();
+}
+
+/// Registers a type validator with mValidator for every row of the
+/// `item_type_list` category in the current datablock.
+///
+/// @return true when at least one type validator was added.
+///
+/// error() is called when there is no current datablock. Any std::exception
+/// raised while building or adding a validator is rethrown nested inside a
+/// parse_error carrying the line number of the offending row.
+bool DictParser::collectItemTypes()
+{
+	if (not mDataBlock)
+		error("no datablock");
+
+	bool added = false;
+
+	for (auto &t : (*mDataBlock)["item_type_list"])
+	{
+		std::string code, primitiveCode, construct;
+		cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
+
+		// turn the escape sequences written in the dictionary into real
+		// characters and drop backslash-newline pairs
+		ba::replace_all(construct, "\\n", "\n");
+		ba::replace_all(construct, "\\t", "\t");
+		ba::replace_all(construct, "\\\n", "");
+
+		try
+		{
+			mValidator.addTypeValidator(ValidateType{
+				code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize)});
+		}
+		catch (const std::exception &)
+		{
+			throw_with_nested(parse_error(t.lineNr(), "error in regular expression"));
+		}
+
+		// Do not replace an already defined type validator, this won't work with pdbx_v40
+		// as it has a name that is too strict for its own names :-)
+		//		if (mFileImpl.mTypeValidators.count(v))
+		//			mFileImpl.mTypeValidators.erase(v);
+
+		if (VERBOSE >= 5)
+		{
+			std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
+		}
+
+		added = true;
+	}
+
+	return added;
+}
+
+} // namespace cif
--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
@@ -35,6 +35,8 @@
 // #include <cif++/CifValidator.hpp>
 // #include <cif++/CifParser.hpp>

+#include <cif++/v2/parser.hpp>
+
 namespace tt = boost::test_tools;

 std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
@@ -264,45 +266,54 @@ BOOST_AUTO_TEST_CASE(ci_1)

 // --------------------------------------------------------------------

-// BOOST_AUTO_TEST_CASE(f_1)
-// {
-// 	// using namespace mmcif;
+// Parses a small CIF text with one looped category and checks that the
+// datablock, the category and its three rows come out as written.
+BOOST_AUTO_TEST_CASE(f_1)
+{
+	// using namespace mmcif;

-// 	auto f = R"(data_TEST
-// #
-// loop_
-// _test.id
-// _test.name
-// 1 aap
-// 2 noot
-// 3 mies
-//     )"_cf;
+	auto f = R"(data_TEST
+#
+loop_
+_test.id
+_test.name
+1 aap
+2 noot
+3 mies
+    )"_cf;

-// 	BOOST_ASSERT(not f.empty());
-// 	BOOST_ASSERT(f.size() == 1);
+	// BOOST_REQUIRE instead of BOOST_ASSERT: the latter is a plain assert that
+	// is compiled out with NDEBUG, which would let f.front() below run on an
+	// empty file. BOOST_REQUIRE reports the failure and stops this test case.
+	BOOST_REQUIRE(not f.empty());
+	BOOST_REQUIRE(f.size() == 1);

-// 	auto &db = f.front();
+	auto &db = f.front();

-// 	BOOST_CHECK(db.name() == "TEST");
+	BOOST_CHECK(db.name() == "TEST");

-// 	auto &test = db["test"];
-// 	BOOST_CHECK(test.size() == 3);
+	auto &test = db["test"];
+	BOOST_CHECK(test.size() == 3);

-// 	// wrong! the next lines will crash. And that's OK, don't do that
-// 	// for (auto r: test)
-// 	// 	test.erase(r);
+	// expected names, in row order
+	const char *ts[] = {"aap", "noot", "mies"};

-// 	// BOOST_CHECK(test.empty());
+	// ids are expected to count up from 1 in row order
+	int n = 1;
+	for (const auto &[i, s] : test.rows<int, std::string>("id", "name"))
+	{
+		BOOST_CHECK_EQUAL(i, n);
+		BOOST_CHECK_EQUAL(s.compare(ts[n - 1]), 0);
+		++n;
+	}

-// 	// test.purge();
+	// for (auto r: test)
+	// 	test.erase(r);

-// 	// auto n = test.erase(cif::Key("id") == 1, [](const cif::Row &r)
-// 	// 	{
-//     //     BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
-//     //     BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); });
+	// BOOST_CHECK(test.empty());

-// 	// BOOST_CHECK_EQUAL(n, 1);
-// }
+	// test.clear();
+
+	// auto n = test.erase(cif::Key("id") == 1, [](const cif::Row &r)
+	// 	{
+    //     BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
+    //     BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); });
+
+	// BOOST_CHECK_EQUAL(n, 1);
+}

 // // --------------------------------------------------------------------