More tests

ff62efe7 · Maarten L. Hekkelman · 24078771 · ff62efe7 · ff62efe7 · ff62efe7
Commit ff62efe7 authored Aug 11, 2022 by Maarten L. Hekkelman
Showing with 349 additions and 206 deletions

CMakeLists.txt
+1 -0

include/cif++/cif/file.hpp
+18 -109

include/cif++/cif/parser.hpp
+3 -0

src/cif/category.cpp
+15 -14

src/cif/file.cpp
+169 -0

src/cif/parser.cpp
+8 -1

test/unit-v2-test.cpp
+135 -82

No files found.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -197,6 +197,7 @@ set(project_sources
 	${PROJECT_SOURCE_DIR}/src/cif/condition.cpp
 	${PROJECT_SOURCE_DIR}/src/cif/datablock.cpp
 	${PROJECT_SOURCE_DIR}/src/cif/dictionary_parser.cpp
+	${PROJECT_SOURCE_DIR}/src/cif/file.cpp
 	${PROJECT_SOURCE_DIR}/src/cif/item.cpp
 	${PROJECT_SOURCE_DIR}/src/cif/parser.cpp
 	${PROJECT_SOURCE_DIR}/src/cif/row.cpp

--- a/include/cif++/cif/file.hpp
+++ b/include/cif++/cif/file.hpp
@@ -26,8 +26,6 @@
 #pragma once
-#include <cif++/cif/forward_decl.hpp>
 #include <cif++/cif/datablock.hpp>
 #include <cif++/cif/parser.hpp>
@@ -39,7 +37,6 @@ namespace cif::v2
 class file : public std::list<datablock>
 {
  public:
 	file() = default;
 	file(std::istream &is)
@@ -52,126 +49,38 @@ class file : public std::list<datablock>
 	file &operator=(const file &) = default;
 	file &operator=(file &&) = default;
-	void set_validator(const validator *v)
+	void set_validator(const validator *v);
-	{
-		m_validator = v;
-		for (auto &db : *this)
-			db.set_validator(v);
-	}
 	const validator *get_validator() const
 	{
 		return m_validator;
 	}
-	bool is_valid() const
+	bool is_valid() const;
-	{
+	bool is_valid();
-		if (m_validator == nullptr)
-			std::runtime_error("No validator loaded explicitly, cannot continue");
-		bool result = true;
-		for (auto &d : *this)
-			result = d.is_valid() and result;
-		return result;
-	}
-	bool is_valid()
-	{
-		if (m_validator == nullptr)
-		{
-			if (VERBOSE > 0)
-				std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;
-			load_dictionary();
-		}
-		bool result = true;
-		for (auto &d : *this)
-			result = d.is_valid() and result;
-		return result;
+	void load_dictionary();
-	}
+	void load_dictionary(std::string_view name);
-	void load_dictionary()
-	{
-		load_dictionary("mmcif_ddl");
-	}
-	void load_dictionary(std::string_view name)
+	datablock &operator[](std::string_view name);
-	{
+	const datablock &operator[](std::string_view name) const;
-		set_validator(&validator_factory::instance()[name]);
-	}
-	datablock &operator[](std::string_view name)
+	std::tuple<iterator, bool> emplace(std::string_view name);
-	{
-		auto i = std::find_if(begin(), end(), [name](const datablock &c)
-			{ return iequals(c.name(), name); });
-		if (i != end())
-			return *i;
-		emplace_back(name);
-		return back();
-	}
-	const datablock &operator[](std::string_view name) const
+	void load(const std::filesystem::path &p);
-	{
+	void load(std::istream &is);
-		static const datablock s_empty;
-		auto i = std::find_if(begin(), end(), [name](const datablock &c)
-			{ return iequals(c.name(), name); });
-		return i == end() ? s_empty : *i;
-	}
-	std::tuple<iterator, bool> emplace(std::string_view name)
+	void save(const std::filesystem::path &p) const;
-	{
+	void save(std::ostream &os) const;
-		bool is_new = true;
-		auto i = begin();
-		while (i != end())
-		{
-			if (iequals(name, i->name()))
-			{
-				is_new = false;
-				if (i != begin())
-				{
-					auto n = std::next(i);
-					splice(begin(), *this, i, n);
-				}
-				break;
-			}
-			++i;
-		}
-		if (is_new)
-		{
-			auto &db = emplace_front(name);
-			db.set_validator(m_validator);
-		}
-		return std::make_tuple(begin(), is_new);		
-	}
-	void load(std::istream &is)
+	friend std::ostream &operator<<(std::ostream &os, const file &f)
 	{
-		auto saved = m_validator;
+		f.save(os);
-		set_validator(nullptr);
+		return os;
-		parser p(is, *this);
-		p.parse_file();
-		if (saved != nullptr)
-		{
-			set_validator(saved);
-			(void)is_valid();
-		}
 	}
  private:
-	const validator* m_validator = nullptr;
+	const validator *m_validator = nullptr;
 };
-}
+} // namespace cif::v2
\ No newline at end of file
--- a/include/cif++/cif/parser.hpp
+++ b/include/cif++/cif/parser.hpp
@@ -44,6 +44,8 @@ class parse_error : public std::runtime_error
 // --------------------------------------------------------------------
+// TODO: Need to implement support for transformed long lines
 class sac_parser
 {
  public:
@@ -228,6 +230,7 @@ class sac_parser
 	{
 		Start,
 		White,
+		Esc,
 		Comment,
 		QuestionMark,
 		Dot,

--- a/src/cif/category.cpp
+++ b/src/cif/category.cpp
@@ -1626,7 +1626,7 @@ namespace detail
 		if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
 		{
 			if (offset > 0)
-				os << std::endl;
+				os << '\n';
 			os << ';';
 			char pc = 0;
@@ -1639,8 +1639,8 @@ namespace detail
 			}
 			if (value.back() != '\n')
-				os << std::endl;
+				os << '\n';
-			os << ';' << std::endl;
+			os << ';' << '\n';
 			offset = 0;
 		}
 		else if (sac_parser::is_unquoted_string(value))
@@ -1690,9 +1690,9 @@ namespace detail
 			if (not done)
 			{
 				if (offset > 0)
-					os << std::endl;
+					os << '\n';
-				os << ';' << value << std::endl
+				os << ';' << value << '\n'
-				   << ';' << std::endl;
+				   << ';' << '\n';
 				offset = 0;
 			}
 		}
@@ -1749,14 +1749,14 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool 
 	if (needLoop)
 	{
-		os << "loop_" << std::endl;
+		os << "loop_" << '\n';
 		std::vector<size_t> columnWidths;
 		for (auto cix : order)
 		{
 			auto &col = m_columns[cix];
-			os << '_' << m_name << '.' << col.m_name << ' ' << std::endl;
+			os << '_' << m_name << '.' << col.m_name << ' ' << '\n';
 			columnWidths.push_back(2);
 		}
@@ -1809,7 +1809,7 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool 
 				if (offset + l > 132 and offset > 0)
 				{
-					os << std::endl;
+					os << '\n';
 					offset = 0;
 				}
@@ -1817,13 +1817,13 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool 
 				if (offset > 132)
 				{
-					os << std::endl;
+					os << '\n';
 					offset = 0;
 				}
 			}
 			if (offset > 0)
-				os << std::endl;
+				os << '\n';
 		}
 	}
 	else
@@ -1863,16 +1863,16 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool 
 			size_t offset = l;
 			if (s.length() + l >= kMaxLineLength)
 			{
-				os << std::endl;
+				os << '\n';
 				offset = 0;
 			}
 			if (detail::write_value(os, s, offset, 1) != 0)
-				os << std::endl;
+				os << '\n';
 		}
 	}
-	os << "# " << std::endl;
+	os << "# " << '\n';
 }
 } // namespace cif::v2
\ No newline at end of file
--- a/src/cif/file.cpp
+++ b/src/cif/file.cpp
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <gzstream/gzstream.hpp>
+#include <cif++/cif/file.hpp>
+namespace cif::v2
+{
+// --------------------------------------------------------------------
+/// Store validator @a v in this file and pass the same pointer on to
+/// every datablock the file currently contains.
+void file::set_validator(const validator *v)
+{
+	m_validator = v;
+	for (auto blk = begin(); blk != end(); ++blk)
+		blk->set_validator(v);
+}
+/// Validate every datablock using the validator that was set explicitly.
+///
+/// This const overload does not load a dictionary itself, so a missing
+/// validator is reported as an error.
+///
+/// @return true only when every datablock's is_valid() returned true.
+/// @throws std::runtime_error when no validator has been set.
+bool file::is_valid() const
+{
+	// The exception has to be thrown: merely constructing it (as the code
+	// did before) creates a discarded temporary and the check has no effect.
+	if (m_validator == nullptr)
+		throw std::runtime_error("No validator loaded explicitly, cannot continue");
+	bool result = true;
+	// Call is_valid() on every block, even after a failure was seen.
+	for (auto &d : *this)
+		result = d.is_valid() and result;
+	return result;
+}
+/// Validate every datablock; when no validator is set yet, load_dictionary()
+/// is called first (with a notice on std::cerr if VERBOSE > 0).
+///
+/// @return true only when every datablock's is_valid() returned true.
+bool file::is_valid()
+{
+	const bool have_validator = m_validator != nullptr;
+	if (not have_validator)
+	{
+		if (VERBOSE > 0)
+			std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;
+		load_dictionary();
+	}
+	bool all_ok = true;
+	for (auto blk = begin(); blk != end(); ++blk)
+	{
+		// Each block is checked regardless of earlier failures.
+		if (not blk->is_valid())
+			all_ok = false;
+	}
+	return all_ok;
+}
+/// Load the default dictionary, which is the one named "mmcif_ddl".
+void file::load_dictionary()
+{
+	constexpr std::string_view kDefaultDictionary{"mmcif_ddl"};
+	load_dictionary(kDefaultDictionary);
+}
+/// Ask the validator_factory instance for the validator called @a name
+/// and install it on this file through set_validator().
+void file::load_dictionary(std::string_view name)
+{
+	auto &&factory = validator_factory::instance();
+	set_validator(&factory[name]);
+}
+/// Return the datablock whose name equals @a name according to iequals().
+/// When no such datablock exists, a new one constructed from @a name is
+/// appended at the back and returned.
+/// Note: unlike emplace(), the new datablock is not given m_validator here.
+datablock &file::operator[](std::string_view name)
+{
+	for (auto pos = begin(); pos != end(); ++pos)
+	{
+		const datablock &candidate = *pos;
+		if (iequals(candidate.name(), name))
+			return *pos;
+	}
+	emplace_back(name);
+	return back();
+}
+/// Const lookup of a datablock by name (compared with iequals()).
+/// When nothing matches, a reference to a function-local static,
+/// default-constructed datablock is returned instead.
+const datablock &file::operator[](std::string_view name) const
+{
+	static const datablock s_empty;
+	for (auto pos = begin(); pos != end(); ++pos)
+	{
+		if (iequals(pos->name(), name))
+			return *pos;
+	}
+	return s_empty;
+}
+/// Make sure a datablock named @a name exists and is the first element.
+///
+/// An existing datablock with a matching name (iequals()) is spliced to the
+/// front; otherwise a new one is emplaced at the front and given the
+/// file's current validator.
+///
+/// @return tuple of begin() and a flag that is true when a new datablock
+///         was created.
+std::tuple<file::iterator, bool> file::emplace(std::string_view name)
+{
+	for (auto pos = begin(); pos != end(); ++pos)
+	{
+		if (not iequals(name, pos->name()))
+			continue;
+		// Found an existing block: move it to the front unless it is there already.
+		if (pos != begin())
+			splice(begin(), *this, pos, std::next(pos));
+		return std::make_tuple(begin(), false);
+	}
+	auto &created = emplace_front(name);
+	created.set_validator(m_validator);
+	return std::make_tuple(begin(), true);
+}
+/// Parse the content of stream @a is into this file.
+///
+/// The validator is reset to nullptr before parsing. If a validator was set
+/// beforehand, it is installed again after parsing and is_valid() is run;
+/// its boolean result is ignored.
+void file::load(std::istream &is)
+{
+	const validator *const previous = m_validator;
+	set_validator(nullptr);
+	parser reader(is, *this);
+	reader.parse_file();
+	if (previous == nullptr)
+		return;
+	set_validator(previous);
+	(void)is_valid();
+}
+/// Write this file to path @a p.
+/// A ".gz" extension selects a gzstream::ofstream; any other extension
+/// uses a std::ofstream opened in binary mode.
+void file::save(const std::filesystem::path &p) const
+{
+	const bool compressed = p.extension() == ".gz";
+	if (not compressed)
+	{
+		std::ofstream plain(p, std::ios_base::binary);
+		save(plain);
+		return;
+	}
+	gzstream::ofstream zipped(p);
+	save(zipped);
+}
+/// Write each datablock, in list order, to @a os by calling its write().
+void file::save(std::ostream &os) const
+{
+	for (auto blk = begin(); blk != end(); ++blk)
+		blk->write(os);
+}
+} // namespace cif::v2
\ No newline at end of file
--- a/src/cif/parser.cpp
+++ b/src/cif/parser.cpp
@@ -195,7 +195,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
 				else
 					m_bol = (ch == '\n');
 				break;
 			case State::Comment:
 				if (ch == '\n')
 				{
@@ -214,10 +214,17 @@ sac_parser::CIFToken sac_parser::get_next_token()
 					state = State::TextField + 1;
 				else if (ch == kEOF)
 					error("unterminated textfield");
+				// else if (ch == '\\')
+				// 	state = State::Esc;
 				else if (not is_any_print(ch))
 					warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
 				break;
+			// case State::Esc:
+			// 	if (ch == '\n')
+			// 	break;
 			case State::TextField + 1:
 				if (is_text_lead(ch) or ch == ' ' or ch == '\t')
 					state = State::TextField;

--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
@@ -348,19 +348,18 @@ _test.name
 		++n;
 	}
-	// for (auto r: test)
+	auto n2 = test.erase(cif::v2::key("id") == 1, [](cif::v2::row_handle r)
-	// 	test.erase(r);
+		{
+        BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
-	// BOOST_CHECK(test.empty());
+        BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); });
-	// test.clear();
+	BOOST_CHECK_EQUAL(n2, 1);
-	// auto n = test.erase(cif::v2::key("id") == 1, [](const cif::Row &r)
+	// for (auto r: test)
-	// 	{
+	// 	test.erase(r);
-    //     BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
-    //     BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); });
-	// BOOST_CHECK_EQUAL(n, 1);
+	test.clear();
+	BOOST_CHECK(test.empty());
 }
 // --------------------------------------------------------------------
@@ -2002,96 +2001,150 @@ BOOST_AUTO_TEST_CASE(reading_file_1)
 	BOOST_CHECK_THROW(file.load(is), std::runtime_error);
 }
-// BOOST_AUTO_TEST_CASE(parser_test_1)
+BOOST_AUTO_TEST_CASE(parser_test_1)
-// {
+{
-// 	auto data1 = R"(
+	auto data1 = R"(
-// data_QM
+data_QM
-// _test.text ??
+_test.text ??
-// )"_cf;
+)"_cf;
-// 	auto &db1 = data1.front();
+	auto &db1 = data1.front();
-// 	auto &test1 = db1["test"];
+	auto &test1 = db1["test"];
-// 	BOOST_CHECK_EQUAL(test1.size(), 1);
+	BOOST_CHECK_EQUAL(test1.size(), 1);
-// 	for (auto r : test1)
+	for (auto r : test1)
-// 	{
+	{
-// 		const auto &[text] = r.get<std::string>({"text"});
+		const auto &[text] = r.get<std::string>({"text"});
-// 		BOOST_CHECK_EQUAL(text, "??");
+		BOOST_CHECK_EQUAL(text, "??");
-// 	}
+	}
-// 	std::stringstream ss;
+	std::stringstream ss;
-// 	data1.save(ss);
+	data1.save(ss);
-// 	auto data2 = cif::File(ss);
+	auto data2 = cif::v2::file(ss);
-// 	auto &db2 = data2.front();
+	auto &db2 = data2.front();
-// 	auto &test2 = db2["test"];
+	auto &test2 = db2["test"];
-// 	BOOST_CHECK_EQUAL(test2.size(), 1);
+	BOOST_CHECK_EQUAL(test2.size(), 1);
-// 	for (auto r : test2)
+	for (auto r : test2)
-// 	{
+	{
-// 		const auto &[text] = r.get<std::string>({"text"});
+		const auto &[text] = r.get<std::string>({"text"});
-// 		BOOST_CHECK_EQUAL(text, "??");
+		BOOST_CHECK_EQUAL(text, "??");
-// 	}
+	}
-// }
+}
-// BOOST_AUTO_TEST_CASE(output_test_1)
+BOOST_AUTO_TEST_CASE(output_test_1)
-// {
+{
-// 	auto data1 = R"(
+	auto data1 = R"(
-// data_Q
+data_Q
-// loop_
+loop_
-// _test.text
+_test.text
-// "stop_the_crap"
+"stop_the_crap"
-// 'and stop_ this too'
+'and stop_ this too'
-// 'data_dinges'
+'data_dinges'
-// 'blablaglobal_bla'
+'blablaglobal_bla'
-// boo.data_.whatever
+boo.data_.whatever
-// )"_cf;
+)"_cf;
+	auto &db1 = data1.front();
+	auto &test1 = db1["test"];
+	struct T {
+		const char *s;
+		bool q;
+	} kS[] = {
+		{ "stop_the_crap", false },
+		{ "and stop_ this too", false },
+		{ "data_dinges", false },
+		{ "blablaglobal_bla", false },
+		{ "boo.data_.whatever", true }
+	};
-// 	auto &db1 = data1.front();
+	BOOST_CHECK_EQUAL(test1.size(), sizeof(kS) / sizeof(T));
-// 	auto &test1 = db1["test"];
-// 	struct T {
+	size_t i = 0;
-// 		const char *s;
+	for (auto r : test1)
-// 		bool q;
+	{
-// 	} kS[] = {
+		const auto &[text] = r.get<std::string>({"text"});
-// 		{ "stop_the_crap", false },
+		BOOST_CHECK_EQUAL(text, kS[i].s);
-// 		{ "and stop_ this too", false },
+		BOOST_CHECK_EQUAL(cif::v2::sac_parser::is_unquoted_string(kS[i].s), kS[i].q);
-// 		{ "data_dinges", false },
+		++i;
-// 		{ "blablaglobal_bla", false },
+	}
-// 		{ "boo.data_.whatever", true }
-// 	};
-// 	BOOST_CHECK_EQUAL(test1.size(), sizeof(kS) / sizeof(T));
+	std::stringstream ss;
+	data1.save(ss);
-// 	size_t i = 0;
+	auto data2 = cif::v2::file(ss);
-// 	for (auto r : test1)
-// 	{
-// 		const auto &[text] = r.get<std::string>({"text"});
-// 		BOOST_CHECK_EQUAL(text, kS[i].s);
-// 		BOOST_CHECK_EQUAL(cif::isUnquotedString(kS[i].s), kS[i].q);
-// 		++i;
-// 	}
-// 	std::stringstream ss;
+	auto &db2 = data2.front();
-// 	data1.save(ss);
+	auto &test2 = db2["test"];
-// 	auto data2 = cif::File(ss);
+	BOOST_CHECK_EQUAL(test2.size(), sizeof(kS) / sizeof(T));
-// 	auto &db2 = data2.front();
+	i = 0;
-// 	auto &test2 = db2["test"];
+	for (auto r : test2)
+	{
+		const auto &[text] = r.get<std::string>({"text"});
+		BOOST_CHECK_EQUAL(text, kS[i++].s);
+	}
+}
-// 	BOOST_CHECK_EQUAL(test2.size(), sizeof(kS) / sizeof(T));
+BOOST_AUTO_TEST_CASE(output_test_2)
+{
+	auto data1 = R"(
+data_Q
+loop_
+_test.text
+;A very, very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong line
+;
+;A line with a newline, look:
+There it was!
+;
+)"_cf;
+	auto &db1 = data1.front();
+	auto &test1 = db1["test"];
+	struct T {
+		const char *s;
+		bool q;
+	} kS[] = {
+		{ "A very, very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooong line", false },
+		{ R"(A line with a newline, look:
+There it was!)", false}
+	};
-// 	i = 0;
+	BOOST_CHECK_EQUAL(test1.size(), sizeof(kS) / sizeof(T));
-// 	for (auto r : test2)
-// 	{
+	size_t i = 0;
-// 		const auto &[text] = r.get<std::string>({"text"});
+	for (auto r : test1)
-// 		BOOST_CHECK_EQUAL(text, kS[i++].s);
+	{
-// 	}
+		const auto &[text] = r.get<std::string>({"text"});
-// }
+		BOOST_CHECK_EQUAL(text, kS[i].s);
+		BOOST_CHECK_EQUAL(cif::v2::sac_parser::is_unquoted_string(kS[i].s), kS[i].q);
+		++i;
+	}
+	std::stringstream ss;
+	data1.save(ss);
+	auto data2 = cif::v2::file(ss);
+	auto &db2 = data2.front();
+	auto &test2 = db2["test"];
+	BOOST_CHECK_EQUAL(test2.size(), sizeof(kS) / sizeof(T));
+	i = 0;
+	for (auto r : test2)
+	{
+		const auto &[text] = r.get<std::string>({"text"});
+		BOOST_CHECK_EQUAL(text, kS[i++].s);
+	}
+}
 BOOST_AUTO_TEST_CASE(trim_test)