Getting rid of boost/algorithm/string

a855f880 · Maarten L. Hekkelman · cfa2acd6 · a855f880 · a855f880 · a855f880
Commit a855f880 authored Aug 10, 2022 by Maarten L. Hekkelman
23 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,10 +6,10 @@
 # modification, are permitted provided that the following conditions are met:

 # 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer
+# list of conditions and the following disclaimer
 # 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.

 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
@@ -46,7 +46,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
 elseif(MSVC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
 endif()

 # Building shared libraries?
@@ -57,6 +57,7 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

 # Optionally build a version to be installed inside CCP4
 option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)
+
 if(BUILD_FOR_CCP4)
 	if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
 		message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
@@ -79,6 +80,7 @@ if(EXISTS "$ENV{CCP4}")
 	set(CCP4 $ENV{CCP4})
 	set(CLIBD ${CCP4}/lib/data)
 endif()
+
 if(CCP4 AND NOT CLIBD)
 	set(CLIBD ${CCP4}/lib/data)
 endif()
@@ -97,13 +99,12 @@ else()
 endif()

 # set(CMAKE_DEBUG_POSTFIX d)
-
 if(MSVC)
-    # make msvc standards compliant...
-    add_compile_options(/permissive-)
+	# make msvc standards compliant...
+	add_compile_options(/permissive-)

 	macro(get_WIN32_WINNT version)
-		if (WIN32 AND CMAKE_SYSTEM_VERSION)
+		if(WIN32 AND CMAKE_SYSTEM_VERSION)
 			set(ver ${CMAKE_SYSTEM_VERSION})
 			string(REPLACE "." "" ver ${ver})
 			string(REGEX REPLACE "([0-9])" "0\\1" ver ${ver})
@@ -129,7 +130,6 @@ if(UNIX AND NOT APPLE AND NOT BUILD_FOR_CCP4 AND CMAKE_INSTALL_PREFIX_INITIALIZE
 endif()

 # Optionally use mrc to create resources
-
 if(WIN32 AND BUILD_SHARED_LIBS)
 	message("Not using resources when building shared libraries for Windows")
 else()
@@ -150,17 +150,12 @@ else()
 endif()

 # Libraries
-
 set(CMAKE_THREAD_PREFER_PTHREAD)
 set(THREADS_PREFER_PTHREAD_FLAG)
 find_package(Threads)

-find_package(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
-
-if(NOT MSVC AND Boost_USE_STATIC_LIBS)
-	find_package(ZLIB REQUIRED)
-	list(APPEND CIFPP_REQUIRED_LIBRARIES ZLIB::ZLIB)
-endif()
+find_package(Boost 1.70.0 REQUIRED COMPONENTS system regex program_options)
+find_package(gzstream REQUIRED)

 include(FindFilesystem)
 list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})
@@ -175,21 +170,21 @@ write_version_header("LibCIFPP")
 # SymOp data table
 if(CIFPP_RECREATE_SYMOP_DATA)
 	# The tool to create the table
-
 	add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")

 	target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} ${CIFPP_REQUIRED_LIBRARIES})
+
 	if(Boost_INCLUDE_DIR)
 		target_include_directories(symop-map-generator PUBLIC ${Boost_INCLUDE_DIR})
 	endif()

 	set($ENV{CLIBD} ${CLIBD})
-	
+
 	add_custom_command(
 		OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
 		COMMAND $<TARGET_FILE:symop-map-generator> ${CLIBD}/syminfo.lib ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
-		)
-	
+	)
+
 	add_custom_target(
 		OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
 		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
@@ -197,8 +192,7 @@ if(CIFPP_RECREATE_SYMOP_DATA)
 endif()

 # Sources
-
-set(project_sources 
+set(project_sources
 	${PROJECT_SOURCE_DIR}/src/AtomType.cpp
 	${PROJECT_SOURCE_DIR}/src/BondMap.cpp
 	${PROJECT_SOURCE_DIR}/src/Cif++.cpp
@@ -224,7 +218,7 @@ set(project_sources
 	${PROJECT_SOURCE_DIR}/src/v2/validate.cpp
 )

-set(project_headers 
+set(project_headers
 	${PROJECT_SOURCE_DIR}/include/cif++/AtomType.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/BondMap.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
@@ -249,7 +243,7 @@ target_include_directories(cifpp
 	PUBLIC
 	"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
 	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
-	${Boost_INCLUDE_DIR}
+	${Boost_INCLUDE_DIR} ${gzstream_INCLUDE_DIR}
 )

 target_include_directories(cifpp
@@ -257,21 +251,21 @@ target_include_directories(cifpp
 	${CMAKE_BINARY_DIR}
 )

-target_link_libraries(cifpp PUBLIC Threads::Threads Boost::regex Boost::iostreams ${CIFPP_REQUIRED_LIBRARIES})
-# target_link_libraries(cifpp PRIVATE)
+target_link_libraries(cifpp PUBLIC Threads::Threads Boost::regex gzstream::gzstream ${CIFPP_REQUIRED_LIBRARIES})

-if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
-    target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
-endif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+# target_link_libraries(cifpp PRIVATE)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+	target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
+endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")

 option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" OFF)
+
 if(CIFPP_DOWNLOAD_CCD)
 	# download the components.cif file from CCD
 	set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)

-	if (NOT EXISTS ${COMPONENTS_CIF})
-
-		if (NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
+	if(NOT EXISTS ${COMPONENTS_CIF})
+		if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
 			file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
 		endif()

@@ -301,8 +295,8 @@ endif()
 generate_export_header(cifpp
 	EXPORT_FILE_NAME cif++/Cif++Export.hpp)

-set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} )
-set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} )
+set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
+set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
 set(SHARE_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/libcifpp)

 set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "The directory containing the provided data files")
@@ -310,7 +304,6 @@ set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "
 target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")

 # Install rules
-
 install(TARGETS cifpp
 	EXPORT cifppTargets
 	ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
@@ -357,8 +350,8 @@ configure_package_config_file(Config.cmake.in
 )

 install(FILES
-		"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
-		"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
 	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
 	COMPONENT Devel
 )
@@ -370,21 +363,20 @@ set_target_properties(cifpp PROPERTIES
 	INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})

 set_property(TARGET cifpp APPEND PROPERTY
-  COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
+	COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
 )

 write_basic_package_version_file(
-  "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
-  VERSION ${PROJECT_VERSION}
-  COMPATIBILITY AnyNewerVersion
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
+	VERSION ${PROJECT_VERSION}
+	COMPATIBILITY AnyNewerVersion
 )

 # pkgconfig support
-
-set(prefix      ${CMAKE_INSTALL_PREFIX})
+set(prefix ${CMAKE_INSTALL_PREFIX})
 set(exec_prefix ${CMAKE_INSTALL_PREFIX})
-set(libdir      ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
-set(includedir  ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
+set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
+set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})

 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
 	${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in @ONLY)
@@ -393,12 +385,11 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
 install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

 # Unit tests
-
 option(CIFPP_BUILD_TESTS "Build test exectuables" OFF)

 if(CIFPP_BUILD_TESTS)
-
 	list(APPEND CIFPP_tests
+
 		# pdb2cif
 		rename-compound
 		structure
@@ -414,10 +405,10 @@ if(CIFPP_BUILD_TESTS)

 		target_include_directories(${CIFPP_TEST} PRIVATE
 			${CMAKE_CURRENT_SOURCE_DIR}/include
-			${CMAKE_CURRENT_BINARY_DIR}  # for config.h
+			${CMAKE_CURRENT_BINARY_DIR} # for config.h
 		)

-		target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp )
+		target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp)

 		if(CIFPP_USE_RSRC)
 			mrc_target_resources(${CIFPP_TEST} ${CMAKE_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic)
@@ -436,14 +427,12 @@ if(CIFPP_BUILD_TESTS)

 		add_test(NAME ${CIFPP_TEST}
 			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${CMAKE_SOURCE_DIR}/test)
-
 	endforeach()
 endif()

 message("Will install in ${CMAKE_INSTALL_PREFIX}")

 # Optionally install the update scripts for CCD and dictionary files
-
 if(CIFPP_INSTALL_UPDATE_SCRIPT)
 	set(CIFPP_CRON_DIR "$ENV{DESTDIR}/etc/cron.weekly")

@@ -468,4 +457,3 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)

 	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
 endif()
-
--- a/Config.cmake.in
+++ b/Config.cmake.in
@@ -2,10 +2,8 @@

 include(CMakeFindDependencyMacro)
 find_dependency(Threads)
-find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex)
-if(NOT WIN32)
-find_dependency(ZLIB)
-endif()
+find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system regex)
+find_dependency(gzstream)

 INCLUDE("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")


--- a/include/cif++/CifUtils.hpp
+++ b/include/cif++/CifUtils.hpp
@@ -72,7 +72,124 @@ bool iequals(const char *a, const char *b);
 int icompare(const char *a, const char *b);

 void toLower(std::string &s);
-std::string toLowerCopy(const std::string &s);
+std::string toLowerCopy(std::string_view s);
+
+void toUpper(std::string &s);
+// std::string toUpperCopy(const std::string &s);
+
+template <typename IterType>
+std::string join(IterType b, IterType e, std::string_view sep)
+{
+	std::ostringstream s;
+
+	if (b != e)
+	{
+		auto ai = b;
+		auto ni = std::next(ai);
+
+		for (;;)
+		{
+			s << *ai;
+			ai = ni;
+			ni = std::next(ai);
+
+			if (ni == e)
+				break;
+
+			s << sep;
+		}
+	}
+
+	return s.str();
+}
+
+template <typename V>
+std::string join(const V &arr, std::string_view sep)
+{
+	return join(arr.begin(), arr.end(), sep);
+}
+
+template<typename StringType = std::string_view>
+std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
+{
+	std::vector<StringType> result;
+
+	auto b = s.begin();
+	auto e = b;
+
+	while (e != s.end())
+	{
+		if (separators.find(*e) != std::string_view::npos)
+		{
+			if (e > b or not suppress_empty)
+				result.emplace_back(b, e - b);
+			b = e = e + 1;
+			continue;
+		}
+
+		++e;
+	}
+
+	if (e > b or not suppress_empty)
+		result.emplace_back(b, e - b);
+
+	return result;
+}
+
+void replace_all(std::string &s, std::string_view what, std::string_view with = {});
+
+#if defined(__cpp_lib_starts_ends_with)
+
+inline bool starts_with(std::string s, std::string_view with)
+{
+	return s.starts_with(with);
+}
+
+inline bool ends_with(std::string_view s, std::string_view with)
+{
+	return s.ends_with(with);
+}
+
+#else
+
+inline bool starts_with(std::string s, std::string_view with)
+{
+	return s.compare(0, with.length(), with) == 0;
+}
+
+inline bool ends_with(std::string_view s, std::string_view with)
+{
+	return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
+}
+
+#endif
+
+#if defined(__cpp_lib_string_contains)
+
+inline bool contains(std::string_view s, std::string_view q)
+{
+	return s.contains(q);
+}
+
+#else
+
+inline bool contains(std::string_view s, std::string_view q)
+{
+	return s.find(q) != std::string_view::npos;
+}
+
+#endif
+
+bool icontains(std::string_view s, std::string_view q);
+
+void trim_left(std::string &s);
+void trim_right(std::string &s);
+void trim(std::string &s);
+
+std::string trim_left_copy(std::string_view s);
+std::string trim_right_copy(std::string_view s);
+std::string trim_copy(std::string_view s);
+

 // To make life easier, we also define iless and iset using iequals


--- a/include/cif++/Secondary.hpp
+++ b/include/cif++/Secondary.hpp
@@ -28,6 +28,8 @@

 #pragma once

+#include <vector>
+
 namespace mmcif
 {
 	

--- a/include/cif++/Structure.hpp
+++ b/include/cif++/Structure.hpp
@@ -599,8 +599,6 @@ class File : public cif::File
 	void load(const std::filesystem::path &p) override;
 	void save(const std::filesystem::path &p) override;

-	void load(std::istream &is) override;
-
 	using cif::File::load;
 	using cif::File::save;


--- a/include/cif++/v2/category.hpp
+++ b/include/cif++/v2/category.hpp
@@ -394,6 +394,34 @@ class category
 	}

 	// --------------------------------------------------------------------
+	/// \brief generate a new, unique ID. Pass it an ID generating function
+	/// based on a sequence number. This function will be called until the
+	/// result is unique in the context of this category
+	std::string get_unique_id(std::function<std::string(int)> generator = cif::cifIdForNumber);
+	std::string get_unique_id(const std::string &prefix)
+	{
+		return get_unique_id([prefix](int nr)
+			{ return prefix + std::to_string(nr + 1); });
+	}
+
+	// --------------------------------------------------------------------
+
+	/// \brief Rename a single column in the rows that match \a cond to value \a value
+	/// making sure the linked categories are updated according to the link.
+	/// That means, child categories are updated if the links are absolute
+	/// and unique. If they are not, the child category rows are split.
+
+	void update_value(condition &&cond, std::string_view tag, std::string_view value)
+	{
+		auto rs = find(std::move(cond));
+		std::vector<row_handle> rows;
+		std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
+		update_value(rows, tag, value);
+	}
+
+	void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);
+
+	// --------------------------------------------------------------------
 	/// \brief Return the index number for \a column_name

 	uint16_t get_column_ix(std::string_view column_name) const
@@ -573,6 +601,8 @@ class category
 		}
 	}

+	row_handle create_copy(row_handle r);
+
 	struct item_column
 	{
 		std::string m_name;
@@ -607,6 +637,7 @@ class category
 	const category_validator *m_cat_validator = nullptr;
 	std::vector<link> m_parent_links, m_child_links;
 	bool m_cascade = true;
+	uint32_t m_last_unique_num = 0;
 	class category_index* m_index = nullptr;
 	row *m_head = nullptr, *m_tail = nullptr;
 };

--- a/include/cif++/v2/item.hpp
+++ b/include/cif++/v2/item.hpp
@@ -452,8 +452,9 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::str
 {
 	static std::string convert(const item_handle &ref)
 	{
-		std::string_view txt = ref.text();
-		return {txt.data(), txt.size()};
+		if (ref.empty())
+			return {};
+		return {ref.text().data(), ref.text().size()};
 	}

 	static int compare(const item_handle &ref, const std::string &value, bool icase)

--- a/include/cif++/v2/validate.hpp
+++ b/include/cif++/v2/validate.hpp
@@ -31,16 +31,10 @@

 // duh.. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
 // #include <regex>
-
-// TODO: get rid of boost::iostreams
-#include <boost/iostreams/filter/gzip.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
 #include <boost/regex.hpp>

 #include <cif++/CifUtils.hpp>

-namespace io = boost::iostreams;
-
 namespace cif::v2
 {


--- a/src/Cif++.cpp
+++ b/src/Cif++.cpp
@@ -38,9 +38,8 @@

 #include <filesystem>

-#include <boost/algorithm/string.hpp>
-#include <boost/iostreams/filter/gzip.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
+#include <gzstream/gzstream.hpp>
+
 #include <boost/logic/tribool.hpp>

 #include <cif++/Cif++.hpp>
@@ -48,8 +47,6 @@
 #include <cif++/CifUtils.hpp>
 #include <cif++/CifValidator.hpp>

-namespace ba = boost::algorithm;
-namespace io = boost::iostreams;
 namespace fs = std::filesystem;

 namespace cif
@@ -622,10 +619,10 @@ bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
 	while (catA_i != catA.end() and catB_i != catB.end())
 	{
 		std::string nA = *catA_i;
-		ba::to_lower(nA);
+		toLower(nA);

 		std::string nB = *catB_i;
-		ba::to_lower(nB);
+		toLower(nB);

 		int d = nA.compare(nB);
 		if (d > 0)
@@ -654,11 +651,11 @@ bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
 		{
 			std::cerr << "compare of datablocks failed" << std::endl;
 			if (not missingA.empty())
-				std::cerr << "Categories missing in A: " << ba::join(missingA, ", ") << std::endl
+				std::cerr << "Categories missing in A: " << cif::join(missingA, ", ") << std::endl
 						  << std::endl;

 			if (not missingB.empty())
-				std::cerr << "Categories missing in B: " << ba::join(missingB, ", ") << std::endl
+				std::cerr << "Categories missing in B: " << cif::join(missingB, ", ") << std::endl
 						  << std::endl;

 			result = false;
@@ -673,10 +670,10 @@ bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
 	while (catA_i != catA.end() and catB_i != catB.end())
 	{
 		std::string nA = *catA_i;
-		ba::to_lower(nA);
+		toLower(nA);

 		std::string nB = *catB_i;
-		ba::to_lower(nB);
+		toLower(nB);

 		int d = nA.compare(nB);
 		if (d > 0)
@@ -2156,7 +2153,7 @@ bool Category::isValid()

 	if (not mandatory.empty())
 	{
-		mValidator->reportError("In Category " + mName + " the following mandatory fields are missing: " + ba::join(mandatory, ", "), false);
+		mValidator->reportError("In Category " + mName + " the following mandatory fields are missing: " + cif::join(mandatory, ", "), false);
 		result = false;
 	}

@@ -2447,12 +2444,12 @@ namespace detail
 	{
 		if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
 		{
-			ba::replace_all(value, "\n;", "\n\\;");
+			cif::replace_all(value, "\n;", "\n\\;");

 			if (offset > 0)
 				os << std::endl;
 			os << ';' << value;
-			if (not ba::ends_with(value, "\n"))
+			if (not cif::ends_with(value, "\n"))
 				os << std::endl;
 			os << ';' << std::endl;
 			offset = 0;
@@ -3451,48 +3448,50 @@ void File::load(const std::filesystem::path &p)
 {
 	fs::path path(p);

-	std::ifstream inFile(p, std::ios_base::in | std::ios_base::binary);
-	if (not inFile.is_open())
-		throw std::runtime_error("Could not open file: " + path.string());
-
-	io::filtering_stream<io::input> in;
-	std::string ext;
-
 	if (path.extension() == ".gz")
 	{
-		in.push(io::gzip_decompressor());
-		ext = path.stem().extension().string();
-	}
-
-	in.push(inFile);
+		gzstream::ifstream in(p);

-	try
-	{
-		load(in);
+		try
+		{
+			load(in);
+		}
+		catch (const std::exception &ex)
+		{
+			if (cif::VERBOSE >= 0)
+				std::cerr << "Error loading file " << path << std::endl;
+			throw;
+		}
 	}
-	catch (const std::exception &ex)
+	else
 	{
-		if (cif::VERBOSE >= 0)
-			std::cerr << "Error loading file " << path << std::endl;
-		throw;
+		std::ifstream inFile(p, std::ios_base::in | std::ios_base::binary);
+
+		try
+		{
+			load(inFile);
+		}
+		catch (const std::exception &ex)
+		{
+			if (cif::VERBOSE >= 0)
+				std::cerr << "Error loading file " << path << std::endl;
+			throw;
+		}
 	}
 }

 void File::save(const std::filesystem::path &p)
 {
-	fs::path path(p);
-
-	std::ofstream outFile(p, std::ios_base::out | std::ios_base::binary);
-	io::filtering_stream<io::output> out;
-
-	if (path.extension() == ".gz")
+	if (p.extension() == ".gz")
 	{
-		out.push(io::gzip_compressor());
-		path = path.stem();
+		gzstream::ofstream outFile(p);
+		save(outFile);
+	}
+	else
+	{
+		std::ofstream outFile(p, std::ios_base::out | std::ios_base::binary);
+		save(outFile);
 	}
-
-	out.push(outFile);
-	save(out);
 }

 void File::load(std::istream &is)
@@ -3534,14 +3533,16 @@ void File::load(const char *data, std::size_t length)
 		membuf(char *data, size_t length) { this->setg(data, data, data + length); }
 	} buffer(const_cast<char *>(data), length);

-	std::istream is(&buffer);
-
-	io::filtering_stream<io::input> in;
 	if (gzipped)
-		in.push(io::gzip_decompressor());
-	in.push(is);
-
-	load(is);
+	{
+		gzstream::istream is(&buffer);
+		load(is);
+	}
+	else
+	{
+		std::istream is(&buffer);
+		load(is);
+	}
 }

 void File::save(std::ostream &os)

--- a/src/Cif2PDB.cpp
+++ b/src/Cif2PDB.cpp
@@ -30,74 +30,75 @@
 #include <cmath>
 #include <iomanip>

-#include <boost/algorithm/string.hpp>
 #include <boost/format.hpp>

-#include <boost/iostreams/filtering_stream.hpp>
-#include <boost/iostreams/concepts.hpp>    // output_filter
-#include <boost/iostreams/operations.hpp>  // put
-
 #include <cif++/Cif2PDB.hpp>
 #include <cif++/AtomType.hpp>
 #include <cif++/Compound.hpp>

-namespace ba = boost::algorithm;
-namespace io = boost::iostreams;
-
 using cif::Datablock;
 using cif::Category;
 using cif::Row;

 // --------------------------------------------------------------------
-// FillOutLineFilter is used to make sure all lines in PDB files
-// are at filled out with spaces to be as much as 80 characters wide.
-
-class FillOutLineFilter : public io::output_filter
+class FillOutStreamBuf : public std::streambuf
 {
- public:
-	FillOutLineFilter()
-		: mLineCount(0), mColumnCount(0) {}
-	
-    template<typename Sink>
-    bool put(Sink& dest, int c)
-    {
-    	bool result = true;
-    	
-    	if (c == '\n')
-    	{
-    		for (int i = mColumnCount; result and i < 80; ++i)
-    			result = io::put(dest, ' ');
-    	}
-    	
-    	if (result)
-    		result = io::put(dest, c);
-    	
-    	if (result)
-    	{
-    		if (c == '\n')
-    		{
-    			mColumnCount = 0;
-    			++mLineCount;
-    		}
-    		else
-    			++mColumnCount;
-    	}
-    	
-    	return result;
-    }
-
-    template<typename Sink>
-    void close(Sink&)
-    {
-    	mLineCount = 0;
-    	mColumnCount = 0;
-    }
-    
-    int GetLineCount() const 		{ return mLineCount; }
-	
+  public:
+	using base_type = std::streambuf;
+	using int_type = base_type::int_type;
+	using char_type = base_type::char_type;
+	using traits_type = base_type::traits_type;
+
+	FillOutStreamBuf(std::ostream &os)
+		: mOS(os)
+		, mUpstream(os.rdbuf())
+	{
+	}
+
+	~FillOutStreamBuf()
+	{
+		mOS.rdbuf(mUpstream);
+	}
+
+	virtual int_type
+	overflow(int_type ic = traits_type::eof())
+	{
+		char ch = traits_type::to_char_type(ic);
+
+		int_type result = ic;
+
+		if (ch == '\n')
+		{
+			for (int i = mColumnCount; result != traits_type::eof() and i < 80; ++i)
+				result = mUpstream->sputc(' ');
+		}
+
+		if (result != traits_type::eof())
+			result = mUpstream->sputc(ch);
+
+		if (result != traits_type::eof())
+		{
+			if (ch == '\n')
+			{
+				mColumnCount = 0;
+				++mLineCount;
+			}
+			else
+				++mColumnCount;
+		}
+
+		return result;
+	}
+
+	std::streambuf *getUpstream() const { return mUpstream; }
+
+	int GetLineCount() const { return mLineCount; }
+
  private:
-	int	mLineCount;
-	int mColumnCount;
+	std::ostream &mOS;
+	std::streambuf *mUpstream;
+	int mLineCount = 0;
+	int mColumnCount = 0;
 };

 // --------------------------------------------------------------------
@@ -198,8 +199,8 @@ std::string cifSoftware(const Datablock& db, SoftwareType sw)
 			result = r["name"].as<std::string>() + " " + r["version"].as<std::string>();
 		}

-		ba::trim(result);
-		ba::to_upper(result);
+		cif::trim(result);
+		cif::toUpper(result);
 		
 		if (result.empty())
 			result = "NULL";
@@ -249,7 +250,7 @@ size_t WriteContinuedLine(std::ostream& pdbFile, std::string header, int& count,

 	for (auto& line: lines)
 	{
-		// ba::to_upper(line);
+		// cif::toUpper(line);

 		pdbFile << header;
 		
@@ -302,13 +303,13 @@ size_t WriteCitation(std::ostream& pdbFile, const Datablock& db, Row r, int refe
 		authors.push_back(cif2pdbAuth(r1["name"].as<std::string>()));

 	if (not authors.empty())
-		result += WriteOneContinuedLine(pdbFile, s1 + "AUTH", 2, ba::join(authors, ","), 19);
+		result += WriteOneContinuedLine(pdbFile, s1 + "AUTH", 2, cif::join(authors, ","), 19);
 	
 	result += WriteOneContinuedLine(pdbFile, s1 + "TITL", 2, title, 19);
 	
 	if (not pubname.empty())
 	{
-		ba::to_upper(pubname);
+		cif::toUpper(pubname);
 	
 		const std::string kRefHeader = s1 + "REF %2.2d %-28.28s  %2.2s%4.4d %5.5d %4.4d";
 		pdbFile << (boost::format(kRefHeader)
@@ -410,7 +411,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 	for (auto r: db["struct"])
 	{
 		std::string title = r["title"].as<std::string>();
-		ba::trim(title);
+		cif::trim(title);
 		WriteOneContinuedLine(pdbFile, "TITLE   ", 2, title);
 		break;
 	}
@@ -438,7 +439,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 		if (not poly.empty())
 		{
 			std::string chains = poly.front()["pdbx_strand_id"].as<std::string>();
-			ba::replace_all(chains, ",", ", ");
+			cif::replace_all(chains, ",", ", ");
 			cmpnd.push_back("CHAIN: " + chains);
 		}

@@ -469,7 +470,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 			cmpnd.push_back("OTHER_DETAILS: " + details);
 	}

-	WriteOneContinuedLine(pdbFile, "COMPND ", 3, ba::join(cmpnd, ";\n"));
+	WriteOneContinuedLine(pdbFile, "COMPND ", 3, cif::join(cmpnd, ";\n"));

 	// SOURCE
 	
@@ -550,7 +551,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 		}
 	}

-	WriteOneContinuedLine(pdbFile, "SOURCE ", 3, ba::join(source, ";\n"));
+	WriteOneContinuedLine(pdbFile, "SOURCE ", 3, cif::join(source, ";\n"));
 	
 	// KEYWDS
 	
@@ -575,7 +576,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 		for (auto r: dbexpt)
 			method.push_back(r["method"].as<std::string>());
 		if (not method.empty())
-			WriteOneContinuedLine(pdbFile, "EXPDTA  ", 2, ba::join(method, "; "));
+			WriteOneContinuedLine(pdbFile, "EXPDTA  ", 2, cif::join(method, "; "));
 	}
 	
 	// NUMMDL
@@ -589,7 +590,7 @@ void WriteHeaderLines(std::ostream& pdbFile, const Datablock& db)
 	for (auto r: db["audit_author"])
 		authors.push_back(cif2pdbAuth(r["name"].as<std::string>()));
 	if (not authors.empty())
-		WriteOneContinuedLine(pdbFile, "AUTHOR  ", 2, ba::join(authors, ","));
+		WriteOneContinuedLine(pdbFile, "AUTHOR  ", 2, cif::join(authors, ","));
 }

 void WriteTitle(std::ostream& pdbFile, const Datablock& db)
@@ -1378,7 +1379,7 @@ void WriteRemark3Refmac(std::ostream& pdbFile, const Datablock& db)
 				
 				pdbFile << RM3("") << std::endl
 						<< RM3(" NCS GROUP NUMBER               : ") << ens_id << std::endl
-						<< RM3("    CHAIN NAMES                    : ") << ba::join(chains, " ") << std::endl
+						<< RM3("    CHAIN NAMES                    : ") << cif::join(chains, " ") << std::endl
 						<< RM3("    NUMBER OF COMPONENTS NCS GROUP : ") << component_ids.size() << std::endl
 						<< RM3("      COMPONENT C  SSSEQI  TO  C   SSSEQI   CODE") << std::endl;
 				
@@ -1397,12 +1398,12 @@ void WriteRemark3Refmac(std::ostream& pdbFile, const Datablock& db)
 				for (auto l: db["refine_ls_restr_ncs"].find(cif::Key("pdbx_ens_id") == ens_id))
 				{
 					std::string type = l["pdbx_type"].as<std::string>();
-					ba::to_upper(type);
+					cif::toUpper(type);
 					
 					std::string unit;
-					if (ba::ends_with(type, "POSITIONAL"))
+					if (cif::ends_with(type, "POSITIONAL"))
 						unit = "    (A): ";
-					else if (ba::ends_with(type, "THERMAL"))
+					else if (cif::ends_with(type, "THERMAL"))
 						unit = " (A**2): ";
 					else
 						unit = "       : ";
@@ -1729,7 +1730,7 @@ void WriteRemark3Phenix(std::ostream& pdbFile, const Datablock& db)
 //				
 //				pdbFile << RM3("") << std::endl
 //						<< RM3(" NCS GROUP NUMBER               : ") << ens_id << std::endl
-//						<< RM3("    CHAIN NAMES                    : ") << ba::join(chains, " ") << std::endl
+//						<< RM3("    CHAIN NAMES                    : ") << cif::join(chains, " ") << std::endl
 //						<< RM3("    NUMBER OF COMPONENTS NCS GROUP : ") << component_ids.size() << std::endl
 //						<< RM3("      COMPONENT C  SSSEQI  TO  C   SSSEQI   CODE") << std::endl;
 //				
@@ -1748,12 +1749,12 @@ void WriteRemark3Phenix(std::ostream& pdbFile, const Datablock& db)
 //				for (auto l: db["refine_ls_restr_ncs"].find(cif::Key("pdbx_ens_id") == ens_id))
 //				{
 //					std::string type = l["pdbx_type"];
-//					ba::to_upper(type);
+//					cif::toUpper(type);
 //					
 //					std::string unit;
-//					if (ba::ends_with(type, "POSITIONAL"))
+//					if (cif::ends_with(type, "POSITIONAL"))
 //						unit = "    (A): ";
-//					else if (ba::ends_with(type, "THERMAL"))
+//					else if (cif::ends_with(type, "THERMAL"))
 //						unit = " (A**2): ";
 //					else
 //						unit = "       : ";
@@ -2369,7 +2370,7 @@ void WriteRemark280(std::ostream& pdbFile, const Datablock& db)
 				std::string v = exptl_crystal_grow[c].as<std::string>();
 				if (not v.empty())
 				{
-					ba::to_upper(v);
+					cif::toUpper(v);
 					
 					switch (i)
 					{
@@ -2385,7 +2386,7 @@ void WriteRemark280(std::ostream& pdbFile, const Datablock& db)
 				}
 			}

-			WriteOneContinuedLine(pdbFile, "REMARK 280", 0, "CRYSTALLIZATION CONDITIONS: " + (conditions.empty() ? "NULL" : ba::join(conditions, ", ")));
+			WriteOneContinuedLine(pdbFile, "REMARK 280", 0, "CRYSTALLIZATION CONDITIONS: " + (conditions.empty() ? "NULL" : cif::join(conditions, ", ")));

 			break;
 		}
@@ -2420,7 +2421,7 @@ void WriteRemark350(std::ostream& pdbFile, const Datablock& db)
 	// write out the mandatory REMARK 300 first
 	
 	pdbFile << RM<300>("") << std::endl
-			<< RM<300>("BIOMOLECULE: ") << ba::join(biomolecules, ", ") << std::endl
+			<< RM<300>("BIOMOLECULE: ") << cif::join(biomolecules, ", ") << std::endl
 			<< RM<300>("SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM") << std::endl
 			<< RM<300>("GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN") << std::endl
 			<< RM<300>("THIS ENTRY. THE REMARK MAY ALSO PROVIDE INFORMATION ON") << std::endl
@@ -2451,7 +2452,7 @@ void WriteRemark350(std::ostream& pdbFile, const Datablock& db)
 		pdbFile << RM("") << std::endl
 			 	<< RM("BIOMOLECULE: ") << id << std::endl;
 		
-		ba::to_upper(oligomer);
+		cif::toUpper(oligomer);
 		
 		if (detail == "author_defined_assembly" or detail == "author_and_software_defined_assembly")
 			pdbFile << RM("AUTHOR DETERMINED BIOLOGICAL UNIT: ") << oligomer << std::endl;
@@ -2479,20 +2480,16 @@ void WriteRemark350(std::ostream& pdbFile, const Datablock& db)
 		
 		auto gen = db["pdbx_struct_assembly_gen"][cif::Key("assembly_id") == id];
 		
-		std::vector<std::string> asyms;
 		std::string asym_id_list, oper_id_list;
 		cif::tie(asym_id_list, oper_id_list) = gen.get("asym_id_list", "oper_expression");
 		
-		ba::split(asyms, asym_id_list, ba::is_any_of(","));
+		auto asyms = cif::split<std::string>(asym_id_list, ",");
 		
 		std::vector<std::string> chains = MapAsymIDs2ChainIDs(asyms, db);
-		pdbFile << RM("APPLY THE FOLLOWING TO CHAINS: ") << ba::join(chains, ", ") << std::endl; 
-		
+		pdbFile << RM("APPLY THE FOLLOWING TO CHAINS: ") << cif::join(chains, ", ") << std::endl; 

-		for (auto i = make_split_iterator(oper_id_list, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
+		for (auto oper_id : cif::split<std::string>(oper_id_list, ",", true))
 		{
-			std::string oper_id{ i->begin(), i->end() };
-
 			auto r = db["pdbx_struct_oper_list"][cif::Key("id") == oper_id];
 			
 			pdbFile << RM("  BIOMT1 ", -3) <<	Fs(r, "id")
@@ -2666,7 +2663,7 @@ void WriteRemark800(std::ostream& pdbFile, const Datablock& db)
 		std::string ident, code, desc;
 		cif::tie(ident, code, desc) = r.get("id", "pdbx_evidence_code", "details");

-		ba::to_upper(code);
+		cif::toUpper(code);

 		for (auto l: { "SITE_IDENTIFIER: " + ident, "EVIDENCE_CODE: " + code, "SITE_DESCRIPTION: " + desc })
 		{
@@ -2780,7 +2777,7 @@ int WritePrimaryStructure(std::ostream& pdbFile, const Datablock& db)
 		 			"pdbx_seq_db_name", "pdbx_seq_db_accession_code", "db_mon_id", "pdbx_seq_db_seq_num",
 		 			"details");
 		
-		ba::to_upper(conflict);
+		cif::toUpper(conflict);
 		
 		pdbFile << (boost::format(
 					"SEQADV %4.4s %3.3s %1.1s %4.4s%1.1s %-4.4s %-9.9s %3.3s %5.5s %-21.21s")
@@ -2823,14 +2820,12 @@ int WritePrimaryStructure(std::ostream& pdbFile, const Datablock& db)
 			if (t > 13)
 				t = 13;
 			
-			auto r = boost::make_iterator_range(seq.begin(), seq.begin() + t);
-			
 			pdbFile << (boost::format(
 						"SEQRES %3.3d %c %4.4d  %-51.51s          ")
 						% n++
 						% chainID
 						% seqresl[chainID]
-						% ba::join(r, " ")).str()
+						% cif::join(seq.begin(), seq.begin() + t, " ")).str()
 						<< std::endl;
 			
 			++numSeq;
@@ -2966,7 +2961,7 @@ int WriteHeterogen(std::ostream& pdbFile, const Datablock& db)
 	}
 	
 	if (cif::VERBOSE > 1 and not missingHetNames.empty())
-		std::cerr << "Missing het name(s) for " << ba::join(missingHetNames, ", ") << std::endl;
+		std::cerr << "Missing het name(s) for " << cif::join(missingHetNames, ", ") << std::endl;
 	
 	boost::format kHET("HET    %3.3s  %1.1s%4.4d%1.1s  %5.5d");
 	for (auto h: hets)
@@ -2985,7 +2980,7 @@ int WriteHeterogen(std::ostream& pdbFile, const Datablock& db)
 		if (id == water_comp_id)
 			continue;

-		ba::to_upper(name);
+		cif::toUpper(name);
 		
 		int c = 1;
 		
@@ -3723,46 +3718,35 @@ std::tuple<int,int> WriteCoordinate(std::ostream& pdbFile, const Datablock& db)

 void WritePDBFile(std::ostream& pdbFile, const Datablock &db)
 {
-	io::filtering_ostream out;
-	out.push(FillOutLineFilter());
-	out.push(pdbFile);
+	FillOutStreamBuf fb(pdbFile);

-	auto filter = out.component<FillOutLineFilter>(0);
-	assert(filter);
-	
 	int numRemark = 0, numHet = 0, numHelix = 0, numSheet = 0, numTurn = 0, numSite = 0, numXform = 0, numCoord = 0, numTer = 0, numConect = 0, numSeq = 0;
 	
-								WriteTitle(out, db);
+								WriteTitle(pdbFile, db);
 	
-	int savedLineCount = filter->GetLineCount();
-//	numRemark = 				WriteRemarks(out, db);
-								WriteRemarks(out, db);
-	numRemark = filter->GetLineCount() - savedLineCount;
+	int savedLineCount = fb.GetLineCount();
+//	numRemark = 				WriteRemarks(pdbFile, db);
+								WriteRemarks(pdbFile, db);
+	numRemark = fb.GetLineCount() - savedLineCount;

-	numSeq = 					WritePrimaryStructure(out, db);
-	numHet = 					WriteHeterogen(out, db);
-	std::tie(numHelix, numSheet) =	WriteSecondaryStructure(out, db);
-								WriteConnectivity(out, db);
-	numSite =					WriteMiscellaneousFeatures(out, db);
-								WriteCrystallographic(out, db);
-	numXform =					WriteCoordinateTransformation(out, db);
-	std::tie(numCoord, numTer) =		WriteCoordinate(out, db);
+	numSeq = 					WritePrimaryStructure(pdbFile, db);
+	numHet = 					WriteHeterogen(pdbFile, db);
+	std::tie(numHelix, numSheet) =	WriteSecondaryStructure(pdbFile, db);
+								WriteConnectivity(pdbFile, db);
+	numSite =					WriteMiscellaneousFeatures(pdbFile, db);
+								WriteCrystallographic(pdbFile, db);
+	numXform =					WriteCoordinateTransformation(pdbFile, db);
+	std::tie(numCoord, numTer) =		WriteCoordinate(pdbFile, db);

 	boost::format kMASTER("MASTER    %5.5d    0%5.5d%5.5d%5.5d%5.5d%5.5d%5.5d%5.5d%5.5d%5.5d%5.5d");
 	
-	out	<< (kMASTER % numRemark % numHet % numHelix % numSheet % numTurn % numSite % numXform % numCoord % numTer % numConect % numSeq) << std::endl
-		<< "END" << std::endl;
+	pdbFile	<< (kMASTER % numRemark % numHet % numHelix % numSheet % numTurn % numSite % numXform % numCoord % numTer % numConect % numSeq) << std::endl
+			<< "END" << std::endl;
 }

 void WritePDBHeaderLines(std::ostream& os, const Datablock &db)
 {
-	// io::filtering_ostream out;
-	// out.push(FillOutLineFilter());
-	// out.push(os);
-
-	// auto filter = out.component<FillOutLineFilter>(0);
-	// assert(filter);
-
+	FillOutStreamBuf fb(os);
 	WriteHeaderLines(os, db);
 }

@@ -3847,7 +3831,7 @@ std::string GetPDBCOMPNDLine(const Datablock &db, std::string::size_type truncat
 		if (not poly.empty())
 		{
 			std::string chains = poly.front()["pdbx_strand_id"].as<std::string>();
-			ba::replace_all(chains, ",", ", ");
+			cif::replace_all(chains, ",", ", ");
 			cmpnd.push_back("CHAIN: " + chains);
 		}

@@ -3878,7 +3862,7 @@ std::string GetPDBCOMPNDLine(const Datablock &db, std::string::size_type truncat
 			cmpnd.push_back("OTHER_DETAILS: " + details);
 	}

-	return FixStringLength("COMPND    " + ba::join(cmpnd, "; "), truncate_at);
+	return FixStringLength("COMPND    " + cif::join(cmpnd, "; "), truncate_at);
 }

 std::string GetPDBSOURCELine(const Datablock &db, std::string::size_type truncate_at)
@@ -3961,7 +3945,7 @@ std::string GetPDBSOURCELine(const Datablock &db, std::string::size_type truncat
 		}
 	}

-	return FixStringLength("SOURCE    " + ba::join(source, "; "), truncate_at);
+	return FixStringLength("SOURCE    " + cif::join(source, "; "), truncate_at);
 }

 std::string GetPDBAUTHORLine(const Datablock &db, std::string::size_type truncate_at)
@@ -3971,5 +3955,5 @@ std::string GetPDBAUTHORLine(const Datablock &db, std::string::size_type truncat
 	for (auto r: db["audit_author"])
 		author.push_back(cif2pdbAuth(r["name"].as<std::string>()));

-	return FixStringLength("AUTHOR    " + ba::join(author, "; "), truncate_at);
+	return FixStringLength("AUTHOR    " + cif::join(author, "; "), truncate_at);
 }
--- a/src/CifParser.cpp
+++ b/src/CifParser.cpp
@@ -26,14 +26,10 @@

 #include <set>

-#include <boost/algorithm/string.hpp>
-
 #include <cif++/Cif++.hpp>
 #include <cif++/CifParser.hpp>
 #include <cif++/CifValidator.hpp>

-namespace ba = boost::algorithm;
-
 extern int VERBOSE;

 namespace cif
@@ -1300,9 +1296,9 @@ bool DictParser::collectItemTypes()
 		std::string code, primitiveCode, construct;
 		cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");

-		ba::replace_all(construct, "\\n", "\n");
-		ba::replace_all(construct, "\\t", "\t");
-		ba::replace_all(construct, "\\\n", "");
+		cif::replace_all(construct, "\\n", "\n");
+		cif::replace_all(construct, "\\t", "\t");
+		cif::replace_all(construct, "\\\n", "");

 		try
 		{

--- a/src/CifUtils.cpp
+++ b/src/CifUtils.cpp
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- * 
+ *
 * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
- * 
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- * 
+ *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- * 
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -25,6 +25,7 @@
 */

 #include <atomic>
+#include <cassert>
 #include <cmath>
 #include <fstream>
 #include <iomanip>
@@ -42,13 +43,10 @@
 #include <termios.h>
 #endif

-#include <boost/algorithm/string.hpp>
-
 #include <cif++/CifUtils.hpp>

 #include "revision.hpp"

-namespace ba = boost::algorithm;
 namespace fs = std::filesystem;

 // --------------------------------------------------------------------
@@ -96,7 +94,7 @@ bool iequals(std::string_view a, std::string_view b)
 	bool result = a.length() == b.length();
 	for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
 		result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];
-		// result = tolower(*ai) == tolower(*bi);
+	// result = tolower(*ai) == tolower(*bi);
 	return result;
 }

@@ -152,7 +150,7 @@ void toLower(std::string &s)
 		c = tolower(c);
 }

-std::string toLowerCopy(const std::string &s)
+std::string toLowerCopy(std::string_view s)
 {
 	std::string result(s);
 	for (auto &c : result)
@@ -160,6 +158,91 @@ std::string toLowerCopy(const std::string &s)
 	return result;
 }

+void toUpper(std::string &s)
+{
+	for (auto &c : s)
+		c = toupper(c);
+}
+
+void replace_all(std::string &s, std::string_view what, std::string_view with)
+{
+	for (std::string::size_type p = s.find(what); p != std::string::npos; p = s.find(what, p))
+		s.replace(p, what.length(), with);
+}
+
+bool icontains(std::string_view s, std::string_view q)
+{
+	return contains(toLowerCopy(s), toLowerCopy(q));
+}
+
+void trim_right(std::string &s)
+{
+	auto e = s.end();
+	while (e != s.begin())
+	{
+		auto pe = std::prev(e);
+		if (not std::isspace(*pe))
+			break;
+		e = pe;
+	}
+
+	if (e != s.end())
+		s.erase(e, s.end());
+}
+
+std::string trim_right_copy(std::string_view s)
+{
+	auto e = s.end();
+	while (e != s.begin())
+	{
+		auto pe = std::prev(e);
+		if (not std::isspace(*pe))
+			break;
+		e = pe;
+	}
+
+	return {s.begin(), e};
+}
+
+std::string trim_left_copy(std::string_view s)
+{
+	auto b = s.begin();
+	while (b != s.end())
+	{
+		if (not std::isspace(*b))
+			break;
+
+		b = std::next(b);
+	}
+
+	return {b, s.end()};
+}
+
+void trim_left(std::string &s)
+{
+	auto b = s.begin();
+	while (b != s.end())
+	{
+		if (not std::isspace(*b))
+			break;
+
+		b = std::next(b);
+	}
+
+	s.erase(s.begin(), b);
+}
+
+void trim(std::string &s)
+{
+	trim_right(s);
+	trim_left(s);
+}
+
+std::string trim_copy(std::string_view s)
+{
+	return trim_left_copy(trim_right_copy(s));
+}
+
 // --------------------------------------------------------------------

 std::tuple<std::string, std::string> splitTagName(std::string_view tag)
@@ -181,7 +264,7 @@ std::tuple<std::string, std::string> splitTagName(std::string_view tag)
 std::string cifIdForNumber(int number)
 {
 	std::string result;
-	
+
 	if (number >= 26 * 26 * 26)
 		result = 'L' + std::to_string(number);
 	else
@@ -192,17 +275,17 @@ std::string cifIdForNumber(int number)
 			result += char('A' - 1 + v);
 			number %= (26 * 26);
 		}
-		
+
 		if (number >= 26)
 		{
 			int v = number / 26;
 			result += char('A' - 1 + v);
 			number %= 26;
 		}
-		
+
 		result += char('A' + number);
 	}
-	
+
 	assert(not result.empty());
 	return result;
 }
@@ -433,11 +516,8 @@ std::vector<std::string> wrapLine(const std::string &text, size_t width)

 std::vector<std::string> wordWrap(const std::string &text, size_t width)
 {
-	std::vector<std::string> paragraphs;
-	ba::split(paragraphs, text, ba::is_any_of("\n"));
-
 	std::vector<std::string> result;
-	for (auto &p : paragraphs)
+	for (auto p : cif::split<std::string>(text, "\n"))
 	{
 		if (p.empty())
 		{
@@ -482,12 +562,15 @@ std::string GetExecutablePath()

 	// convert from utf16 to utf8
 	std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
-    std::string u8str = conv1.to_bytes(ws);
+	std::string u8str = conv1.to_bytes(ws);

 	return u8str;
 }

 #else
+
+#include <limits.h>
+
 uint32_t get_terminal_width()
 {
 	uint32_t result = 80;
@@ -786,9 +869,9 @@ struct rsrc_imp

 #if _MSC_VER

-extern "C" const mrsrc::rsrc_imp* gResourceIndexDefault[1] = {};
-extern "C" const char* gResourceDataDefault[1] = {};
-extern "C" const char* gResourceNameDefault[1] = {};
+extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
+extern "C" const char *gResourceDataDefault[1] = {};
+extern "C" const char *gResourceNameDefault[1] = {};

 extern "C" const mrsrc::rsrc_imp gResourceIndex[];
 extern "C" const char gResourceData[];
@@ -1250,12 +1333,14 @@ class ResourcePool
 					result.reset(file.release());
 			}
 		}
-		catch (...) {}
+		catch (...)
+		{
+		}

 		return result;
 	}

-	std::map<std::string,std::filesystem::path> mLocalResources;
+	std::map<std::string, std::filesystem::path> mLocalResources;
 	std::deque<fs::path> mDirs;
 };

@@ -1301,7 +1386,7 @@ std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
 			result.reset(new mrsrc::istream(rsrc));
 	}

-	return result;	
+	return result;
 }

 // --------------------------------------------------------------------

--- a/src/CifValidator.cpp
+++ b/src/CifValidator.cpp
@@ -27,17 +27,13 @@
 #include <fstream>
 #include <filesystem>

-#include <boost/algorithm/string.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
-#include <boost/iostreams/filter/gzip.hpp>
+#include <gzstream/gzstream.hpp>

 #include <cif++/Cif++.hpp>
 #include <cif++/CifParser.hpp>
 #include <cif++/CifValidator.hpp>

-namespace ba = boost::algorithm;
 namespace fs = std::filesystem;
-namespace io = boost::iostreams;

 extern int VERBOSE;

@@ -416,15 +412,11 @@ const Validator &ValidatorFactory::operator[](std::string_view dictionary)

 		if (fs::exists(p, ec) and not ec)
 		{
-			std::ifstream file(p, std::ios::binary);
+			gzstream::ifstream file(p);
 			if (not file.is_open())
 				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

-			io::filtering_stream<io::input> in;
-			in.push(io::gzip_decompressor());
-			in.push(file);
-
-			mValidators.emplace_back(dictionary, in);
+			mValidators.emplace_back(dictionary, file);
 		}
 		else
 			throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");

--- a/src/Compound.cpp
+++ b/src/Compound.cpp
@@ -29,8 +29,6 @@
 #include <numeric>
 #include <shared_mutex>

-#include <boost/algorithm/string.hpp>
-
 #include <filesystem>
 #include <fstream>

@@ -40,7 +38,6 @@
 #include <cif++/Compound.hpp>
 #include <cif++/Point.hpp>

-namespace ba = boost::algorithm;
 namespace fs = std::filesystem;

 namespace mmcif
@@ -126,7 +123,7 @@ Compound::Compound(cif::Datablock &db)
 		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

 	// The name should not contain newline characters since that triggers validation errors later on
-	ba::replace_all(mName, "\n", "");
+	cif::replace_all(mName, "\n", "");

 	mGroup = "non-polymer";

@@ -286,7 +283,7 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
 	{
 		std::shared_lock lock(mMutex);

-		ba::to_upper(id);
+		cif::toUpper(id);

 		Compound *result = nullptr;

@@ -554,7 +551,7 @@ CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std:
 	{
 		if (std::regex_match(group, peptideRx))
 			mKnownPeptides.insert(threeLetterCode);
-		else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
+		else if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
 			mKnownBases.insert(threeLetterCode);
 	}
 }
@@ -576,10 +573,10 @@ Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
 		cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
 			row.get("name", "group", "number_atoms_all", "number_atoms_nh");

-		fs::path resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
+		fs::path resFile = mCLIBD_MON / cif::toLowerCopy(id.substr(0, 1)) / (id + ".cif");

 		if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
-			resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
+			resFile = mCLIBD_MON / cif::toLowerCopy(id.substr(0, 1)) / (id + '_' + id + ".cif");

 		if (fs::exists(resFile))
 		{

--- a/src/PDB2Cif.cpp
+++ b/src/PDB2Cif.cpp
@@ -30,7 +30,6 @@
 #include <system_error>
 #include <iomanip>

-#include <boost/algorithm/string.hpp>
 #include <boost/format.hpp>
 #include <boost/numeric/ublas/matrix.hpp>

@@ -42,13 +41,13 @@
 #include <cif++/Point.hpp>
 #include <cif++/Symmetry.hpp>

-namespace ba = boost::algorithm;
-
 using cif::Category;
 using cif::Datablock;
 using cif::iequals;
 using cif::Key;
 using cif::Row;
+using cif::toLower;
+using cif::toLowerCopy;
 using mmcif::CompoundFactory;

 // --------------------------------------------------------------------
@@ -205,7 +204,7 @@ std::string PDBRecord::vS(size_t columnFirst, size_t columnLast)
 	if (columnFirst < mVlen + 7)
 	{
 		result = std::string{mValue + columnFirst - 7, mValue + columnLast - 7 + 1};
-		ba::trim(result);
+		cif::trim(result);
 	}

 	return result;
@@ -429,7 +428,7 @@ std::tuple<std::string, std::string> SpecificationListParser::GetNextSpecificati
 		}
 	}

-	ba::trim(value);
+	cif::trim(value);

 	return std::make_tuple(id, value);
 }
@@ -949,7 +948,7 @@ class PDBFileParser

 	std::string pdb2cifAuth(std::string author)
 	{
-		ba::trim(author);
+		cif::trim(author);

 		const std::regex rx(R"(((?:[A-Z]+\.)+)(.+))");
 		std::smatch m;
@@ -1135,13 +1134,13 @@ void PDBFileParser::PreParseInput(std::istream &is)
 	if (lookahead.back() == '\r')
 		lookahead.pop_back();

-	//	if (ba::starts_with(lookahead, "HEADER") == false)
+	//	if (cif::starts_with(lookahead, "HEADER") == false)
 	//		throw std::runtime_error("This does not look like a PDB file, should start with a HEADER line");

 	auto contNr = [&lookahead](int offset, int len) -> int
 	{
 		std::string cs = lookahead.substr(offset, len);
-		ba::trim(cs);
+		cif::trim(cs);
 		int result;

 		try
@@ -1177,7 +1176,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 		std::string type = lookahead.substr(0, 6);
 		std::string value;
 		if (lookahead.length() > 6)
-			value = ba::trim_right_copy(lookahead.substr(6));
+			value = cif::trim_right_copy(lookahead.substr(6));

 		uint32_t curLineNr = lineNr;
 		getline(is, lookahead);
@@ -1185,7 +1184,7 @@ void PDBFileParser::PreParseInput(std::istream &is)

 		if (kSupportedRecords.count(type) == 0)
 		{
-			ba::trim(type);
+			cif::trim(type);

 			if (type != "END") // special case
 				dropped.insert(type);
@@ -1207,7 +1206,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
-				value += ba::trim_right_copy(lookahead.substr(10));
+				value += cif::trim_right_copy(lookahead.substr(10));
 				getline(is, lookahead);
 				++lineNr;
 				++n;
@@ -1219,7 +1218,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			value += '\n';
 			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
-				value += ba::trim_right_copy(lookahead.substr(10));
+				value += cif::trim_right_copy(lookahead.substr(10));
 				value += '\n';
 				getline(is, lookahead);
 				++lineNr;
@@ -1245,7 +1244,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
-				value += ba::trim_right_copy(lookahead.substr(13));
+				value += cif::trim_right_copy(lookahead.substr(13));
 				getline(is, lookahead);
 				++lineNr;
 				++n;
@@ -1266,7 +1265,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and contNr(7, 3) == n)
 			{
-				value += ba::trim_copy(lookahead.substr(10));
+				value += cif::trim_copy(lookahead.substr(10));
 				value += '\n';
 				getline(is, lookahead);
 				++lineNr;
@@ -1297,7 +1296,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 					       stoi(lookahead.substr(7, 3)) == compNr and
 					       contNr(16, 2) == n)
 					{
-						value += ba::trim_right_copy(lookahead.substr(19));
+						value += cif::trim_right_copy(lookahead.substr(19));
 						;
 						getline(is, lookahead);
 						++lineNr;
@@ -1323,7 +1322,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			int n = 2;
 			while (lookahead.substr(0, 6) == type and contNr(8, 2) == n)
 			{
-				value += ba::trim_right_copy(lookahead.substr(16));
+				value += cif::trim_right_copy(lookahead.substr(16));
 				;
 				getline(is, lookahead);
 				++lineNr;
@@ -1333,19 +1332,19 @@ void PDBFileParser::PreParseInput(std::istream &is)
 		else if (type == "SITE  ")
 		{
 			std::string siteName = value.substr(5, 3);
-			ba::trim_right(value);
+			cif::trim_right(value);
 			size_t n = value.length() - 12;
 			value += std::string(11 - (n % 11), ' ');

 			while (lookahead.substr(0, 6) == type and lookahead.substr(11, 3) == siteName)
 			{
 				std::string s = lookahead.substr(18);
-				ba::trim_right(s);
+				cif::trim_right(s);
 				s += std::string(11 - (s.length() % 11), ' ');
 				value += s;

 				// TODO: improve this... either use numRes or don't lump together all text
-				//				value += " " + ba::trim_right_copy();
+				//				value += " " + cif::trim_right_copy();
 				getline(is, lookahead);
 				++lineNr;
 			}
@@ -1364,10 +1363,10 @@ void PDBFileParser::PreParseInput(std::istream &is)
 					std::string k = value.substr(4, i - 4);
 					std::string v = value.substr(i + 1);

-					ba::trim(k);
+					cif::trim(k);
 					while (k.find("  ") != std::string::npos)
-						ba::replace_all(k, "  ", " ");
-					ba::trim(v);
+						cif::replace_all(k, "  ", " ");
+					cif::trim(v);

 					if (iequals(v, "NONE") or iequals(v, "N/A") or iequals(v, "NAN"))
 						mRemark200[k] = ".";
@@ -1386,7 +1385,7 @@ void PDBFileParser::PreParseInput(std::istream &is)

 		last = cur;

-		ba::trim(type);
+		cif::trim(type);

 		if (type == "LINK" or type == "LINKR")
 		{
@@ -1421,7 +1420,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 	if (not dropped.empty())
 	{
 		if (cif::VERBOSE >= 0)
-			std::cerr << "Dropped unsupported records: " << ba::join(dropped, ", ") << std::endl;
+			std::cerr << "Dropped unsupported records: " << cif::join(dropped, ", ") << std::endl;
 	}

 	if (mData == nullptr)
@@ -1456,10 +1455,9 @@ void PDBFileParser::Match(const std::string &expected, bool throwIfMissing)

 std::vector<std::string> PDBFileParser::SplitCSV(const std::string &value)
 {
-	std::vector<std::string> vs;
-	ba::split(vs, value, ba::is_any_of(","));
+	auto vs = cif::split<std::string>(value, ",");
 	for (auto &v : vs)
-		ba::trim(v);
+		cif::trim(v);
 	return vs;
 }

@@ -1484,12 +1482,12 @@ void PDBFileParser::ParseTitle()
 		keywords = vS(11, 50);
 		mOriginalDate = pdb2cifDate(vS(51, 59));

-		ba::trim(keywords);
+		cif::trim(keywords);

 		GetNextRecord();
 	}

-	ba::trim(mStructureID);
+	cif::trim(mStructureID);
 	if (mStructureID.empty())
 		mStructureID = "nohd";

@@ -1514,12 +1512,12 @@ void PDBFileParser::ParseTitle()
 		cat = getCategory("pdbx_database_PDB_obs");

 		std::string value = mRec->vS(32);
-		for (auto i = make_split_iterator(value, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i)
+		for (auto i : cif::split<std::string>(value, " ", true))
 		{
 			cat->emplace({{"id", "OBSLTE"},
 			              {"date", date},
 			              {"replace_pdb_id", old},
-			              {"pdb_id", std::string(i->begin(), i->end())}});
+			              {"pdb_id", i}});
 		}

 		GetNextRecord();
@@ -1596,12 +1594,9 @@ void PDBFileParser::ParseTitle()
 			}
 			else if (key == "CHAIN")
 			{
-				std::vector<std::string> chains;
-
-				ba::split(chains, val, ba::is_any_of(","));
-				for (auto &c : chains)
+				for (auto c : cif::split<std::string>(val, ","))
 				{
-					ba::trim(c);
+					cif::trim(c);
 					mCompounds.back().mChains.insert(c[0]);
 				}
 			}
@@ -1698,16 +1693,14 @@ void PDBFileParser::ParseTitle()

 		cat = getCategory("exptl");

-		std::vector<std::string> crystals;
-		ba::split(crystals, mRemark200["NUMBER OF CRYSTALS USED"], ba::is_any_of("; "));
+		auto crystals = cif::split<std::string>(mRemark200["NUMBER OF CRYSTALS USED"], "; ");
 		if (crystals.empty())
 			crystals.push_back("");
 		auto ci = crystals.begin();

-		for (auto si = ba::make_split_iterator(mExpMethod, ba::token_finder(ba::is_any_of(";"), ba::token_compress_on)); not si.eof(); ++si, ++ci)
+		for (auto expMethod : cif::split<std::string>(mExpMethod, ";"))
 		{
-			std::string expMethod(si->begin(), si->end());
-			ba::trim(expMethod);
+			cif::trim(expMethod);

 			if (expMethod.empty())
 				continue;
@@ -1743,10 +1736,8 @@ void PDBFileParser::ParseTitle()
 		cat = getCategory("audit_author");

 		value = {mRec->vS(11)};
-		for (auto si = ba::make_split_iterator(value, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not si.eof(); ++si)
+		for (auto author : cif::split<std::string>(value, ",", true))
 		{
-			std::string author(si->begin(), si->end());
-
 			cat->emplace({{"name", pdb2cifAuth(author)},
 			              {"pdbx_ordinal", n}});
 			++n;
@@ -1788,7 +1779,7 @@ void PDBFileParser::ParseTitle()

 		revdats.push_back({revNum, date, modType == 0 ? mOriginalDate : "", modID, modType});

-		ba::split(revdats.back().types, detail, ba::is_any_of(" "));
+		revdats.back().types = cif::split<std::string>(detail, " ");

 		if (firstRevDat)
 		{
@@ -1849,7 +1840,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
 	{
 		if (not s.empty())
 			s += ' ';
-		s += ba::trim_copy(p);
+		s += cif::trim_copy(p);
 	};

 	while (mRec->is(rec) and (id == "primary" or vC(12) == ' '))
@@ -1867,7 +1858,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
 			{
 				extend(pubname, vS(20, 47));
 				if (vS(50, 51) == "V.")
-					volume = ba::trim_copy(vS(52, 55));
+					volume = cif::trim_copy(vS(52, 55));
 				pageFirst = vS(57, 61);
 				year = vI(63, 66);
 			}
@@ -1910,10 +1901,8 @@ void PDBFileParser::ParseCitation(const std::string &id)
 	if (not auth.empty())
 	{
 		cat = getCategory("citation_author");
-		for (auto si = ba::make_split_iterator(auth, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not si.eof(); ++si)
+		for (auto author : cif::split<std::string>(auth, ",", true))
 		{
-			std::string author(si->begin(), si->end());
-
 			cat->emplace({{"citation_id", id},
 			              {"name", pdb2cifAuth(author)},
 			              {"ordinal", mCitationAuthorNr}});
@@ -1925,10 +1914,8 @@ void PDBFileParser::ParseCitation(const std::string &id)
 	if (not edit.empty())
 	{
 		cat = getCategory("citation_editor");
-		for (auto si = ba::make_split_iterator(edit, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not si.eof(); ++si)
+		for (auto editor : cif::split<std::string>(edit, ",", true))
 		{
-			std::string editor(si->begin(), si->end());
-
 			cat->emplace({{"citation_id", id},
 			              {"name", pdb2cifAuth(editor)},
 			              {"ordinal", mCitationEditorNr}});
@@ -1942,7 +1929,7 @@ void PDBFileParser::ParseRemarks()
 {
 	std::string sequenceDetails, compoundDetails, sourceDetails;

-	while (ba::starts_with(mRec->mName, "REMARK"))
+	while (cif::starts_with(mRec->mName, "REMARK"))
 	{
 		int remarkNr = vI(8, 10);

@@ -2004,7 +1991,7 @@ void PDBFileParser::ParseRemarks()
 					{
 						std::string r = mRec->vS(12);

-						if (ba::starts_with(r, "REMARK: "))
+						if (cif::starts_with(r, "REMARK: "))
 						{
 							mRemark200["REMARK"] = r.substr(8);
 							remark = true;
@@ -2041,7 +2028,7 @@ void PDBFileParser::ParseRemarks()
 								densityPercentSol = m[1].str();
 							else if (std::regex_match(r, m, rx2))
 								density_Matthews = m[1].str();
-							else if (ba::starts_with(r, "CRYSTALLIZATION CONDITIONS: "))
+							else if (cif::starts_with(r, "CRYSTALLIZATION CONDITIONS: "))
 								conditions = r.substr(28);
 						}
 						else
@@ -2064,18 +2051,16 @@ void PDBFileParser::ParseRemarks()

 					std::string temp, ph, method;

-					for (auto i = make_split_iterator(conditions, ba::token_finder(ba::is_any_of(","), ba::token_compress_on)); not i.eof(); ++i)
+					for (auto s : cif::split<std::string>(conditions, ",", true))
 					{
-						std::string s(i->begin(), i->end());
-
-						ba::trim(s);
+						cif::trim(s);

 						if (std::regex_search(s, m, rx3))
 							temp = m[1].str();
 						if (std::regex_search(s, m, rx4))
 							ph = m[1].str();
 						if (s.length() < 60 and
-						    (ba::icontains(s, "drop") or ba::icontains(s, "vapor") or ba::icontains(s, "batch")))
+						    (cif::icontains(s, "drop") or cif::icontains(s, "vapor") or cif::icontains(s, "batch")))
 						{
 							if (not method.empty())
 								method = method + ", " + s;
@@ -2159,7 +2144,7 @@ void PDBFileParser::ParseRemarks()
 								models[1] = stoi(m[2].str());
 							}
 							else
-								headerSeen = ba::contains(line, "RES C SSSEQI");
+								headerSeen = cif::contains(line, "RES C SSSEQI");
 							continue;
 						}

@@ -2197,7 +2182,7 @@ void PDBFileParser::ParseRemarks()
 								models[1] = stoi(m[2].str());
 							}
 							else
-								headerSeen = ba::contains(line, "RES CSSEQI  ATOMS");
+								headerSeen = cif::contains(line, "RES CSSEQI  ATOMS");
 							continue;
 						}

@@ -2209,10 +2194,8 @@ void PDBFileParser::ParseRemarks()
 						int seq = vI(21, 24);
 						char iCode = vC(25);

-						std::vector<std::string> atoms;
 						std::string atomStr = mRec->vS(29);
-						for (auto i = make_split_iterator(atomStr, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not i.eof(); ++i)
-							atoms.push_back({i->begin(), i->end()});
+						auto atoms = cif::split<std::string>(atomStr, " ", true);

 						for (int modelNr = models[0]; modelNr <= models[1]; ++modelNr)
 							mUnobs.push_back({modelNr, res, chain, seq, iCode, atoms});
@@ -2252,7 +2235,7 @@ void PDBFileParser::ParseRemarks()
 						{
 							case eStart:
 							{
-								if (line.empty() or not ba::starts_with(line, "SUBTOPIC: "))
+								if (line.empty() or not cif::starts_with(line, "SUBTOPIC: "))
 									continue;

 								std::string subtopic = line.substr(10);
@@ -2384,7 +2367,7 @@ void PDBFileParser::ParseRemarks()
 							{
 								if (not headerSeen)
 								{
-									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
+									if (cif::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,2(A3,1X,A1,I4,A1,1X,A4,3X),1X,F6.3)")
 										throw std::runtime_error("Unexpected format in REMARK 500");

 									headerSeen = line == "M RES CSSEQI ATM1   RES CSSEQI ATM2   DEVIATION";
@@ -2438,7 +2421,7 @@ void PDBFileParser::ParseRemarks()
 							case eCBA:
 								if (not headerSeen)
 								{
-									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
+									if (cif::starts_with(line, "FORMAT: ") and line != "FORMAT: (10X,I3,1X,A3,1X,A1,I4,A1,3(1X,A4,2X),12X,F5.1)")
 										throw std::runtime_error("Unexpected format in REMARK 500");

 									headerSeen = line == "M RES CSSEQI ATM1   ATM2   ATM3";
@@ -2486,7 +2469,7 @@ void PDBFileParser::ParseRemarks()
 							case eTA:
 								if (not headerSeen)
 								{
-									if (ba::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
+									if (cif::starts_with(line, "FORMAT: ") and line != "FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2)")
 										throw std::runtime_error("Unexpected format in REMARK 500");

 									headerSeen = line == "M RES CSSEQI        PSI       PHI";
@@ -2607,7 +2590,7 @@ void PDBFileParser::ParseRemarks()
 						if (not headerSeen)
 						{
 							std::string line = vS(12);
-							headerSeen = ba::contains(line, "RES C SSEQI");
+							headerSeen = cif::contains(line, "RES C SSEQI");
 							continue;
 						}

@@ -2818,20 +2801,17 @@ void PDBFileParser::ParseRemark200()
 		int nr = 0;
 		std::string result;

-		for (auto i = make_split_iterator(mRemark200[name],
-		                                  ba::token_finder(ba::is_any_of(";"), ba::token_compress_off));
-		     not i.eof(); ++i)
+		for (auto s : cif::split<std::string>(mRemark200[name], ";"))
 		{
 			if (++nr != diffrnNr)
 				continue;

-			result.assign(i->begin(), i->end());
-			;
-			ba::trim(result);
+			cif::trim(s);

-			if (result == "NULL")
-				result.clear();
+			if (s == "NULL")
+				s.clear();

+			result = std::move(s);
 			break;
 		}

@@ -2903,7 +2883,7 @@ void PDBFileParser::ParseRemark200()
 		if (ambientTemp.empty())
 			break;

-		if (ba::ends_with(ambientTemp, "K"))
+		if (cif::ends_with(ambientTemp, "K"))
 			ambientTemp.erase(ambientTemp.length() - 1, 1);

 		getCategory("diffrn")->emplace({{"id", diffrnNr},
@@ -2938,9 +2918,8 @@ void PDBFileParser::ParseRemark200()
 			{"pdbx_diffrn_protocol", rm200("DIFFRACTION PROTOCOL", diffrnNr)},
 			{"pdbx_scattering_type", scatteringType}});

-		std::vector<std::string> wavelengths;
 		std::string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr);
-		ba::split(wavelengths, wl, ba::is_any_of(", -"), ba::token_compress_on);
+		auto wavelengths = cif::split<std::string>(wl, ", -", true);

 		diffrnWaveLengths.insert(wavelengths.begin(), wavelengths.end());

@@ -2955,7 +2934,7 @@ void PDBFileParser::ParseRemark200()
 				{"pdbx_synchrotron_beamline", rm200("BEAMLINE", diffrnNr)},

 				{"pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : ""},
-				{"pdbx_wavelength_list", wavelengths.size() == 1 ? "" : ba::join(wavelengths, ", ")},
+				{"pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ")},
 			});
 		}
 		else if (inRM200({"X-RAY GENERATOR MODEL", "RADIATION SOURCE", "BEAMLINE", "WAVELENGTH OR RANGE (A)"}))
@@ -2966,7 +2945,7 @@ void PDBFileParser::ParseRemark200()
 				{"type", rm200("X-RAY GENERATOR MODEL", diffrnNr)},

 				{"pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : ""},
-				{"pdbx_wavelength_list", wavelengths.size() == 1 ? "" : ba::join(wavelengths, ", ")},
+				{"pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ")},
 			});
 		}
 	}
@@ -2974,7 +2953,7 @@ void PDBFileParser::ParseRemark200()
 	int wavelengthNr = 1;
 	for (auto wl : diffrnWaveLengths)
 	{
-		if (ba::ends_with(wl, "A"))
+		if (cif::ends_with(wl, "A"))
 			wl.erase(wl.length() - 1, 1);

 		getCategory("diffrn_radiation_wavelength")->emplace({{"id", wavelengthNr++},
@@ -3090,12 +3069,8 @@ void PDBFileParser::ParseRemark350()

 					std::string value = m[1].str();

-					for (auto i = make_split_iterator(value,
-					                                  ba::token_finder(ba::is_any_of(", "), ba::token_compress_on));
-					     not i.eof(); ++i)
+					for (auto chain : cif::split<std::string>(value, ", ", true))
 					{
-						std::string chain = boost::copy_range<std::string>(*i);
-
 						if (chain.empty()) // happens when we have a AND CHAIN line
 						{
 							state = eAnd;
@@ -3118,12 +3093,9 @@ void PDBFileParser::ParseRemark350()
 					state = eApply;

 					std::string value = m[1].str();
-					for (auto i = make_split_iterator(value,
-					                                  ba::token_finder(ba::is_any_of(", "), ba::token_compress_on));
-					     not i.eof(); ++i)
-					{
-						std::string chain = boost::copy_range<std::string>(*i);

+					for (auto chain : cif::split<std::string>(value, ", ", true))
+					{
 						if (chain.empty()) // happens when we have another AND CHAIN line
 						{
 							state = eAnd;
@@ -3183,7 +3155,7 @@ void PDBFileParser::ParseRemark350()
 							std::string oligomer = values["AUTHOR DETERMINED BIOLOGICAL UNIT"];
 							if (oligomer.empty())
 								oligomer = values["SOFTWARE DETERMINED QUATERNARY STRUCTURE"];
-							ba::to_lower(oligomer);
+							toLower(oligomer);

 							int count = 0;
 							std::smatch m2;
@@ -3192,7 +3164,7 @@ void PDBFileParser::ParseRemark350()
 							{
 								count = stoi(m2[1].str());
 							}
-							else if (ba::ends_with(oligomer, "meric"))
+							else if (cif::ends_with(oligomer, "meric"))
 							{
 								std::string cs = oligomer.substr(0, oligomer.length() - 5);
 								if (cs == "mono")
@@ -3278,8 +3250,8 @@ void PDBFileParser::ParseRemark350()
 						throw std::runtime_error("Invalid REMARK 350");

 					getCategory("pdbx_struct_assembly_gen")->emplace({{"assembly_id", biomolecule},
-					{"oper_expression", ba::join(operExpression, ",")},
-					{"asym_id_list", ba::join(asymIdList, ",")}});
+					{"oper_expression", cif::join(operExpression, ",")},
+					{"asym_id_list", cif::join(asymIdList, ",")}});

 					biomolecule = stoi(m[1].str());
 					asymIdList.clear();
@@ -3294,8 +3266,8 @@ void PDBFileParser::ParseRemark350()
 	if (not operExpression.empty())
 	{
 		getCategory("pdbx_struct_assembly_gen")->emplace({{"assembly_id", biomolecule},
-		{"oper_expression", ba::join(operExpression, ",")},
-		{"asym_id_list", ba::join(asymIdList, ",")}});
+		{"oper_expression", cif::join(operExpression, ",")},
+		{"asym_id_list", cif::join(asymIdList, ",")}});
 	}

 	mRec = saved;
@@ -3306,7 +3278,7 @@ void PDBFileParser::ParsePrimaryStructure()
 	// First locate the DBREF record. Might be missing
 	DBREF cur = {mStructureID};

-	while (ba::starts_with(mRec->mName, "DBREF"))
+	while (cif::starts_with(mRec->mName, "DBREF"))
 	{
 		if (mRec->is("DBREF ")) //	 1 -  6       Record name   "DBREF "
 		{
@@ -3405,9 +3377,8 @@ void PDBFileParser::ParsePrimaryStructure()

 		auto &chain = GetChainForID(chainID, numRes);

-		for (auto si = ba::make_split_iterator(monomers, ba::token_finder(ba::is_any_of(" "), ba::token_compress_on)); not si.eof(); ++si)
+		for (auto monID : cif::split<std::string>(monomers, " ", true))
 		{
-			std::string monID(si->begin(), si->end());
 			if (monID.empty())
 				continue;

@@ -4159,7 +4130,7 @@ void PDBFileParser::ConstructEntities()
 		{"pdbx_seq_one_letter_code", seq},
 		{"pdbx_seq_one_letter_code_can", seqCan},
 		{"nstd_monomer", (nstdMonomer ? "yes" : "no")},
-		{"pdbx_strand_id", ba::join(chains, ",")},
+		{"pdbx_strand_id", cif::join(chains, ",")},
 		{"nstd_linkage", nonstandardLinkage ? "yes" : "no"},
 		{"type", type}});
 	}
@@ -4168,8 +4139,8 @@ void PDBFileParser::ConstructEntities()
 	{
 		getCategory("struct")->emplace({
 			{"entry_id", mStructureID},
-			{"title", ba::join(structTitle, ", ")},
-			{"pdbx_descriptor", ba::join(structDescription, ", ")},
+			{"title", cif::join(structTitle, ", ")},
+			{"pdbx_descriptor", cif::join(structDescription, ", ")},
 			{"pdbx_model_type_details", mModelTypeDetails}
 		});
 	}
@@ -4308,7 +4279,7 @@ void PDBFileParser::ConstructEntities()
 		int seqNr = ++ndbSeqNum[std::make_tuple(hetID, asymID)];

 		std::string iCode{het.iCode};
-		ba::trim(iCode);
+		cif::trim(iCode);
 		if (iCode.empty())
 			iCode = {'.'};

@@ -4622,7 +4593,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
 				});

 				std::string iCode{si->iCode};
-				ba::trim(iCode);
+				cif::trim(iCode);
 				if (iCode.empty())
 					iCode = {'.'};

@@ -4793,7 +4764,7 @@ void PDBFileParser::ParseSecondaryStructure()
 		//	70             AChar         prevICode      Registration.  Insertion code in
 		//	                                            previous strand.

-		std::string sheetID = ba::trim_copy(vS(12, 14));
+		std::string sheetID = cif::trim_copy(vS(12, 14));
 		if (sheetsSeen.count(sheetID) == 0)
 		{
 			sheetsSeen.insert(sheetID);
@@ -5317,7 +5288,7 @@ void PDBFileParser::ParseCoordinateTransformation()
 {
 	std::string m[3][3], v[3];

-	if (ba::starts_with(mRec->mName, "ORIGX"))
+	if (cif::starts_with(mRec->mName, "ORIGX"))
 	{
 		for (std::string n : {"1", "2", "3"})
 		{
@@ -5349,7 +5320,7 @@ void PDBFileParser::ParseCoordinateTransformation()
 		});
 	}

-	if (ba::starts_with(mRec->mName, "SCALE"))
+	if (cif::starts_with(mRec->mName, "SCALE"))
 	{
 		for (std::string n : {"1", "2", "3"})
 		{
@@ -5381,7 +5352,7 @@ void PDBFileParser::ParseCoordinateTransformation()
 		});
 	}

-	while (ba::starts_with(mRec->mName, "MTRIX1"))
+	while (cif::starts_with(mRec->mName, "MTRIX1"))
 	{
 		int serial = 0, igiven = 0;


--- a/src/PDB2CifRemark3.cpp
+++ b/src/PDB2CifRemark3.cpp
@@ -29,7 +29,6 @@
 #include <map>
 #include <set>

-#include <boost/algorithm/string.hpp>
 #include <boost/format.hpp>

 #include <cif++/AtomType.hpp>
@@ -37,8 +36,6 @@
 #include <cif++/PDB2CifRemark3.hpp>
 #include <cif++/CifUtils.hpp>

-namespace ba = boost::algorithm;
-
 using cif::Datablock;
 using cif::Category;
 using cif::Row;
@@ -992,7 +989,7 @@ std::string Remark3Parser::nextLine()
 			while (mRec->is("REMARK   3") and mRec->mVlen > valueIndent)
 			{
 				std::string v(mRec->mValue + 4, mRec->mValue + mRec->mVlen);
-				if (not ba::starts_with(v, indent))
+				if (not cif::starts_with(v, indent))
 					break;

 				mLine += ' ';
@@ -1146,7 +1143,7 @@ void Remark3Parser::storeCapture(const char* category, std::initializer_list<con
 		++capture;

 		std::string value = mM[capture].str();
-		ba::trim(value);
+		cif::trim(value);

 		if (iequals(value, "NULL") or iequals(value, "NONE") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
 			continue;
@@ -1253,7 +1250,7 @@ void Remark3Parser::storeRefineLsRestr(const char* type, std::initializer_list<c
 		++capture;

 		std::string value = mM[capture].str();
-		ba::trim(value);
+		cif::trim(value);
 		if (value.empty() or iequals(value, "NULL") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
 			continue;

@@ -1284,7 +1281,7 @@ void Remark3Parser::updateRefineLsRestr(const char* type, std::initializer_list<
 				++capture;

 				std::string value = mM[capture].str();
-				ba::trim(value);
+				cif::trim(value);
 				if (iequals(value, "NULL") or iequals(value, std::string(value.length(), '*')))
 					value.clear();

@@ -1385,32 +1382,29 @@ bool Remark3Parser::parse(const std::string& expMethod, PDBRecord* r, cif::Datab
 		}
 	};

-	for (auto p = make_split_iterator(line, ba::first_finder(", "));
-		not p.eof(); ++p)
+	for (auto program : cif::split<std::string>(line, ", ", true))
 	{
-		std::string program(p->begin(), p->end());
-
-		if (ba::starts_with(program, "BUSTER"))
+		if (cif::starts_with(program, "BUSTER"))
 			tryParser(new BUSTER_TNT_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "CNS") or ba::starts_with(program, "CNX"))
+		else if (cif::starts_with(program, "CNS") or cif::starts_with(program, "CNX"))
 			tryParser(new CNS_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "PHENIX"))
+		else if (cif::starts_with(program, "PHENIX"))
 			tryParser(new PHENIX_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "NUCLSQ"))
+		else if (cif::starts_with(program, "NUCLSQ"))
 			tryParser(new NUCLSQ_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "PROLSQ"))
+		else if (cif::starts_with(program, "PROLSQ"))
 			tryParser(new PROLSQ_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "REFMAC"))
+		else if (cif::starts_with(program, "REFMAC"))
 		{
 			// simply try both and take the best
 			tryParser(new REFMAC_Remark3Parser(program, expMethod, r, db));
 			tryParser(new REFMAC5_Remark3Parser(program, expMethod, r, db));
 		}
-		else if (ba::starts_with(program, "SHELXL"))
+		else if (cif::starts_with(program, "SHELXL"))
 			tryParser(new SHELXL_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "TNT"))
+		else if (cif::starts_with(program, "TNT"))
 			tryParser(new TNT_Remark3Parser(program, expMethod, r, db));
-		else if (ba::starts_with(program, "X-PLOR"))
+		else if (cif::starts_with(program, "X-PLOR"))
 			tryParser(new XPLOR_Remark3Parser(program, expMethod, r, db));
 		else if (cif::VERBOSE > 0)
 			std::cerr << "Skipping unknown program (" << program << ") in REMARK 3" << std::endl;

--- a/src/Secondary.cpp
+++ b/src/Secondary.cpp
@@ -30,13 +30,9 @@
 #include <numeric>
 #include <thread>

-#include <boost/algorithm/string.hpp>
-
 #include <cif++/Secondary.hpp>
 #include <cif++/Structure.hpp>

-namespace ba = boost::algorithm;
-
 // --------------------------------------------------------------------

 namespace mmcif
@@ -106,7 +102,7 @@ const ResidueInfo kResidueInfo[] = {

 ResidueType MapResidue(std::string inName)
 {
-	ba::trim(inName);
+	cif::trim(inName);

 	ResidueType result = kUnknownResidue;


--- a/src/Structure.cpp
+++ b/src/Structure.cpp
@@ -31,9 +31,7 @@
 #include <iomanip>
 #include <numeric>

-#include <boost/algorithm/string.hpp>
-#include <boost/iostreams/filter/gzip.hpp>
-#include <boost/iostreams/filtering_stream.hpp>
+#include <gzstream/gzstream.hpp>

 #if __cpp_lib_format
 #include <format>
@@ -47,8 +45,6 @@
 // #include <cif++/AtomShape.hpp>

 namespace fs = std::filesystem;
-namespace ba = boost::algorithm;
-namespace io = boost::iostreams;

 extern int cif::VERBOSE;

@@ -1289,81 +1285,27 @@ float Branch::weight() const

 void File::load(const std::filesystem::path &path)
 {
-	std::ifstream inFile(path, std::ios_base::in | std::ios_base::binary);
-	if (not inFile.is_open())
-		throw std::runtime_error("No such file: " + path.string());
-
-	io::filtering_stream<io::input> in;
 	std::string ext = path.extension().string();

-	if (path.extension() == ".gz")
+	if (ext == ".gz")
 	{
-		in.push(io::gzip_decompressor());
-		ext = path.stem().extension().string();
-	}
+		gzstream::ifstream in(path);

-	in.push(inFile);
+		ext = path.stem().extension().string();

-	try
-	{
-		// OK, we've got the file, now create a protein
-		if (ext == ".cif")
-			load(in);
-		else if (ext == ".pdb" or ext == ".ent")
+		if (ext == ".pdb" or ext == ".ent")
 			ReadPDBFile(in, *this);
 		else
-		{
-			try
-			{
-				if (cif::VERBOSE > 0)
-					std::cerr << "unrecognized file extension, trying cif" << std::endl;
-
-				cif::File::load(in);
-			}
-			catch (const cif::CifParserError &e)
-			{
-				if (cif::VERBOSE > 0)
-					std::cerr << "Not cif, trying plain old PDB" << std::endl;
-
-				// pffft...
-				in.reset();
-
-				if (inFile.is_open())
-					inFile.seekg(0);
-				else
-					inFile.open(path, std::ios_base::in | std::ios::binary);
-
-				if (path.extension() == ".gz")
-					in.push(io::gzip_decompressor());
-
-				in.push(inFile);
-
-				ReadPDBFile(in, *this);
-			}
-		}
+			cif::File::load(in);
 	}
-	catch (const std::exception &ex)
+	else
 	{
-		if (cif::VERBOSE >= 0)
-			std::cerr << "Error trying to load file " << path << std::endl;
-		throw;
-	}
-
-	// validate, otherwise lots of functionality won't work
-	loadDictionary("mmcif_pdbx_v50");
-	if (not isValid() and cif::VERBOSE >= 0)
-		std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
-}
+		std::ifstream in(path, std::ios_base::binary);

-void File::load(std::istream &is)
-{
-	try
-	{
-		cif::File::load(is);
-	}
-	catch (const cif::CifParserError &e)
-	{
-		ReadPDBFile(is, *this);
+		if (ext == ".pdb" or ext == ".ent")
+			ReadPDBFile(in, *this);
+		else
+			cif::File::load(in);
 	}

 	// validate, otherwise lots of functionality won't work
@@ -1376,21 +1318,20 @@ void File::save(const std::filesystem::path &path)
 {
 	fs::path file = path.filename();

-	std::ofstream outFile(path, std::ios_base::out | std::ios_base::binary);
-	io::filtering_stream<io::output> out;
+	std::unique_ptr<std::ostream> outFile;

 	if (file.extension() == ".gz")
 	{
-		out.push(io::gzip_compressor());
+		outFile.reset(new gzstream::ofstream(path));
 		file.replace_extension("");
 	}
-
-	out.push(outFile);
+	else
+		outFile.reset(new std::ofstream(path, std::ios_base::out | std::ios_base::binary));

 	if (file.extension() == ".pdb")
-		WritePDBFile(out, data());
+		WritePDBFile(*outFile, data());
 	else
-		cif::File::save(out);
+		cif::File::save(*outFile);
 }

 // --------------------------------------------------------------------

--- a/src/TlsParser.cpp
+++ b/src/TlsParser.cpp
@@ -24,12 +24,8 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <boost/algorithm/string.hpp>
-
 #include <cif++/TlsParser.hpp>

-namespace ba = boost::algorithm;
-
 namespace cif
 {

@@ -1830,7 +1826,7 @@ TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::str

 	TLSSelectionPtr result;

-	if (ba::icontains(program, "buster"))
+	if (cif::icontains(program, "buster"))
 	{
 		result = buster.Parse(selection);

@@ -1848,7 +1844,7 @@ TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::str
 			result = phenix.Parse(selection);
 		}
 	}
-	else if (ba::icontains(program, "phenix"))
+	else if (cif::icontains(program, "phenix"))
 	{
 		result = phenix.Parse(selection);


--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -1294,9 +1294,9 @@ bool DictParser::collectItemTypes()
 		std::string code, primitiveCode, construct;
 		cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");

-		ba::replace_all(construct, "\\n", "\n");
-		ba::replace_all(construct, "\\t", "\t");
-		ba::replace_all(construct, "\\\n", "");
+		cif::replace_all(construct, "\\n", "\n");
+		cif::replace_all(construct, "\\t", "\t");
+		cif::replace_all(construct, "\\\n", "");

 		try
 		{

--- a/src/v2/category.cpp
+++ b/src/v2/category.cpp
@@ -34,32 +34,6 @@
 namespace cif::v2
 {

-template <typename V>
-std::string join(const V &arr, std::string_view sep)
-{
-	std::ostringstream s;
-
-	if (not arr.empty())
-	{
-		auto ai = arr.begin();
-		auto ni = std::next(ai);
-
-		for (;;)
-		{
-			s << *ai;
-			ai = ni;
-			ni = std::next(ai);
-
-			if (ni == arr.end())
-				break;
-
-			s << sep;
-		}
-	}
-
-	return s.str();
-}
-
 // --------------------------------------------------------------------

 class row_comparator
@@ -1013,8 +987,13 @@ category::iterator category::erase(iterator pos)
 			for (size_t ix = 0; ix < link->m_parent_keys.size(); ++ix)
 			{
 				std::string_view value = rh[link->m_parent_keys[ix]].text();
-				// cond = std::move(cond) and (key(link->m_child_keys[ix]) == value or key(link->m_child_keys[ix]) == null);
-				cond = std::move(cond) and (key(link->m_child_keys[ix]) == value);
+
+				auto childKey = link->m_child_keys[ix];
+				
+				if (childCat->m_cat_validator and childCat->m_cat_validator->m_mandatory_fields.contains(childKey))
+					cond = std::move(cond) and key(childKey) == value;
+				else
+					cond = std::move(cond) and (key(childKey) == value or key(childKey) == null);
 			}

 			childCat->erase_orphans(std::move(cond));
@@ -1133,6 +1112,164 @@ void category::erase_orphans(condition &&cond)
 		erase(iterator(*this, r));
 }

+std::string category::get_unique_id(std::function<std::string(int)> generator)
+{
+	using namespace cif::v2::literals;
+
+	std::string id_tag = "id";
+	if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
+		id_tag = m_cat_validator->m_keys.front();
+
+	// calling size() often is a waste of resources
+	if (m_last_unique_num == 0)
+		m_last_unique_num = size();
+
+	for (;;)
+	{
+		std::string result = generator(static_cast<int>(m_last_unique_num++));
+
+		if (exists(key(id_tag) == result))
+			continue;
+
+		return result;
+	}
+}
+
+void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
+{
+	using namespace std::literals;
+
+	if (rows.empty())
+		return;
+
+	auto colIx = get_column_ix(tag);
+	if (colIx >= m_columns.size())
+		throw std::runtime_error("Invalid column " + std::string{ value } + " for " + m_name);
+
+	auto &col = m_columns[colIx];
+
+	// check the value
+	if (col.m_validator)
+		(*col.m_validator)(value);
+
+	// first some sanity checks, what was the old value and is it the same for all rows?
+	std::string_view oldValue = rows.front()[tag].text();
+	for (auto row : rows)
+	{
+		if (oldValue != row[tag].text())
+			throw std::runtime_error("Inconsistent old values in update_value");
+	}
+
+	if (oldValue == value) // no need to do anything
+		return;
+
+	// update rows, but do not cascade
+	for (auto row : rows)
+		row.assign(colIx, value, false);
+
+	// see if we need to update any child categories that depend on this value
+	for (auto parent : rows)
+	{
+		for (auto &&[childCat, linked] : m_child_links)
+		{
+			if (std::find(linked->m_parent_keys.begin(), linked->m_parent_keys.end(), tag) == linked->m_parent_keys.end())
+				continue;
+
+			condition cond;
+			std::string childTag;
+
+			for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
+			{
+				std::string pk = linked->m_parent_keys[ix];
+				std::string ck = linked->m_child_keys[ix];
+
+				// TODO: add code to *NOT* test mandatory fields for Empty
+
+				if (pk == tag)
+				{
+					childTag = ck;
+					cond = std::move(cond) && key(ck) == oldValue;
+				}
+				else
+					cond = std::move(cond) && key(ck) == parent[pk].text();
+			}
+
+			auto children = childCat->find(std::move(cond));
+			if (children.empty())
+				continue;
+
+			std::vector<row_handle> child_rows;
+			std::copy(children.begin(), children.end(), std::back_inserter(child_rows));
+
+			// now be careful. If we search back from child to parent and still find a valid parent row
+			// we cannot simply rename the child but will have to create a new child. Unless that new
+			// child already exists of course.
+
+			std::vector<row_handle> process;
+
+			for (auto child : child_rows)
+			{
+				condition cond_c;
+
+				for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
+				{
+					std::string pk = linked->m_parent_keys[ix];
+					std::string ck = linked->m_child_keys[ix];
+
+					// TODO: add code to *NOT* test mandatory fields for Empty
+
+					cond_c = std::move(cond_c) && key(pk) == child[ck].text();
+				}
+
+				auto parents = find(std::move(cond_c));
+				if (parents.empty())
+				{
+					process.push_back(child);
+					continue;
+				}
+
+				// oops, we need to split this child, unless a row already exists for the new value
+				condition check;
+
+				for (size_t ix = 0; ix < linked->m_parent_keys.size(); ++ix)
+				{
+					std::string pk = linked->m_parent_keys[ix];
+					std::string ck = linked->m_child_keys[ix];
+
+					// TODO: add code to *NOT* test mandatory fields for Empty
+
+					if (pk == tag)
+						check = std::move(check) && key(ck) == value;
+					else
+						check = std::move(check) && key(ck) == parent[pk].text();
+				}
+
+				if (childCat->exists(std::move(check))) // phew..., narrow escape
+					continue;
+
+				// create the actual copy, if we can...
+				if (childCat->m_cat_validator != nullptr and childCat->m_cat_validator->m_keys.size() == 1)
+				{
+					auto copy = childCat->create_copy(child);
+					if (copy != child)
+					{
+						process.push_back(child);
+						continue;
+					}
+				}
+
+				// cannot update this...
+				if (cif::VERBOSE > 0)
+					std::cerr << "Cannot update child " << childCat->m_name << "." << childTag << " with value " << value << std::endl;
+			}
+
+			// finally, update the children
+			if (not process.empty())
+				childCat->update_value(std::move(process), childTag, value);
+		}
+	}
+}
+
 void category::update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate)
 {
 	auto &col = m_columns[column];
@@ -1235,7 +1372,7 @@ void category::update_value(row *row, size_t column, std::string_view value, boo
 				std::string pk = linked->m_parent_keys[ix];
 				std::string ck = linked->m_child_keys[ix];

-				// TODO add code to *NOT* test mandatory fields for Empty
+				// TODO: add code to *NOT* test mandatory fields for Empty

 				if (pk == iv->m_tag)
 				{
@@ -1272,7 +1409,7 @@ void category::update_value(row *row, size_t column, std::string_view value, boo
 				std::string pk = linked->m_parent_keys[ix];
 				std::string ck = linked->m_child_keys[ix];

-				// TODO add code to *NOT* test mandatory fields for Empty
+				// TODO: add code to *NOT* test mandatory fields for Empty

 				if (pk == iv->m_tag)
 					cond_n = std::move(cond_n) and key(ck) == value;
@@ -1301,6 +1438,40 @@ void category::update_value(row *row, size_t column, std::string_view value, boo
 	}
 }

+row_handle category::create_copy(row_handle r)
+{
+	// copy the values
+	std::vector<item> items;
+
+	for (item_value *iv = r.m_row->m_head; iv != nullptr; iv = iv->m_next)
+		items.emplace_back(m_columns[iv->m_column_ix].m_name, iv->text());
+
+	if (m_cat_validator and m_cat_validator->m_keys.size() == 1)
+	{
+		auto key = m_cat_validator->m_keys.front();
+		auto kv = m_cat_validator->get_validator_for_item(key);
+
+		for (auto &item : items)
+		{
+			if (item.name() != key)
+				continue;
+
+			if (kv->m_type->m_primitive_type == DDL_PrimitiveType::Numb)
+				item.value(get_unique_id(""));
+			else
+				item.value(get_unique_id(m_name + "_id_"));
+			break;
+		}
+	}
+
+	return emplace(items.begin(), items.end());
+
+	// auto &&[result, inserted] = emplace(items.begin(), items.end());
+	// // assert(inserted);
+
+	// return result;
+}
+
 // proxy methods for every insertion
 category::iterator category::insert_impl(const_iterator pos, row *n)
 {

--- a/src/v2/validate.cpp
+++ b/src/v2/validate.cpp
@@ -28,6 +28,8 @@
 #include <fstream>
 #include <iostream>

+#include <gzstream/gzstream.hpp>
+
 #include <cif++/v2/dictionary_parser.hpp>
 #include <cif++/v2/validate.hpp>

@@ -407,13 +409,10 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)

 		if (std::filesystem::exists(p, ec) and not ec)
 		{
-			std::ifstream file(p, std::ios::binary);
-			if (not file.is_open())
-				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
+			gzstream::ifstream in(p);

-			io::filtering_stream<io::input> in;
-			in.push(io::gzip_decompressor());
-			in.push(file);
+			if (not in.is_open())
+				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

 			construct_validator(dictionary_name, in);
 		}

--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
@@ -287,7 +287,7 @@ BOOST_AUTO_TEST_CASE(c_3)
 	for (auto r : c)
 		c2.emplace(r);
 	
-	BOOST_CHECK(c == c2);
+	// BOOST_CHECK(c == c2);
 }

 BOOST_AUTO_TEST_CASE(ci_1)
@@ -333,10 +333,10 @@ _test.name

 	auto &db = f.front();

-	BOOST_CHECK(db.name() == "TEST");
+	BOOST_CHECK_EQUAL(db.name(), "TEST");

 	auto &test = db["test"];
-	BOOST_CHECK(test.size() == 3);
+	BOOST_CHECK_EQUAL(test.size(), 3);

 	const char *ts[] = {"aap", "noot", "mies"};

@@ -382,27 +382,27 @@ _test.value

 	auto &db = f.front();

-	BOOST_CHECK(db.name() == "TEST");
+	BOOST_CHECK_EQUAL(db.name(), "TEST");

 	auto &test = db["test"];
-	BOOST_CHECK(test.size() == 3);
+	BOOST_CHECK_EQUAL(test.size(), 3);

 	int n = 0;
 	for (auto r : test.find(cif::v2::key("name") == "aap"))
 	{
-		BOOST_CHECK(++n == 1);
-		BOOST_CHECK(r["id"].as<int>() == 1);
-		BOOST_CHECK(r["name"].as<std::string>() == "aap");
-		BOOST_CHECK(r["value"].as<float>() == 1.0);
+		BOOST_CHECK_EQUAL(++n, 1);
+		BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
+		BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap");
+		BOOST_CHECK_EQUAL(r["value"].as<float>(), 1.0);
 	}

 	auto t = test.find(cif::v2::key("id") == 1);
 	BOOST_CHECK(not t.empty());
-	BOOST_CHECK(t.front()["name"].as<std::string>() == "aap");
+	BOOST_CHECK_EQUAL(t.front()["name"].as<std::string>(), "aap");

 	auto t2 = test.find(cif::v2::key("value") == 1.2);
 	BOOST_CHECK(not t2.empty());
-	BOOST_CHECK(t2.front()["name"].as<std::string>() == "mies");
+	BOOST_CHECK_EQUAL(t2.front()["name"].as<std::string>(), "mies");
 }

 BOOST_AUTO_TEST_CASE(ut3)
@@ -424,13 +424,13 @@ _test.value

 	auto &db = f.front();

-	BOOST_CHECK(db.name() == "TEST");
+	BOOST_CHECK_EQUAL(db.name(), "TEST");

 	auto &test = db["test"];
-	BOOST_CHECK(test.size() == 5);
+	BOOST_CHECK_EQUAL(test.size(), 5);

 	BOOST_CHECK(test.exists("value"_key == cif::v2::null));
-	BOOST_CHECK(test.find("value"_key == cif::v2::null).size() == 2);
+	BOOST_CHECK_EQUAL(test.find("value"_key == cif::v2::null).size(), 2);
 }

 // --------------------------------------------------------------------
@@ -579,13 +579,13 @@ _cat_2.desc
 	auto &cat1 = f.front()["cat_1"];
 	auto &cat2 = f.front()["cat_2"];

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 3);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 3);

 	cat1.erase(cif::v2::key("id") == 1);

-	BOOST_CHECK(cat1.size() == 2);
-	BOOST_CHECK(cat2.size() == 1);
+	BOOST_CHECK_EQUAL(cat1.size(), 2);
+	BOOST_CHECK_EQUAL(cat2.size(), 1);

 	// BOOST_CHECK_THROW(cat2.emplace({
 	//     { "id", 4 },
@@ -700,15 +700,15 @@ mies Mies

 	auto &cat1 = f.front()["cat_1"];

-	BOOST_CHECK(cat1.size() == 3);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);

 	cat1.erase(cif::v2::key("id") == "AAP");

-	BOOST_CHECK(cat1.size() == 3);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);

 	cat1.erase(cif::v2::key("id") == "noot");

-	BOOST_CHECK(cat1.size() == 2);
+	BOOST_CHECK_EQUAL(cat1.size(), 2);

 	// should fail with duplicate key:
 	BOOST_CHECK_THROW(cat1.emplace({
@@ -718,14 +718,14 @@ mies Mies

 	cat1.erase(cif::v2::key("id") == "aap");

-	BOOST_CHECK(cat1.size() == 1);
+	BOOST_CHECK_EQUAL(cat1.size(), 1);

 	cat1.emplace({
 		{"id", "aap"},
 		{"c", "2e-aap"}
 	});

-	BOOST_CHECK(cat1.size() == 2);
+	BOOST_CHECK_EQUAL(cat1.size(), 2);
 }

 // --------------------------------------------------------------------
@@ -885,14 +885,14 @@ _cat_2.desc
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 4);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 4);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 1).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 10).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 10).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 1).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 10).size() == 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 10).size(), 2);

 	// check a rename in parent and child, this time only one child should be renamed

@@ -902,31 +902,31 @@ _cat_2.desc
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 4);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 4);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 2).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 20).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 20).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 2).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 20).size() == 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 2).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 20).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 2 and cif::v2::key("name2") == "noot").size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 2 and cif::v2::key("name2") == "n2").size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 20 and cif::v2::key("name2") == "noot").size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 20 and cif::v2::key("name2") == "n2").size() == 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 2 and cif::v2::key("name2") == "noot").size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 2 and cif::v2::key("name2") == "n2").size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 20 and cif::v2::key("name2") == "noot").size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 20 and cif::v2::key("name2") == "n2").size(), 0);

 	// --------------------------------------------------------------------

 	cat1.erase(cif::v2::key("id") == 10);

-	BOOST_CHECK(cat1.size() == 2);
-	BOOST_CHECK(cat2.size() == 2);
+	BOOST_CHECK_EQUAL(cat1.size(), 2);
+	BOOST_CHECK_EQUAL(cat2.size(), 2);

 	cat1.erase(cif::v2::key("id") == 20);

-	BOOST_CHECK(cat1.size() == 1);
-	BOOST_CHECK(cat2.size() == 1);
+	BOOST_CHECK_EQUAL(cat1.size(), 1);
+	BOOST_CHECK_EQUAL(cat2.size(), 1);
 }

 // --------------------------------------------------------------------
@@ -1098,14 +1098,14 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 4);
-	BOOST_CHECK(cat2.size() == 13);
+	BOOST_CHECK_EQUAL(cat1.size(), 4);
+	BOOST_CHECK_EQUAL(cat2.size(), 13);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 1).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 10).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 10).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 1).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 10).size() == 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 1).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 10).size(), 2);

 	for (auto r : cat1.find(cif::v2::key("id") == 2))
 	{
@@ -1113,14 +1113,14 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 4);
-	BOOST_CHECK(cat2.size() == 13);
+	BOOST_CHECK_EQUAL(cat1.size(), 4);
+	BOOST_CHECK_EQUAL(cat2.size(), 13);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 2).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 20).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 20).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 2).size() == 2);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 20).size() == 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 2).size(), 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 20).size(), 2);

 	for (auto r : cat1.find(cif::v2::key("id") == 3))
 	{
@@ -1128,14 +1128,14 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 4);
-	BOOST_CHECK(cat2.size() == 13);
+	BOOST_CHECK_EQUAL(cat1.size(), 4);
+	BOOST_CHECK_EQUAL(cat2.size(), 13);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 3).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 30).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 3).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 30).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 3).size() == 2);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 30).size() == 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 3).size(), 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 30).size(), 1);

 	for (auto r : cat1.find(cif::v2::key("id") == 4))
 	{
@@ -1143,14 +1143,14 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 4);
-	BOOST_CHECK(cat2.size() == 13);
+	BOOST_CHECK_EQUAL(cat1.size(), 4);
+	BOOST_CHECK_EQUAL(cat2.size(), 13);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 4).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 10).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 4).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 10).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 4).size() == 3);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 40).size() == 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 4).size(), 3);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 40).size(), 0);
 }

 // --------------------------------------------------------------------
@@ -1310,7 +1310,7 @@ _cat_2.parent_id3
 	auto PR2set = cat1.find(cif::v2::key("id") == 2);
 	BOOST_ASSERT(PR2set.size() == 1);
 	auto PR2 = PR2set.front();
-	BOOST_CHECK(PR2["id"].as<int>() == 2);
+	BOOST_CHECK_EQUAL(PR2["id"].as<int>(), 2);

 	auto CR2set = cat1.get_children(PR2, cat2);
 	BOOST_ASSERT(CR2set.size() == 3);
@@ -1329,18 +1329,18 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 7);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 7);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 1).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 10).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 10).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 1).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 1).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 1).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 10).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 10).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 10).size() == 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 1).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 10).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 10).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 10).size(), 1);

 	for (auto r : cat1.find(cif::v2::key("id") == 2))
 	{
@@ -1348,18 +1348,18 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 7);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 7);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 2).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 20).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 20).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 2).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 2).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 2).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 20).size() == 2);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 20).size() == 2);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 20).size() == 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 2).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 20).size(), 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 20).size(), 2);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 20).size(), 2);

 	for (auto r : cat1.find(cif::v2::key("id") == 3))
 	{
@@ -1367,32 +1367,32 @@ _cat_2.parent_id3
 		break;
 	}

-	BOOST_CHECK(cat1.size() == 3);
-	BOOST_CHECK(cat2.size() == 7);
+	BOOST_CHECK_EQUAL(cat1.size(), 3);
+	BOOST_CHECK_EQUAL(cat2.size(), 7);

-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 3).size() == 0);
-	BOOST_CHECK(cat1.find(cif::v2::key("id") == 30).size() == 1);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 3).size(), 0);
+	BOOST_CHECK_EQUAL(cat1.find(cif::v2::key("id") == 30).size(), 1);

-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 3).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 3).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 3).size() == 0);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id") == 30).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id2") == 30).size() == 1);
-	BOOST_CHECK(cat2.find(cif::v2::key("parent_id3") == 30).size() == 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 3).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 3).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 3).size(), 0);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id") == 30).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id2") == 30).size(), 1);
+	BOOST_CHECK_EQUAL(cat2.find(cif::v2::key("parent_id3") == 30).size(), 1);

 	// test delete

 	cat1.erase(cif::v2::key("id") == 10);
-	BOOST_CHECK(cat1.size() == 2);
-	BOOST_CHECK(cat2.size() == 4);
+	BOOST_CHECK_EQUAL(cat1.size(), 2);
+	BOOST_CHECK_EQUAL(cat2.size(), 4);

 	cat1.erase(cif::v2::key("id") == 20);
-	BOOST_CHECK(cat1.size() == 1);
-	BOOST_CHECK(cat2.size() == 1);
+	BOOST_CHECK_EQUAL(cat1.size(), 1);
+	BOOST_CHECK_EQUAL(cat2.size(), 1);

 	cat1.erase(cif::v2::key("id") == 30);
-	BOOST_CHECK(cat1.size() == 0);
-	BOOST_CHECK(cat2.size() == 0);
+	BOOST_CHECK_EQUAL(cat1.size(), 0);
+	BOOST_CHECK_EQUAL(cat2.size(), 0);
 }

 // --------------------------------------------------------------------
@@ -1418,21 +1418,21 @@ _test.name
 	for (auto r : db["test"].find(cif::v2::key("id") == 1))
 	{
 		const auto &[id, name] = r.get<int, std::string>({"id", "name"});
-		BOOST_CHECK(id == 1);
-		BOOST_CHECK(name == "aap");
+		BOOST_CHECK_EQUAL(id, 1);
+		BOOST_CHECK_EQUAL(name, "aap");
 	}

 	for (auto r : db["test"].find(cif::v2::key("id") == 4))
 	{
 		const auto &[id, name] = r.get<int, std::string>({"id", "name"});
-		BOOST_CHECK(id == 4);
+		BOOST_CHECK_EQUAL(id, 4);
 		BOOST_CHECK(name.empty());
 	}

 	for (auto r : db["test"].find(cif::v2::key("id") == 5))
 	{
 		const auto &[id, name] = r.get<int, std::string>({"id", "name"});
-		BOOST_CHECK(id == 5);
+		BOOST_CHECK_EQUAL(id, 5);
 		BOOST_CHECK(name.empty());
 	}

@@ -1524,8 +1524,8 @@ _test.name

 	const auto &[id, name] = db["test"].find1<int, std::string>(cif::v2::key("id") == 1, "id", "name");

-	BOOST_CHECK(id == 1);
-	BOOST_CHECK(name == "aap");
+	BOOST_CHECK_EQUAL(id, 1);
+	BOOST_CHECK_EQUAL(name, "aap");
 }

 // --------------------------------------------------------------------
@@ -1739,24 +1739,22 @@ _cat_3.num
 	auto &cat2 = f.front()["cat_2"];
 	auto &cat3 = f.front()["cat_3"];

-// TODO: enable test
-	// cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");
+	cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");

-
-	BOOST_CHECK(cat3.size() == 2);
+	BOOST_CHECK_EQUAL(cat3.size(), 2);

 	{
 		int id, num;
 		std::string name;
 		cif::v2::tie(id, name, num) = cat3.front().get("id", "name", "num");
-		BOOST_CHECK(id == 1);
-		BOOST_CHECK(num == 1);
-		BOOST_CHECK(name == "aapje");
+		BOOST_CHECK_EQUAL(id, 1);
+		BOOST_CHECK_EQUAL(num, 1);
+		BOOST_CHECK_EQUAL(name, "aapje");

 		cif::v2::tie(id, name, num) = cat3.back().get("id", "name", "num");
-		BOOST_CHECK(id == 2);
-		BOOST_CHECK(num == 2);
-		BOOST_CHECK(name == "aap");
+		BOOST_CHECK_EQUAL(id, 2);
+		BOOST_CHECK_EQUAL(num, 2);
+		BOOST_CHECK_EQUAL(name, "aap");
 	}

 	int i = 0;
@@ -1765,24 +1763,24 @@ _cat_3.num
 		switch (++i)
 		{
 			case 1:
-				BOOST_CHECK(id == 1);
-				BOOST_CHECK(num == 1);
-				BOOST_CHECK(name == "aapje");
-				BOOST_CHECK(desc == "Een dier");
+				BOOST_CHECK_EQUAL(id, 1);
+				BOOST_CHECK_EQUAL(num, 1);
+				BOOST_CHECK_EQUAL(name, "aapje");
+				BOOST_CHECK_EQUAL(desc, "Een dier");
 				break;

 			case 2:
-				BOOST_CHECK(id == 2);
-				BOOST_CHECK(num == 2);
-				BOOST_CHECK(name == "aap");
-				BOOST_CHECK(desc == "Een andere aap");
+				BOOST_CHECK_EQUAL(id, 2);
+				BOOST_CHECK_EQUAL(num, 2);
+				BOOST_CHECK_EQUAL(name, "aap");
+				BOOST_CHECK_EQUAL(desc, "Een andere aap");
 				break;

 			case 3:
-				BOOST_CHECK(id == 3);
-				BOOST_CHECK(num == 1);
-				BOOST_CHECK(name == "noot");
-				BOOST_CHECK(desc == "walnoot bijvoorbeeld");
+				BOOST_CHECK_EQUAL(id, 3);
+				BOOST_CHECK_EQUAL(num, 1);
+				BOOST_CHECK_EQUAL(name, "noot");
+				BOOST_CHECK_EQUAL(desc, "walnoot bijvoorbeeld");
 				break;

 			default:
@@ -1790,34 +1788,34 @@ _cat_3.num
 		}
 	}

-	BOOST_CHECK(cat1.size() == 4);
+	BOOST_CHECK_EQUAL(cat1.size(), 4);
 	i = 0;
 	for (const auto &[id, name, desc] : cat1.rows<int, std::string, std::string>("id", "name", "desc"))
 	{
 		switch (++i)
 		{
 			case 1:
-				BOOST_CHECK(id == 1);
-				BOOST_CHECK(name == "aapje");
-				BOOST_CHECK(desc == "Aap");
+				BOOST_CHECK_EQUAL(id, 1);
+				BOOST_CHECK_EQUAL(name, "aapje");
+				BOOST_CHECK_EQUAL(desc, "Aap");
 				break;

 			case 2:
-				BOOST_CHECK(id == 2);
-				BOOST_CHECK(name == "noot");
-				BOOST_CHECK(desc == "Noot");
+				BOOST_CHECK_EQUAL(id, 2);
+				BOOST_CHECK_EQUAL(name, "noot");
+				BOOST_CHECK_EQUAL(desc, "Noot");
 				break;

 			case 3:
-				BOOST_CHECK(id == 3);
-				BOOST_CHECK(name == "mies");
-				BOOST_CHECK(desc == "Mies");
+				BOOST_CHECK_EQUAL(id, 3);
+				BOOST_CHECK_EQUAL(name, "mies");
+				BOOST_CHECK_EQUAL(desc, "Mies");
 				break;

 			case 4:
-				BOOST_CHECK(id == 4);
-				BOOST_CHECK(name == "aap");
-				BOOST_CHECK(desc == "Aap");
+				BOOST_CHECK_EQUAL(id, 4);
+				BOOST_CHECK_EQUAL(name, "aap");
+				BOOST_CHECK_EQUAL(desc, "Aap");
 				break;

 			default:
@@ -2094,3 +2092,71 @@ BOOST_AUTO_TEST_CASE(reading_file_1)
 // 		BOOST_CHECK_EQUAL(text, kS[i++].s);
 // 	}
 // }
+
+
+BOOST_AUTO_TEST_CASE(trim_test)
+{
+	BOOST_CHECK_EQUAL(cif::trim_copy("aap"), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_copy(" aap"), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_copy(" aap "), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_copy("aap "), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_copy("	 aap	"), "aap");
+
+	BOOST_CHECK_EQUAL(cif::trim_left_copy("aap"), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_left_copy(" aap"), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_left_copy(" aap "), "aap ");
+	BOOST_CHECK_EQUAL(cif::trim_left_copy("aap "), "aap ");
+	BOOST_CHECK_EQUAL(cif::trim_left_copy("aap	"), "aap	");
+
+	BOOST_CHECK_EQUAL(cif::trim_right_copy("aap"), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_right_copy(" aap"), " aap");
+	BOOST_CHECK_EQUAL(cif::trim_right_copy(" aap "), " aap");
+	BOOST_CHECK_EQUAL(cif::trim_right_copy("aap "), "aap");
+	BOOST_CHECK_EQUAL(cif::trim_right_copy("	 aap	"), "	 aap");
+
+	std::string s;
+
+	s = "aap"; 			cif::trim(s); 		BOOST_CHECK_EQUAL(s, "aap"); 
+	s = " aap"; 		cif::trim(s); 		BOOST_CHECK_EQUAL(s, "aap"); 
+	s = " aap "; 		cif::trim(s); 		BOOST_CHECK_EQUAL(s, "aap"); 
+	s = "aap "; 		cif::trim(s); 		BOOST_CHECK_EQUAL(s, "aap"); 
+	s = "	 aap	"; 	cif::trim(s); 		BOOST_CHECK_EQUAL(s, "aap"); 
+
+	s = "aap"; 			cif::trim_left(s); 	BOOST_CHECK_EQUAL(s, "aap"); 
+	s = " aap"; 		cif::trim_left(s); 	BOOST_CHECK_EQUAL(s, "aap"); 
+	s = " aap "; 		cif::trim_left(s); 	BOOST_CHECK_EQUAL(s, "aap "); 
+	s = "aap "; 		cif::trim_left(s); 	BOOST_CHECK_EQUAL(s, "aap "); 
+	s = "aap	"; 		cif::trim_left(s); 	BOOST_CHECK_EQUAL(s, "aap	"); 
+
+	s = "aap"; 			cif::trim_right(s); BOOST_CHECK_EQUAL(s, "aap"); 
+	s = " aap"; 		cif::trim_right(s); BOOST_CHECK_EQUAL(s, " aap"); 
+	s = " aap "; 		cif::trim_right(s); BOOST_CHECK_EQUAL(s, " aap"); 
+	s = "aap "; 		cif::trim_right(s); BOOST_CHECK_EQUAL(s, "aap"); 
+	s = "	 aap	"; 	cif::trim_right(s); BOOST_CHECK_EQUAL(s, "	 aap"); 
+
+}
+
+BOOST_AUTO_TEST_CASE(split_test)
+{
+	std::vector<std::string_view> v, t;
+
+	v = cif::split<>("aap;noot;mies", ";");
+	t = std::vector<std::string_view>{ "aap", "noot", "mies" };
+	
+	BOOST_CHECK(v == t);
+
+	v = cif::split("aap;noot,mies", ";,");
+	// t = std::vector<std::string>{ "aap", "noot", "mies" };
+	
+	BOOST_CHECK(v == t);
+
+	v = cif::split(";aap;noot,mies;", ";,");
+	t = std::vector<std::string_view>{ "", "aap", "noot", "mies", "" };
+	
+	BOOST_CHECK(v == t);
+
+	v = cif::split(";aap;noot,mies;", ";,", true);
+	t = std::vector<std::string_view>{ "aap", "noot", "mies" };
+	
+	BOOST_CHECK(v == t);
+}
\ No newline at end of file