Merge remote-tracking branch 'origin/develop' into trunk

a96b1e07 · Maarten L. Hekkelman · f48c31bc · d85ab93a · a96b1e07 · a96b1e07
Commit a96b1e07 authored Sep 10, 2024 by Maarten L. Hekkelman
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,11 +27,12 @@ cmake_minimum_required(VERSION 3.23)
 # set the project name
 project(
 	libcifpp
-	VERSION 7.0.4
+	VERSION 7.0.5
 	LANGUAGES CXX)

 list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

+include(FindAtomic)
 include(CheckFunctionExists)
 include(CheckIncludeFiles)
 include(CheckLibraryExists)
@@ -41,8 +42,6 @@ include(GenerateExportHeader)
 include(CTest)
 include(FetchContent)

-message(STATUS "DESTDIR is '${DESTDIR}'")
-
 # When building with ninja-multiconfig, build both debug and release by default
 if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
 	set(CMAKE_CROSS_CONFIGS "Debug;Release")
@@ -188,10 +187,6 @@ if(GXX_LIBSTDCPP)
 	endif()
 endif()

-set(CMAKE_THREAD_PREFER_PTHREAD)
-set(THREADS_PREFER_PTHREAD_FLAG)
-find_package(Threads)
-
 if(MSVC)
 	# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
 	# set.
@@ -221,6 +216,7 @@ if(MSVC)
 endif()

 find_package(ZLIB QUIET)
+# Threads::Threads is linked unconditionally below, so fail right here when no
+# thread library can be found
+find_package(Threads REQUIRED)

 if(NOT ZLIB_FOUND)
 	message(FATAL_ERROR "The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
@@ -260,6 +256,8 @@ if(CIFPP_RECREATE_SYMOP_DATA)
 	add_executable(symop-map-generator
 		"${CMAKE_CURRENT_SOURCE_DIR}/src/symop-map-generator.cpp")

+	target_compile_features(symop-map-generator PUBLIC cxx_std_20)
+
 	add_custom_command(
 		OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
 		COMMAND
@@ -331,7 +329,7 @@ set(project_headers
 	include/cif++/validate.hpp
 )

-add_library(cifpp STATIC)
+add_library(cifpp)
 add_library(cifpp::cifpp ALIAS cifpp)

 target_sources(cifpp
@@ -371,7 +369,7 @@ target_include_directories(
 	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
 	PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")

-target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB)
+target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB std::atomic)

 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
 	target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -444,6 +442,10 @@ if(CIFPP_DATA_DIR)
 	target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
 endif()

+# When this project is not the top level project, hand the value of
+# CIFPP_DATA_DIR to the parent scope under the name CIFPP_SHARE_DIR
+if(NOT PROJECT_IS_TOP_LEVEL)
+	set(CIFPP_SHARE_DIR ${CIFPP_DATA_DIR} PARENT_SCOPE)
+endif()
+
 if(UNIX AND NOT BUILD_FOR_CCP4)
 	if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
 		set(CIFPP_CACHE_DIR
@@ -464,75 +466,72 @@ else()
 	unset(CIFPP_CACHE_DIR)
 endif()

-# Avoid full installation in case we are not the top level target
-if(PROJECT_IS_TOP_LEVEL OR BUILD_FOR_CCP4)
-	# Install rules
-	install(TARGETS cifpp
-		EXPORT cifpp
-		FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+# Install rules
+install(TARGETS cifpp
+	EXPORT cifpp
+	FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

-	if(MSVC AND BUILD_SHARED_LIBS)
-		install(
-			FILES $<TARGET_PDB_FILE:cifpp>
-			DESTINATION ${CMAKE_INSTALL_LIBDIR}
-			OPTIONAL)
-	endif()
+if(MSVC AND BUILD_SHARED_LIBS)
+	install(
+		FILES $<TARGET_PDB_FILE:cifpp>
+		DESTINATION ${CMAKE_INSTALL_LIBDIR}
+		OPTIONAL)
+endif()

-	# Clean up old config files (with old names)
-	file(GLOB OLD_CONFIG_FILES
-		${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppConfig*.cmake
-		${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
+# Clean up old config files (with old names)
+file(GLOB OLD_CONFIG_FILES
+	${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppConfig*.cmake
+	${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)

-	if(OLD_CONFIG_FILES)
-		message(
-			STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
-		install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
-	endif()
+if(OLD_CONFIG_FILES)
+	message(
+		STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
+	install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
+endif()

-	install(EXPORT cifpp
-		NAMESPACE cifpp::
-		FILE "cifpp-targets.cmake"
-		DESTINATION lib/cmake/cifpp)
+install(EXPORT cifpp
+	NAMESPACE cifpp::
+	FILE "cifpp-targets.cmake"
+	DESTINATION lib/cmake/cifpp)

-	if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
-		install(
-			FILES ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
-			${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
-			${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
-			DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
-	endif()
+install(
+	FILES ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
+	${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
+	${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ma.dic
+	DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
+
+if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
+	install(FILES ${COMPONENTS_CIF}
+		DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
+endif()

-	set(CONFIG_TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
+set(CONFIG_TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)

-	configure_package_config_file(
-		${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
-		INSTALL_DESTINATION lib/cmake/cifpp
-		PATH_VARS CIFPP_DATA_DIR)
+configure_package_config_file(
+	${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
+	INSTALL_DESTINATION lib/cmake/cifpp
+	PATH_VARS CIFPP_DATA_DIR)

-	install(
-		FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
-		"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
-		DESTINATION lib/cmake/cifpp)
-
-	set_target_properties(
-		cifpp
-		PROPERTIES VERSION ${PROJECT_VERSION}
-		SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
-		INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
-
-	set_property(
-		TARGET cifpp
-		APPEND
-		PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
-
-	write_basic_package_version_file(
-		"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
-		VERSION ${PROJECT_VERSION}
-		COMPATIBILITY AnyNewerVersion)
-else()
-	# Set this variable so that consumers can find the files in rsrc
-	set(CIFPP_SHARE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/rsrc PARENT_SCOPE)
-endif()
+install(
+	FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
+	DESTINATION lib/cmake/cifpp)
+
+set_target_properties(
+	cifpp
+	PROPERTIES VERSION ${PROJECT_VERSION}
+	SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
+	INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
+
+set_property(
+	TARGET cifpp
+	APPEND
+	PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
+
+write_basic_package_version_file(
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
+	VERSION ${PROJECT_VERSION}
+	COMPATIBILITY AnyNewerVersion)

 if(BUILD_TESTING)
 	add_subdirectory(test)

--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ int main(int argc, char *argv[])

    if (file.empty())
    {
-        std::cerr << "Empty file" << std::endl;
+        std::cerr << "Empty file\n";
        exit(1);
    }

@@ -66,8 +66,8 @@ int main(int argc, char *argv[])
    auto n = atom_site.count(cif::key("label_atom_id") == "OXT");

    std::cout << "File contains " << atom_site.size() << " atoms of which "
-              << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
-              << "residues with an OXT are:" << std::endl;
+              << n << (n == 1 ? " is" : " are") << " OXT\n"
+              << "residues with an OXT are:\n";

    // Loop over all atoms with atom-id "OXT" and print out some info.
    // That info is extracted using structured binding in C++
@@ -76,7 +76,7 @@ int main(int argc, char *argv[])
                cif::key("label_atom_id") == "OXT",
                "label_asym_id", "label_comp_id", "label_seq_id"))
    {
-        std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
+        std::cout << asym << ' ' << comp << ' ' << seqnr << '\n';
    }

    return 0;

--- a/changelog
+++ b/changelog
+Version 7.0.5
+- Fix case where category index was not updated for updated value
+
 Version 7.0.4
 - Do not install headers and library in case we're not the top project


--- a/cmake/FindAtomic.cmake
+++ b/cmake/FindAtomic.cmake
+# FindAtomic
+# ----------
+#
+# Simple check to see if we need a library for std::atomic
+#
+# Results:
+#   std::atomic           INTERFACE IMPORTED target requiring cxx_std_14; it
+#                         carries IMPORTED_LIBNAME atomic when the test program
+#                         only ran after adding that library
+#   Atomic_FOUND          cache variable, true if the test program could be run
+#   STDCPPATOMIC_LIBRARY  set to 'atomic' when that library was needed
+#
+# Configuration stops with a fatal error when the test program cannot be run
+# with or without the atomic library.
+#
+# Note: there is deliberately no cmake_minimum_required() call in this module,
+# a module should not override the version and policy settings of the project
+# that includes it.
+
+# The target already exists, nothing left to do
+if(TARGET std::atomic)
+	return()
+endif()
+
+include(CMakePushCheckState)
+include(CheckIncludeFileCXX)
+include(CheckCXXSourceRuns)
+
+# Keep changes to CMAKE_REQUIRED_LIBRARIES local to this module
+cmake_push_check_state()
+
+check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
+mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)
+
+# Test program incrementing a std::atomic<long long>
+set(code [[
+#include <atomic>
+int main(int argc, char** argv) {
+  std::atomic<long long> s;
+  ++s;
+  return 0;
+}
+]])
+
+# First attempt: without any additional library
+check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)
+
+if(_CXX_ATOMIC_BUILTIN)
+	set(_found 1)
+else()
+	# Second attempt: the same program, now linked with the atomic library
+	list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)
+
+	check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)
+
+	if(NOT _CXX_ATOMIC_LIB_NEEDED)
+		message(FATAL_ERROR "unable to link C++ std::atomic code: you may need to install GNU libatomic")
+	else()
+		set(_found 1)
+	endif()
+endif()
+
+if(_found)
+	add_library(std::atomic INTERFACE IMPORTED)
+	set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)
+
+	if(_CXX_ATOMIC_BUILTIN)
+		# Nothing to add...
+	elseif(_CXX_ATOMIC_LIB_NEEDED)
+		set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
+		set(STDCPPATOMIC_LIBRARY atomic)
+	endif()
+endif()
+
+cmake_pop_check_state()
+
+set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)
+mark_as_advanced(Atomic_FOUND)
+
+# Do not leave the temporary variables behind in the scope of the includer
+unset(code)
+unset(_found)
+
+if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
+	message(FATAL_ERROR "Cannot run simple program using std::atomic")
+endif()
--- a/examples/example.cpp
+++ b/examples/example.cpp
@@ -13,7 +13,7 @@ int main(int argc, char *argv[])
 		exit(1);
 	}

-	cif::file file = cif::pdb::read(argv[1]);
+	cif::file file(argv[1]);

 	if (file.empty())
 	{

--- a/include/cif++/compound.hpp
+++ b/include/cif++/compound.hpp
@@ -138,7 +138,7 @@ struct compound_bond
 /// This information is derived from the CDD by default.
 ///
 /// To create compounds, you use the factory method. You can add your own
-/// compound definitions by calling the addExtraComponents function and
+/// compound definitions by calling the push_dictionary function and
 /// pass it a valid CCD formatted file.

 class compound

--- a/include/cif++/condition.hpp
+++ b/include/cif++/condition.hpp
@@ -1285,6 +1285,19 @@ condition operator==(const key &key, const std::optional<T> &v)
 }

 /**
+ * @brief Create a condition that is the negation of the equality test for an
+ * optional value @a v: if @a v contains a value the result is a not-condition
+ * around a key-equals condition for the item named in @a key, if @a v is empty
+ * the result is a not-condition around a key-is-empty condition for that item.
+ */
+template <typename T>
+condition operator!=(const key &key, const std::optional<T> &v)
+{
+	if (v.has_value())
+		return condition(new detail::not_condition_impl(condition(new detail::key_equals_condition_impl({ key.m_item_name, *v }))));
+	else
+		return condition(new detail::not_condition_impl(condition(new detail::key_is_empty_condition_impl(key.m_item_name))));
+}
+
+/**
 * @brief Operator to create a boolean opposite of the condition in @a rhs
 */
 inline condition operator not(condition &&rhs)

--- a/include/cif++/item.hpp
+++ b/include/cif++/item.hpp
@@ -378,7 +378,7 @@ struct item_handle
 	template <typename T>
 	item_handle &operator=(T &&value)
 	{
-		assign_value(item{ "", std::move(value) }.value());
+		assign_value(item{ "", std::forward<T>(value) }.value());
 		return *this;
 	}


--- a/include/cif++/model.hpp
+++ b/include/cif++/model.hpp
@@ -350,7 +350,12 @@ class atom
 	std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } ///< Return the pdb_ins_code property

 	/// Return true if this atom is an alternate
-	bool is_alternate() const { return not get_label_alt_id().empty(); }
+	bool is_alternate() const
+	{
+		// An empty label_alt_id as well as a single '.' both count as 'no alternate'
+		const auto alt_id = get_label_alt_id();
+		return not(alt_id.empty() or alt_id == ".");
+	}

 	/// Convenience method to return a string that might be ID in PDB space
 	std::string pdb_id() const
@@ -550,6 +555,9 @@ class residue
 	/// \brief Return true if this residue has alternate atoms
 	bool has_alternate_atoms() const;

+	/// \brief Return true if this residue has alternate atoms for the atom \a atomID
+	bool has_alternate_atoms_for(const std::string &atomID) const;
+
 	/// \brief Return the list of unique alt ID's present in this residue
 	std::set<std::string> get_alternate_ids() const;

@@ -572,6 +580,10 @@ class residue
 								   m_auth_seq_id == rhs.m_auth_seq_id);
 	}

+	/// @brief Create a new atom and add it to the list
+	/// @return newly created atom
+	virtual atom create_new_atom(atom_type inType, const std::string &inAtomID, point inLocation);
+
  protected:
 	/** @cond */
 	residue() {}
@@ -672,6 +684,8 @@ class monomer : public residue
 		return m_polymer == rhs.m_polymer and m_index == rhs.m_index;
 	}

+	atom create_new_atom(atom_type inType, const std::string &inAtomID, point inLocation) override;
+
  private:
 	const polymer *m_polymer;
 	std::size_t m_index;
@@ -1091,6 +1105,9 @@ class structure
 	/// \brief emplace the moved atom @a atom
 	atom &emplace_atom(atom &&atom);

+	/// \brief Reorder atom_site atoms based on 'natural' ordering
+	void reorder_atoms();
+
  private:
 	friend polymer;
 	friend residue;

--- a/rsrc/mmcif_pdbx.dic
+++ b/rsrc/mmcif_pdbx.dic
--- a/src/category.cpp
+++ b/src/category.cpp
@@ -1321,7 +1321,7 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie
 		std::string oldValue{ parent[item_name].text() };
 		std::string value{ value_provider(oldValue) };

-		parent.assign(colIx, value, false);
+		update_value(parent.get_row(), colIx, value, false, false);

 		for (auto &&[childCat, linked] : m_child_links)
 		{
@@ -1444,8 +1444,7 @@ void category::update_value(row *row, uint16_t item, std::string_view value, boo
 	// before updating

 	bool reinsert = false;
-	if (updateLinked and // an update of an Item's value
-		m_index != nullptr and key_item_indices().count(item))
+	if (m_index != nullptr and key_item_indices().count(item))
 	{
 		reinsert = m_index->find(*this, row);
 		if (reinsert)
@@ -1698,6 +1697,12 @@ void category::swap_item(uint16_t item_ix, row_handle &a, row_handle &b)
 	auto &ra = *a.m_row;
 	auto &rb = *b.m_row;

+	while (ra.size() <= item_ix)
+		ra.emplace_back("");
+
+	while (rb.size() <= item_ix)
+		rb.emplace_back("");
+
 	std::swap(ra.at(item_ix), rb.at(item_ix));
 }


--- a/src/compound.cpp
+++ b/src/compound.cpp
@@ -496,7 +496,7 @@ compound *compound_factory_impl::create(const std::string &id)
 		m_index = parser.index_datablocks();

 		if (cif::VERBOSE > 1)
-			std::cout << " done" << std::endl;
+			std::cout << " done\n";

 		// reload the resource, perhaps this should be improved...
 		if (m_file.empty())
@@ -519,7 +519,7 @@ compound *compound_factory_impl::create(const std::string &id)
 	parser.parse_single_datablock(id, m_index);

 	if (cif::VERBOSE > 1)
-		std::cout << " done" << std::endl;
+		std::cout << " done\n";

 	if (not file.empty())
 	{
@@ -545,20 +545,20 @@ class local_compound_factory_impl : public compound_factory_impl
 		: compound_factory_impl(next)
 		, m_local_file(file)
 	{
-		const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
+		// const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

-		for (const auto &[id, name, threeLetterCode, group] :
-			file["comp_list"]["chem_comp"].rows<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group"))
-		{
-			auto &rdb = m_local_file["comp_" + id];
-			if (rdb.empty())
-			{
-				std::cerr << "Missing data in restraint file for id " + id + '\n';
-				continue;
-			}
+		// for (const auto &[id, name, threeLetterCode, group] :
+		// 	file["comp_list"]["chem_comp"].rows<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group"))
+		// {
+		// 	auto &rdb = m_local_file["comp_" + id];
+		// 	if (rdb.empty())
+		// 	{
+		// 		// std::cerr << "Missing data in restraint file for id " + id + '\n';
+		// 		continue;
+		// 	}

-			construct_compound(rdb, id, name, threeLetterCode, group);
-		}
+		// 	construct_compound(rdb, id, name, threeLetterCode, group);
+		// }
 	}

 	compound *create(const std::string &id) override;

--- a/src/datablock.cpp
+++ b/src/datablock.cpp
@@ -49,6 +49,7 @@ void datablock::set_validator(const validator *v)
 	}
 	catch (const std::exception &)
 	{
+		m_validator = nullptr;
 		throw_with_nested(std::runtime_error("Error while setting validator in datablock " + m_name));
 	}
 }

--- a/src/file.cpp
+++ b/src/file.cpp
@@ -31,11 +31,32 @@ namespace cif
 {

 // --------------------------------------------------------------------
+// TODO: This is wrong. A validator should be assigned to datablocks,
+// not to a file, since audit_conform is a category specifying the
+// content of a datablock, not of the entire file.
+
+// Store v as the validator of this file and pass it on to each datablock.
+// An exception thrown while setting it on the first datablock is rethrown,
+// for any following datablock the message is written to std::cerr and the
+// loop continues with the next datablock.
 void file::set_validator(const validator *v)
 {
 	m_validator = v;
-	for (auto &db : *this)
-		db.set_validator(v);
+	for (bool first = true; auto &db : *this)
+	{
+		try
+		{
+			db.set_validator(v);
+		}
+		catch (const std::exception &e)
+		{
+			if (first)
+				throw;
+
+			// Accept failure on secondary datablocks
+			// now that many mmCIF files have invalid
+			// restraint data concatenated.
+			std::cerr << e.what() << '\n';
+		}
+
+		first = false;
+	}
 }

 bool file::is_valid() const
@@ -78,12 +99,12 @@ bool file::validate_links() const
 {
+	// Links cannot be validated without a validator. The exception has to be
+	// thrown, merely constructing it (as before) has no effect at all.
 	if (m_validator == nullptr)
-		std::runtime_error("No validator loaded explicitly, cannot continue");
+		throw std::runtime_error("No validator loaded explicitly, cannot continue");
-	
+
 	bool result = true;

 	for (auto &db : *this)
 		result = db.validate_links() and result;
-	
+
 	return result;
 }

@@ -97,7 +118,7 @@ void file::load_dictionary()
 			std::string name = audit_conform->front().get<std::string>("dict_name");

 			if (name == "mmcif_pdbx_v50")
-				name = "mmcif_pdbx.dic";	// we had a bug here in libcifpp... 
+				name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...

 			if (not name.empty())
 			{
@@ -125,7 +146,8 @@ void file::load_dictionary(std::string_view name)

 bool file::contains(std::string_view name) const
 {
-	return std::find_if(begin(), end(), [name](const datablock &db) { return iequals(db.name(), name); }) != end();
+	return std::find_if(begin(), end(), [name](const datablock &db)
+			   { return iequals(db.name(), name); }) != end();
 }

 datablock &file::operator[](std::string_view name)

--- a/src/model.cpp
+++ b/src/model.cpp
@@ -313,7 +313,7 @@ residue::residue(structure &structure, const std::vector<atom> &atoms)
 {
 	if (atoms.empty())
 		throw std::runtime_error("Empty list of atoms");
-	
+
 	auto &a = atoms.front();

 	m_compound_id = a.get_label_comp_id();
@@ -352,9 +352,42 @@ EntityType residue::entity_type() const

 void residue::add_atom(atom &atom)
 {
+	// Only appends the atom to the list of atoms of this residue.
+	// NOTE(review): the atom itself is not updated here, confirm that this is intended
 	m_atoms.push_back(atom);
 }

+// Add a new row to the atom_site category of the datablock of the structure,
+// filled in with the identifiers of this residue. The row is wrapped in an
+// atom which is appended to this residue, after that its location is set.
+//
+// inType      atom type, its symbol is stored in type_symbol
+// inAtomID    stored in both label_atom_id and auth_atom_id
+// inLocation  location assigned to the new atom
+//
+// Returns the newly created atom
+atom residue::create_new_atom(atom_type inType, const std::string &inAtomID, point inLocation)
+{
+	auto &datablock = m_structure->get_datablock();
+	auto &atom_site_cat = datablock["atom_site"];
+
+	// The row is written as a HETATM record, with '.' as alt id and an occupancy of 1
+	auto new_row = atom_site_cat.emplace({
+		{ "group_PDB", "HETATM" },
+		{ "id", atom_site_cat.get_unique_id("") },
+		{ "type_symbol", atom_type_traits(inType).symbol() },
+		{ "label_entity_id", get_entity_id() },
+		{ "label_atom_id", inAtomID },
+		{ "label_asym_id", m_asym_id },
+		{ "label_alt_id", "." },
+		{ "label_comp_id", m_compound_id },
+		{ "label_seq_id", m_seq_id },
+		{ "auth_asym_id", m_auth_asym_id },
+		{ "auth_atom_id", inAtomID },
+		{ "auth_comp_id", m_compound_id },
+		{ "auth_seq_id", m_auth_seq_id },
+		{ "occupancy", 1.0f, 2 },
+		{ "pdbx_PDB_model_num", m_structure->get_model_nr() },
+	});
+
+	atom result(datablock, *new_row);
+
+	// Same order as before: first register the atom, then store the location
+	m_atoms.push_back(result);
+	result.set_location(inLocation);
+
+	return result;
+}
+
 std::vector<atom> residue::unique_atoms() const
 {
 	std::vector<atom> result;
@@ -455,6 +488,12 @@ bool residue::has_alternate_atoms() const
 			   { return atom.is_alternate(); }) != m_atoms.end();
 }

+bool residue::has_alternate_atoms_for(const std::string &atomID) const
+{
+	// Done as soon as one atom has the requested label_atom_id and is an alternate
+	for (const auto &candidate : m_atoms)
+	{
+		if (candidate.get_label_atom_id() == atomID and candidate.is_alternate())
+			return true;
+	}
+
+	return false;
+}
+
 std::set<std::string> residue::get_atom_ids() const
 {
 	std::set<std::string> ids;
@@ -673,25 +712,26 @@ float monomer::omega() const
 }

 const std::map<std::string, std::vector<std::string>> kChiAtomsMap = {
-	{"ASP", {"CG", "OD1"}},
-	{"ASN", {"CG", "OD1"}},
-	{"ARG", {"CG", "CD", "NE", "CZ"}},
-	{"HIS", {"CG", "ND1"}},
-	{"GLN", {"CG", "CD", "OE1"}},
-	{"GLU", {"CG", "CD", "OE1"}},
-	{"SER", {"OG"}},
-	{"THR", {"OG1"}},
-	{"LYS", {"CG", "CD", "CE", "NZ"}},
-	{"TYR", {"CG", "CD1"}},
-	{"PHE", {"CG", "CD1"}},
-	{"LEU", {"CG", "CD1"}},
-	{"TRP", {"CG", "CD1"}},
-	{"CYS", {"SG"}},
-	{"ILE", {"CG1", "CD1"}},
-	{"MET", {"CG", "SD", "CE"}},
-	{"MSE", {"CG", "SE", "CE"}},
-	{"PRO", {"CG", "CD"}},
-	{"VAL", {"CG1"}}};
+	{ "ASP", { "CG", "OD1" } },
+	{ "ASN", { "CG", "OD1" } },
+	{ "ARG", { "CG", "CD", "NE", "CZ" } },
+	{ "HIS", { "CG", "ND1" } },
+	{ "GLN", { "CG", "CD", "OE1" } },
+	{ "GLU", { "CG", "CD", "OE1" } },
+	{ "SER", { "OG" } },
+	{ "THR", { "OG1" } },
+	{ "LYS", { "CG", "CD", "CE", "NZ" } },
+	{ "TYR", { "CG", "CD1" } },
+	{ "PHE", { "CG", "CD1" } },
+	{ "LEU", { "CG", "CD1" } },
+	{ "TRP", { "CG", "CD1" } },
+	{ "CYS", { "SG" } },
+	{ "ILE", { "CG1", "CD1" } },
+	{ "MET", { "CG", "SD", "CE" } },
+	{ "MSE", { "CG", "SE", "CE" } },
+	{ "PRO", { "CG", "CD" } },
+	{ "VAL", { "CG1" } }
+};

 std::size_t monomer::nr_of_chis() const
 {
@@ -713,7 +753,7 @@ float monomer::chi(std::size_t nr) const
 		auto i = kChiAtomsMap.find(m_compound_id);
 		if (i != kChiAtomsMap.end() and nr < i->second.size())
 		{
-			std::vector<std::string> atoms{"N", "CA", "CB"};
+			std::vector<std::string> atoms{ "N", "CA", "CB" };

 			atoms.insert(atoms.end(), i->second.begin(), i->second.end());

@@ -839,7 +879,8 @@ bool monomer::are_bonded(const monomer &a, const monomer &b, float errorMargin)
 			a.get_atom_by_atom_id("CA").get_location(),
 			a.get_atom_by_atom_id("C").get_location(),
 			b.get_atom_by_atom_id("N").get_location(),
-			b.get_atom_by_atom_id("CA").get_location()};
+			b.get_atom_by_atom_id("CA").get_location()
+		};

 		auto distanceCACA = distance(atoms[0], atoms[3]);
 		double omega = dihedral_angle(atoms[0], atoms[1], atoms[2], atoms[3]);
@@ -880,6 +921,15 @@ bool monomer::is_cis(const monomer &a, const monomer &b)
 	return std::abs(omega(a, b)) < 30.0f;
 }

+// Create the atom using the implementation in residue and then set the
+// group_PDB property of the result to ATOM.
+atom monomer::create_new_atom(atom_type inType, const std::string &inAtomID, point inLocation)
+{
+	auto new_atom = residue::create_new_atom(inType, inAtomID, inLocation);
+	new_atom.set_property("group_PDB", "ATOM");
+	return new_atom;
+}
+
 // --------------------------------------------------------------------
 // polymer

@@ -916,7 +966,7 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
 		}
 		else if (VERBOSE > 0)
 		{
-			monomer m{*this, index, seqID, authSeqID, pdbInsCode, compoundID};
+			monomer m{ *this, index, seqID, authSeqID, pdbInsCode, compoundID };
 			std::cerr << "Dropping alternate residue " << m << '\n';
 		}
 	}
@@ -984,7 +1034,6 @@ sugar::sugar(sugar &&rhs)
 	: residue(std::forward<residue>(rhs))
 	, m_branch(rhs.m_branch)
 {
-
 }

 sugar &sugar::operator=(sugar &&rhs)
@@ -1048,19 +1097,19 @@ cif::mm::atom sugar::add_atom(row_initializer atom_info)

 	auto atom_id = atom_site.get_unique_id("");

-	atom_info.set_value({"group_PDB", "HETATM"});
-	atom_info.set_value({"id", atom_id});
-	atom_info.set_value({"label_entity_id", m_branch->get_entity_id()});
-	atom_info.set_value({"label_asym_id", m_branch->get_asym_id()});
-	atom_info.set_value({"label_comp_id", m_compound_id});
-	atom_info.set_value({"label_seq_id", "."});
-	atom_info.set_value({"label_alt_id", "."});
-	atom_info.set_value({"auth_asym_id", m_branch->get_asym_id()});
-	atom_info.set_value({"auth_comp_id", m_compound_id});
-	atom_info.set_value({"auth_seq_id", m_auth_seq_id});
-	atom_info.set_value({"occupancy", 1.0, 2});
-	atom_info.set_value({"B_iso_or_equiv", 30.0, 2});
-	atom_info.set_value({"pdbx_PDB_model_num", 1});
+	atom_info.set_value({ "group_PDB", "HETATM" });
+	atom_info.set_value({ "id", atom_id });
+	atom_info.set_value({ "label_entity_id", m_branch->get_entity_id() });
+	atom_info.set_value({ "label_asym_id", m_branch->get_asym_id() });
+	atom_info.set_value({ "label_comp_id", m_compound_id });
+	atom_info.set_value({ "label_seq_id", "." });
+	atom_info.set_value({ "label_alt_id", "." });
+	atom_info.set_value({ "auth_asym_id", m_branch->get_asym_id() });
+	atom_info.set_value({ "auth_comp_id", m_compound_id });
+	atom_info.set_value({ "auth_seq_id", m_auth_seq_id });
+	atom_info.set_value({ "occupancy", 1.0, 2 });
+	atom_info.set_value({ "B_iso_or_equiv", 30.0, 2 });
+	atom_info.set_value({ "pdbx_PDB_model_num", 1 });

 	auto row = atom_site.emplace(std::move(atom_info));
 	auto result = m_structure->emplace_atom(db, row);
@@ -1118,7 +1167,7 @@ void branch::link_atoms()
 		auto entity_id = front().get_entity_id();

 		for (const auto &[num1, num2, atom1, atom2] : branch_link.find<std::size_t, std::size_t, std::string, std::string>(
-				"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
+				 "entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
 		{
 			// if (not iequals(atom1, "c1"))
 			// 	throw std::runtime_error("invalid pdbx_entity_branch_link");
@@ -1133,10 +1182,11 @@ void branch::link_atoms()

 sugar &branch::get_sugar_by_num(int nr)
 {
-	auto i = find_if(begin(), end(), [nr](const sugar &s) { return s.num() == nr; });
+	auto i = find_if(begin(), end(), [nr](const sugar &s)
+		{ return s.num() == nr; });
 	if (i == end())
 		throw std::out_of_range("Sugar with num " + std::to_string(nr) + " not found in branch " + m_asym_id);
-	
+
 	return *i;
 }

@@ -1157,32 +1207,29 @@ sugar &branch::construct_sugar(const std::string &compound_id)
 	auto r = chemComp.find(key("id") == compound_id);
 	if (r.empty())
 	{
-		chemComp.emplace({
-			{"id", compound_id},
-			{"name", compound->name()},
-			{"formula", compound->formula()},
-			{"formula_weight", compound->formula_weight()},
-			{"type", compound->type()}});
+		chemComp.emplace({ { "id", compound_id },
+			{ "name", compound->name() },
+			{ "formula", compound->formula() },
+			{ "formula_weight", compound->formula_weight() },
+			{ "type", compound->type() } });
 	}

 	sugar &result = emplace_back(*this, compound_id, m_asym_id, static_cast<int>(size() + 1));

-	db["pdbx_branch_scheme"].emplace({
-		{"asym_id", result.get_asym_id()},
-		{"entity_id", result.get_entity_id()},
-		{"num", result.num()},
-		{"mon_id", result.get_compound_id()},
+	db["pdbx_branch_scheme"].emplace({ { "asym_id", result.get_asym_id() },
+		{ "entity_id", result.get_entity_id() },
+		{ "num", result.num() },
+		{ "mon_id", result.get_compound_id() },

-		{"pdb_asym_id", result.get_asym_id()},
-		{"pdb_seq_num", result.num()},
-		{"pdb_mon_id", result.get_compound_id()},
+		{ "pdb_asym_id", result.get_asym_id() },
+		{ "pdb_seq_num", result.num() },
+		{ "pdb_mon_id", result.get_compound_id() },

-		{"auth_asym_id", result.get_auth_asym_id()},
-		{"auth_mon_id", result.get_compound_id()},
-		{"auth_seq_num", result.get_auth_seq_id()},
+		{ "auth_asym_id", result.get_auth_asym_id() },
+		{ "auth_mon_id", result.get_compound_id() },
+		{ "auth_seq_num", result.get_auth_seq_id() },

-		{"hetero", "n"}
-	});
+		{ "hetero", "n" } });

 	return result;
 }
@@ -1200,19 +1247,17 @@ sugar &branch::construct_sugar(const std::string &compound_id, const std::string
 	auto &pdbx_entity_branch_link = db["pdbx_entity_branch_link"];
 	auto linkID = pdbx_entity_branch_link.get_unique_id("");

-	db["pdbx_entity_branch_link"].emplace({
-		{ "link_id", linkID },
+	db["pdbx_entity_branch_link"].emplace({ { "link_id", linkID },
 		{ "entity_id", get_entity_id() },
-		{ "entity_branch_list_num_1", result.num() }, 
-		{ "comp_id_1", compound_id }, 
+		{ "entity_branch_list_num_1", result.num() },
+		{ "comp_id_1", compound_id },
 		{ "atom_id_1", atom_id },
-		{ "leaving_atom_id_1", "O1" }, 	/// TODO: Need to fix this!
-		{ "entity_branch_list_num_2", linked.num() }, 
-		{ "comp_id_2", linked.get_compound_id() }, 
-		{ "atom_id_2", linked_atom_id }, 
-		{ "leaving_atom_id_2", "." }, 
-		{ "value_order", "sing" }
-	});
+		{ "leaving_atom_id_1", "O1" }, /// TODO: Need to fix this!
+		{ "entity_branch_list_num_2", linked.num() },
+		{ "comp_id_2", linked.get_compound_id() },
+		{ "atom_id_2", linked_atom_id },
+		{ "leaving_atom_id_2", "." },
+		{ "value_order", "sing" } });

 	return result;
 }
@@ -1321,7 +1366,7 @@ void structure::load_data()
 {
 	auto &polySeqScheme = m_db["pdbx_poly_seq_scheme"];

-	for (const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string,std::string,std::string>("asym_id", "pdb_strand_id", "entity_id"))
+	for (const auto &[asym_id, auth_asym_id, entityID] : polySeqScheme.rows<std::string, std::string, std::string>("asym_id", "pdb_strand_id", "entity_id"))
 	{
 		if (m_polymers.empty() or m_polymers.back().get_asym_id() != asym_id or m_polymers.back().get_entity_id() != entityID)
 			m_polymers.emplace_back(*this, entityID, asym_id, auth_asym_id);
@@ -1329,7 +1374,7 @@ void structure::load_data()

 	auto &branchScheme = m_db["pdbx_branch_scheme"];

-	for (const auto &[asym_id, entity_id] : branchScheme.rows<std::string,std::string>("asym_id", "entity_id"))
+	for (const auto &[asym_id, entity_id] : branchScheme.rows<std::string, std::string>("asym_id", "entity_id"))
 	{
 		if (m_branches.empty() or m_branches.back().get_asym_id() != asym_id)
 			m_branches.emplace_back(*this, asym_id, entity_id);
@@ -1337,8 +1382,8 @@ void structure::load_data()

 	auto &nonPolyScheme = m_db["pdbx_nonpoly_scheme"];

-	for (const auto&[asym_id, monID, pdbStrandID, pdbSeqNum, pdbInsCode] :
-			nonPolyScheme.rows<std::string,std::string,std::string,std::string,std::string>("asym_id", "mon_id", "pdb_strand_id", "pdb_seq_num", "pdb_ins_code"))
+	for (const auto &[asym_id, monID, pdbStrandID, pdbSeqNum, pdbInsCode] :
+		nonPolyScheme.rows<std::string, std::string, std::string, std::string, std::string>("asym_id", "mon_id", "pdb_strand_id", "pdb_seq_num", "pdb_ins_code"))
 		m_non_polymers.emplace_back(*this, monID, asym_id, 0, pdbStrandID, pdbSeqNum, pdbInsCode);

 	// place atoms in residues
@@ -1349,18 +1394,18 @@ void structure::load_data()
 	for (auto &poly : m_polymers)
 	{
 		for (auto &res : poly)
-			resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
+			resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;
 	}

 	for (auto &res : m_non_polymers)
-		resMap[{res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id()}] = &res;
+		resMap[{ res.get_asym_id(), res.get_seq_id(), res.get_auth_seq_id() }] = &res;

 	std::set<std::string> sugars;
 	for (auto &branch : m_branches)
 	{
 		for (auto &sugar : branch)
 		{
-			resMap[{sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id()}] = &sugar;
+			resMap[{ sugar.get_asym_id(), sugar.get_seq_id(), sugar.get_auth_seq_id() }] = &sugar;
 			sugars.insert(sugar.get_compound_id());
 		}
 	}
@@ -1392,7 +1437,9 @@ void structure::load_data()
 	}

 	// what the ...
-	m_branches.erase(std::remove_if(m_branches.begin(), m_branches.end(), [](const branch &b) { return b.empty(); }), m_branches.end());
+	m_branches.erase(std::remove_if(m_branches.begin(), m_branches.end(), [](const branch &b)
+						 { return b.empty(); }),
+		m_branches.end());

 	for (auto &branch : m_branches)
 		branch.link_atoms();
@@ -1638,7 +1685,7 @@ residue &structure::get_residue(const std::string &asym_id, int seqID, const std

 	if (seqID != 0)
 		desc += "/" + std::to_string(seqID);
-	
+
 	if (not authSeqID.empty())
 		desc += "-" + authSeqID;

@@ -1684,7 +1731,7 @@ residue &structure::get_residue(const std::string &asym_id, const std::string &c

 	if (seqID != 0)
 		desc += "/" + std::to_string(seqID);
-	
+
 	if (not authSeqID.empty())
 		desc += "-" + authSeqID;

@@ -1714,12 +1761,11 @@ std::string structure::insert_compound(const std::string &compoundID, bool is_en
 	auto r = chemComp.find(key("id") == compoundID);
 	if (r.empty())
 	{
-		chemComp.emplace({
-			{"id", compoundID},
-			{"name", compound->name()},
-			{"formula", compound->formula()},
-			{"formula_weight", compound->formula_weight()},
-			{"type", compound->type()}});
+		chemComp.emplace({ { "id", compoundID },
+			{ "name", compound->name() },
+			{ "formula", compound->formula() },
+			{ "formula_weight", compound->formula_weight() },
+			{ "type", compound->type() } });
 	}

 	std::string entity_id;
@@ -1735,16 +1781,14 @@ std::string structure::insert_compound(const std::string &compoundID, bool is_en
 			auto &entity = m_db["entity"];
 			entity_id = entity.get_unique_id("");

-			entity.emplace({
-				{"id", entity_id},
-				{"type", "non-polymer"},
-				{"pdbx_description", compound->name()},
-				{"formula_weight", compound->formula_weight()}});
+			entity.emplace({ { "id", entity_id },
+				{ "type", "non-polymer" },
+				{ "pdbx_description", compound->name() },
+				{ "formula_weight", compound->formula_weight() } });

-			pdbxEntityNonpoly.emplace({
-				{"entity_id", entity_id},
-				{"name", compound->name()},
-				{"comp_id", compoundID}});
+			pdbxEntityNonpoly.emplace({ { "entity_id", entity_id },
+				{ "name", compound->name() },
+				{ "comp_id", compoundID } });
 		}
 	}

@@ -1773,7 +1817,7 @@ atom &structure::emplace_atom(atom &&atom)
 			R = i - 1;
 	}

-	if (R == -1)	// msvc... 
+	if (R == -1) // msvc...
 		m_atom_index.insert(m_atom_index.begin(), m_atoms.size());
 	else
 		m_atom_index.insert(m_atom_index.begin() + R + 1, m_atoms.size());
@@ -1829,19 +1873,15 @@ void structure::remove_atom(atom &a, bool removeFromResidue)
 		for (std::string prefix : { "ptnr1_", "ptnr2_", "pdbx_ptnr3_" })
 		{
 			if (a.get_label_seq_id() == 0)
-				cond = std::move(cond) or (
-					cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
-					cif::key(prefix + "label_seq_id") == null and
-					cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
-					cif::key(prefix + "label_atom_id") == a.get_label_atom_id()
-				);
+				cond = std::move(cond) or (cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
+											  cif::key(prefix + "label_seq_id") == null and
+											  cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
+											  cif::key(prefix + "label_atom_id") == a.get_label_atom_id());
 			else
-				cond = std::move(cond) or (
-					cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
-					cif::key(prefix + "label_seq_id") == a.get_label_seq_id() and
-					cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
-					cif::key(prefix + "label_atom_id") == a.get_label_atom_id()
-				);
+				cond = std::move(cond) or (cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
+											  cif::key(prefix + "label_seq_id") == a.get_label_seq_id() and
+											  cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
+											  cif::key(prefix + "label_atom_id") == a.get_label_atom_id());
 		}

 		if (cond)
@@ -1947,15 +1987,15 @@ void structure::change_residue(residue &res, const std::string &newCompound,
 		if (entityID.empty())
 		{
 			entityID = entity.get_unique_id("");
-			entity.emplace({{"id", entityID},
-				{"type", "non-polymer"},
-				{"pdbx_description", compound->name()},
-				{"formula_weight", compound->formula_weight()}});
+			entity.emplace({ { "id", entityID },
+				{ "type", "non-polymer" },
+				{ "pdbx_description", compound->name() },
+				{ "formula_weight", compound->formula_weight() } });

 			auto &pdbxEntityNonpoly = m_db["pdbx_entity_nonpoly"];
-			pdbxEntityNonpoly.emplace({{"entity_id", entityID},
-				{"name", compound->name()},
-				{"comp_id", newCompound}});
+			pdbxEntityNonpoly.emplace({ { "entity_id", entityID },
+				{ "name", compound->name() },
+				{ "comp_id", newCompound } });
 		}

 		auto &pdbxNonPolyScheme = m_db["pdbx_nonpoly_scheme"];
@@ -1971,11 +2011,11 @@ void structure::change_residue(residue &res, const std::string &newCompound,
 		auto &chemComp = m_db["chem_comp"];
 		if (not chemComp.contains(key("id") == newCompound))
 		{
-			chemComp.emplace({{"id", newCompound},
-				{"name", compound->name()},
-				{"formula", compound->formula()},
-				{"formula_weight", compound->formula_weight()},
-				{"type", compound->type()}});
+			chemComp.emplace({ { "id", newCompound },
+				{ "name", compound->name() },
+				{ "formula", compound->formula() },
+				{ "formula_weight", compound->formula_weight() },
+				{ "type", compound->type() } });
 		}

 		// update the struct_asym for the new entity
@@ -2105,7 +2145,7 @@ void structure::remove_residue(residue &res)

 		case EntityType::Branched:
 		{
-			auto &s = dynamic_cast<sugar&>(res);
+			auto &s = dynamic_cast<sugar &>(res);

 			remove_sugar(s);

@@ -2133,7 +2173,7 @@ void structure::remove_sugar(sugar &s)
 		throw std::runtime_error("sugar not part of branch");
 	std::size_t six = si - branch.begin();

-	if (six == 0)	// first sugar, means the death of this branch
+	if (six == 0) // first sugar, means the death of this branch
 		remove_branch(branch);
 	else
 	{
@@ -2148,7 +2188,7 @@ void structure::remove_sugar(sugar &s)

 			if (dix.count(tix))
 				continue;
-			
+
 			dix.insert(tix);

 			for (auto &s2 : branch)
@@ -2161,7 +2201,9 @@ void structure::remove_sugar(sugar &s)
 				remove_atom(atom, false);
 		}

-		branch.erase(remove_if(branch.begin(), branch.end(), [dix](const sugar &s) { return dix.count(s.num()); }), branch.end());
+		branch.erase(remove_if(branch.begin(), branch.end(), [dix](const sugar &s)
+						 { return dix.count(s.num()); }),
+			branch.end());

 		auto entity_id = create_entity_for_branch(branch);

@@ -2181,23 +2223,21 @@ void structure::remove_sugar(sugar &s)

 		for (auto &sugar : branch)
 		{
-			pdbx_branch_scheme.emplace({
-				{"asym_id", asym_id},
-				{"entity_id", entity_id},
-				{"num", sugar.num()},
-				{"mon_id", sugar.get_compound_id()},
+			pdbx_branch_scheme.emplace({ { "asym_id", asym_id },
+				{ "entity_id", entity_id },
+				{ "num", sugar.num() },
+				{ "mon_id", sugar.get_compound_id() },

-				{"pdb_asym_id", asym_id},
-				{"pdb_seq_num", sugar.num()},
-				{"pdb_mon_id", sugar.get_compound_id()},
+				{ "pdb_asym_id", asym_id },
+				{ "pdb_seq_num", sugar.num() },
+				{ "pdb_mon_id", sugar.get_compound_id() },

 				// TODO: need fix, collect from nag_atoms?
-				{"auth_asym_id", asym_id},
-				{"auth_mon_id", sugar.get_compound_id()},
-				{"auth_seq_num", sugar.get_auth_seq_id()},
+				{ "auth_asym_id", asym_id },
+				{ "auth_mon_id", sugar.get_compound_id() },
+				{ "auth_seq_num", sugar.get_auth_seq_id() },

-				{"hetero", "n"}
-			});
+				{ "hetero", "n" } });
 		}
 	}
 }
@@ -2232,13 +2272,11 @@ std::string structure::create_non_poly(const std::string &entity_id, const std::
 	auto &struct_asym = m_db["struct_asym"];
 	std::string asym_id = struct_asym.get_unique_id();

-	struct_asym.emplace({
-		{"id", asym_id},
-		{"pdbx_blank_PDB_chainid_flag", "N"},
-		{"pdbx_modified", "N"},
-		{"entity_id", entity_id},
-		{"details", "?"}
-	});
+	struct_asym.emplace({ { "id", asym_id },
+		{ "pdbx_blank_PDB_chainid_flag", "N" },
+		{ "pdbx_modified", "N" },
+		{ "entity_id", entity_id },
+		{ "details", "?" } });

 	std::string comp_id = m_db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");

@@ -2250,29 +2288,27 @@ std::string structure::create_non_poly(const std::string &entity_id, const std::
 	{
 		auto atom_id = atom_site.get_unique_id("");

-		auto row = atom_site.emplace({
-			{"group_PDB", atom.get_property("group_PDB")},
-			{"id", atom_id},
-			{"type_symbol", atom.get_property("type_symbol")},
-			{"label_atom_id", atom.get_property("label_atom_id")},
-			{"label_alt_id", atom.get_property("label_alt_id")},
-			{"label_comp_id", comp_id},
-			{"label_asym_id", asym_id},
-			{"label_entity_id", entity_id},
-			{"label_seq_id", "."},
-			{"pdbx_PDB_ins_code", ""},
-			{"Cartn_x", atom.get_property("Cartn_x")},
-			{"Cartn_y", atom.get_property("Cartn_y")},
-			{"Cartn_z", atom.get_property("Cartn_z")},
-			{"occupancy", atom.get_property("occupancy")},
-			{"B_iso_or_equiv", atom.get_property("B_iso_or_equiv")},
-			{"pdbx_formal_charge", atom.get_property("pdbx_formal_charge")},
-			{"auth_seq_id", 1},
-			{"auth_comp_id", comp_id},
-			{"auth_asym_id", asym_id},
-			{"auth_atom_id", atom.get_property("label_atom_id")},
-			{"pdbx_PDB_model_num", 1}
-		});
+		auto row = atom_site.emplace({ { "group_PDB", atom.get_property("group_PDB") },
+			{ "id", atom_id },
+			{ "type_symbol", atom.get_property("type_symbol") },
+			{ "label_atom_id", atom.get_property("label_atom_id") },
+			{ "label_alt_id", atom.get_property("label_alt_id") },
+			{ "label_comp_id", comp_id },
+			{ "label_asym_id", asym_id },
+			{ "label_entity_id", entity_id },
+			{ "label_seq_id", "." },
+			{ "pdbx_PDB_ins_code", "" },
+			{ "Cartn_x", atom.get_property("Cartn_x") },
+			{ "Cartn_y", atom.get_property("Cartn_y") },
+			{ "Cartn_z", atom.get_property("Cartn_z") },
+			{ "occupancy", atom.get_property("occupancy") },
+			{ "B_iso_or_equiv", atom.get_property("B_iso_or_equiv") },
+			{ "pdbx_formal_charge", atom.get_property("pdbx_formal_charge") },
+			{ "auth_seq_id", 1 },
+			{ "auth_comp_id", comp_id },
+			{ "auth_asym_id", asym_id },
+			{ "auth_atom_id", atom.get_property("label_atom_id") },
+			{ "pdbx_PDB_model_num", 1 } });

 		auto &newAtom = emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
 		res.add_atom(newAtom);
@@ -2281,16 +2317,16 @@ std::string structure::create_non_poly(const std::string &entity_id, const std::
 	auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
 	std::size_t ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
 	pdbx_nonpoly_scheme.emplace({
-		{"asym_id", asym_id},
-		{"entity_id", entity_id},
-		{"mon_id", comp_id},
-		{"ndb_seq_num", ndb_nr},
-		{"pdb_seq_num", res.get_auth_seq_id()},
-		{"auth_seq_num", res.get_auth_seq_id()},
-		{"pdb_mon_id", comp_id},
-		{"auth_mon_id", comp_id},
-		{"pdb_strand_id", asym_id},
-		{"pdb_ins_code", "."},
+		{ "asym_id", asym_id },
+		{ "entity_id", entity_id },
+		{ "mon_id", comp_id },
+		{ "ndb_seq_num", ndb_nr },
+		{ "pdb_seq_num", res.get_auth_seq_id() },
+		{ "auth_seq_num", res.get_auth_seq_id() },
+		{ "pdb_mon_id", comp_id },
+		{ "auth_mon_id", comp_id },
+		{ "pdb_strand_id", asym_id },
+		{ "pdb_ins_code", "." },
 	});

 	return asym_id;
@@ -2303,13 +2339,11 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
 	auto &struct_asym = m_db["struct_asym"];
 	std::string asym_id = struct_asym.get_unique_id();

-	struct_asym.emplace({
-		{"id", asym_id},
-		{"pdbx_blank_PDB_chainid_flag", "N"},
-		{"pdbx_modified", "N"},
-		{"entity_id", entity_id},
-		{"details", "?"}
-	});
+	struct_asym.emplace({ { "id", asym_id },
+		{ "pdbx_blank_PDB_chainid_flag", "N" },
+		{ "pdbx_modified", "N" },
+		{ "entity_id", entity_id },
+		{ "details", "?" } });

 	std::string comp_id = m_db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");

@@ -2326,14 +2360,14 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
 		atom.set_value("auth_asym_id", asym_id);
 		atom.set_value("label_entity_id", entity_id);

-		atom.set_value_if_empty({"group_PDB", "HETATM"});
-		atom.set_value_if_empty({"label_comp_id", comp_id});
-		atom.set_value_if_empty({"label_seq_id", "."});
-		atom.set_value_if_empty({"auth_comp_id", comp_id});
-		atom.set_value_if_empty({"auth_seq_id", 1});
-		atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
-		atom.set_value_if_empty({"label_alt_id", ""});
-		atom.set_value_if_empty({"occupancy", 1.0, 2});
+		atom.set_value_if_empty({ "group_PDB", "HETATM" });
+		atom.set_value_if_empty({ "label_comp_id", comp_id });
+		atom.set_value_if_empty({ "label_seq_id", "." });
+		atom.set_value_if_empty({ "auth_comp_id", comp_id });
+		atom.set_value_if_empty({ "auth_seq_id", 1 });
+		atom.set_value_if_empty({ "pdbx_PDB_model_num", 1 });
+		atom.set_value_if_empty({ "label_alt_id", "" });
+		atom.set_value_if_empty({ "occupancy", 1.0, 2 });

 		auto row = atom_site.emplace(atom.begin(), atom.end());

@@ -2344,16 +2378,16 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
 	auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
 	std::size_t ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
 	pdbx_nonpoly_scheme.emplace({
-		{"asym_id", asym_id},
-		{"entity_id", entity_id},
-		{"mon_id", comp_id},
-		{"ndb_seq_num", ndb_nr},
-		{"pdb_seq_num", res.get_auth_seq_id()},
-		{"auth_seq_num", res.get_auth_seq_id()},
-		{"pdb_mon_id", comp_id},
-		{"auth_mon_id", comp_id},
-		{"pdb_strand_id", asym_id},
-		{"pdb_ins_code", "."},
+		{ "asym_id", asym_id },
+		{ "entity_id", entity_id },
+		{ "mon_id", comp_id },
+		{ "ndb_seq_num", ndb_nr },
+		{ "pdb_seq_num", res.get_auth_seq_id() },
+		{ "auth_seq_num", res.get_auth_seq_id() },
+		{ "pdb_mon_id", comp_id },
+		{ "auth_mon_id", comp_id },
+		{ "pdb_strand_id", asym_id },
+		{ "pdb_ins_code", "." },
 	});

 	return asym_id;
@@ -2375,15 +2409,13 @@ void structure::create_water(row_initializer atom)
 	{
 		asym_id = struct_asym.get_unique_id();

-		struct_asym.emplace({
-			{"id", asym_id},
-			{"pdbx_blank_PDB_chainid_flag", "N"},
-			{"pdbx_modified", "N"},
-			{"entity_id", entity_id},
-			{"details", "?"}
-		});
+		struct_asym.emplace({ { "id", asym_id },
+			{ "pdbx_blank_PDB_chainid_flag", "N" },
+			{ "pdbx_modified", "N" },
+			{ "entity_id", entity_id },
+			{ "details", "?" } });
 	}
-	
+
 	auto &atom_site = m_db["atom_site"];
 	auto auth_seq_id = atom_site.find_max<int>("auth_seq_id", "label_entity_id"_key == entity_id) + 1;
 	if (auth_seq_id < 0)
@@ -2397,13 +2429,13 @@ void structure::create_water(row_initializer atom)
 	atom.set_value("label_entity_id", entity_id);
 	atom.set_value("auth_seq_id", std::to_string(auth_seq_id));

-	atom.set_value_if_empty({"group_PDB", "HETATM"});
-	atom.set_value_if_empty({"label_comp_id", "HOH"});
-	atom.set_value_if_empty({"label_seq_id", "."});
-	atom.set_value_if_empty({"auth_comp_id", "HOH"});
-	atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
-	atom.set_value_if_empty({"label_alt_id", ""});
-	atom.set_value_if_empty({"occupancy", 1.0, 2});
+	atom.set_value_if_empty({ "group_PDB", "HETATM" });
+	atom.set_value_if_empty({ "label_comp_id", "HOH" });
+	atom.set_value_if_empty({ "label_seq_id", "." });
+	atom.set_value_if_empty({ "auth_comp_id", "HOH" });
+	atom.set_value_if_empty({ "pdbx_PDB_model_num", 1 });
+	atom.set_value_if_empty({ "label_alt_id", "" });
+	atom.set_value_if_empty({ "occupancy", 1.0, 2 });

 	auto row = atom_site.emplace(atom.begin(), atom.end());

@@ -2412,16 +2444,16 @@ void structure::create_water(row_initializer atom)
 	auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
 	int ndb_nr = pdbx_nonpoly_scheme.find_max<int>("ndb_seq_num") + 1;
 	pdbx_nonpoly_scheme.emplace({
-		{"asym_id", asym_id},
-		{"entity_id", entity_id},
-		{"mon_id", "HOH"},
-		{"ndb_seq_num", ndb_nr},
-		{"pdb_seq_num", auth_seq_id},
-		{"auth_seq_num", auth_seq_id},
-		{"pdb_mon_id", "HOH"},
-		{"auth_mon_id", "HOH"},
-		{"pdb_strand_id", asym_id},
-		{"pdb_ins_code", "."},
+		{ "asym_id", asym_id },
+		{ "entity_id", entity_id },
+		{ "mon_id", "HOH" },
+		{ "ndb_seq_num", ndb_nr },
+		{ "pdb_seq_num", auth_seq_id },
+		{ "auth_seq_num", auth_seq_id },
+		{ "pdb_mon_id", "HOH" },
+		{ "auth_mon_id", "HOH" },
+		{ "pdb_strand_id", asym_id },
+		{ "pdb_ins_code", "." },
 	});
 }

@@ -2433,18 +2465,14 @@ branch &structure::create_branch()
 	auto entity_id = entity.get_unique_id("");
 	auto asym_id = struct_asym.get_unique_id();

-	entity.emplace({
-		{"id", entity_id},
-		{"type", "branched"}
-	});
+	entity.emplace({ { "id", entity_id },
+		{ "type", "branched" } });

-	struct_asym.emplace({
-		{"id", asym_id},
-		{"pdbx_blank_PDB_chainid_flag", "N"},
-		{"pdbx_modified", "N"},
-		{"entity_id", entity_id},
-		{"details", "?"}
-	});
+	struct_asym.emplace({ { "id", asym_id },
+		{ "pdbx_blank_PDB_chainid_flag", "N" },
+		{ "pdbx_modified", "N" },
+		{ "entity_id", entity_id },
+		{ "details", "?" } });

 	return m_branches.emplace_back(*this, asym_id, entity_id);
 }
@@ -2641,22 +2669,19 @@ std::string structure::create_entity_for_branch(branch &branch)
 		if (VERBOSE)
 			std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << '\n';

-		entity.emplace({
-			{"id", entityID},
-			{"type", "branched"},
-			{"src_method", "man"},
-			{"pdbx_description", entityName},
-			{"formula_weight", branch.weight()}});
+		entity.emplace({ { "id", entityID },
+			{ "type", "branched" },
+			{ "src_method", "man" },
+			{ "pdbx_description", entityName },
+			{ "formula_weight", branch.weight() } });

 		auto &pdbx_entity_branch_list = m_db["pdbx_entity_branch_list"];
 		for (auto &sugar : branch)
 		{
-			pdbx_entity_branch_list.emplace({
-				{"entity_id", entityID},
-				{"comp_id", sugar.get_compound_id()},
-				{"num", sugar.num()},
-				{"hetero", "n"}
-			});
+			pdbx_entity_branch_list.emplace({ { "entity_id", entityID },
+				{ "comp_id", sugar.get_compound_id() },
+				{ "num", sugar.num() },
+				{ "hetero", "n" } });
 		}

 		auto &pdbx_entity_branch_link = m_db["pdbx_entity_branch_link"];
@@ -2670,19 +2695,17 @@ std::string structure::create_entity_for_branch(branch &branch)
 			auto &s2 = branch.at(stoi(l2.get_auth_seq_id()) - 1);
 			auto l1 = s2.get_atom_by_atom_id("C1");

-			pdbx_entity_branch_link.emplace({
-				{"link_id", pdbx_entity_branch_link.get_unique_id("")},
-				{"entity_id", entityID},
-				{"entity_branch_list_num_1", s1.get_auth_seq_id()},
-				{"comp_id_1", s1.get_compound_id()},
-				{"atom_id_1", l1.get_label_atom_id()},
-				{"leaving_atom_id_1", "O1"},
-				{"entity_branch_list_num_2", s2.get_auth_seq_id()},
-				{"comp_id_2", s2.get_compound_id()},
-				{"atom_id_2", l2.get_label_atom_id()},
-				{"leaving_atom_id_2", "H" + l2.get_label_atom_id()},
-				{"value_order", "sing"}
-			});
+			pdbx_entity_branch_link.emplace({ { "link_id", pdbx_entity_branch_link.get_unique_id("") },
+				{ "entity_id", entityID },
+				{ "entity_branch_list_num_1", s1.get_auth_seq_id() },
+				{ "comp_id_1", s1.get_compound_id() },
+				{ "atom_id_1", l1.get_label_atom_id() },
+				{ "leaving_atom_id_1", "O1" },
+				{ "entity_branch_list_num_2", s2.get_auth_seq_id() },
+				{ "comp_id_2", s2.get_compound_id() },
+				{ "atom_id_2", l2.get_label_atom_id() },
+				{ "leaving_atom_id_2", "H" + l2.get_label_atom_id() },
+				{ "value_order", "sing" } });
 		}
 	}

@@ -2725,12 +2748,26 @@ void structure::cleanup_empty_categories()
 		obsoleteEntities.push_back(entity);
 	}

+	auto validator = m_db.get_validator();
+
 	for (auto entity : obsoleteEntities)
+	{
+		std::string entityID = entity["id"].as<std::string>();
+		if (validator)
+		{
+			for (auto linked : validator->get_links_for_parent("entity"))
+			{
+				if (auto cat = m_db.get(linked->m_child_category))
+					cat->erase(cif::key(linked->m_child_keys.front()) == entityID);
+			}
+		}
+
 		entities.erase(entity);
+	}

 	// the rest?

-	for (const char *cat : {"pdbx_entity_nonpoly"})
+	for (const char *cat : { "pdbx_entity_nonpoly" })
 	{
 		auto &category = m_db[cat];

@@ -2836,4 +2873,89 @@ void structure::validate_atoms() const
 	assert(atoms.empty());
 }

-} // namespace pdbx
+// Three-way comparison of two strings that are expected to contain numbers.
+//
+// Both strings are parsed as double. If both parse, the result is -1, 0 or 1
+// according to the sign of their difference, where a difference no larger
+// than the double epsilon counts as equal. A string that fails to parse
+// compares greater than one that parses. If neither parses, the strings are
+// compared lexicographically so the result stays antisymmetric, which a sort
+// comparator requires.
+static int compare_numbers(std::string_view a, std::string_view b)
+{
+	using namespace cif;
+
+	double da = 0, db = 0;
+
+	const std::from_chars_result ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
+	const std::from_chars_result rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
+
+	const bool a_ok = ra.ec == std::errc();
+	const bool b_ok = rb.ec == std::errc();
+
+	if (a_ok and b_ok)
+	{
+		const auto d = da - db;
+		if (std::abs(d) > std::numeric_limits<double>::epsilon())
+			return d > 0 ? 1 : -1;
+		return 0;
+	}
+
+	if (a_ok) // only b failed to parse
+		return -1;
+	if (b_ok) // only a failed to parse
+		return 1;
+
+	// Neither is a number: fall back to a lexicographic comparison so that
+	// compare_numbers(a, b) and compare_numbers(b, a) agree with each other.
+	const int d = a.compare(b);
+	return (d > 0) - (d < 0);
+}
+
+// Three-way comparison of the optional integer item named item in two rows.
+//
+// Returns the difference of the two values when both rows have one. A row
+// that has a value compares greater than a row that has none, and two rows
+// without a value compare equal.
+static int compare_optional_int(row_handle a, row_handle b, const char *item)
+{
+	const auto na = a.get<std::optional<int>>(item);
+	const auto nb = b.get<std::optional<int>>(item);
+
+	if (na.has_value() and nb.has_value())
+		return *na - *nb;
+	if (na.has_value())
+		return 1;
+	if (nb.has_value())
+		return -1;
+	return 0;
+}
+
+// Sort the rows of the atom_site category. The keys, in order of precedence,
+// are pdbx_PDB_model_num, label_asym_id, label_seq_id, auth_seq_id and finally
+// id compared as a number. The id values themselves are left untouched.
+void structure::reorder_atoms()
+{
+	auto &atom_site = m_db["atom_site"];
+
+	atom_site.sort([](row_handle a, row_handle b)
+		{
+			int d = a.get<int>("pdbx_PDB_model_num") - b.get<int>("pdbx_PDB_model_num");
+
+			if (d == 0)
+				d = a.get<std::string>("label_asym_id").compare(b.get<std::string>("label_asym_id"));
+
+			if (d == 0)
+				d = compare_optional_int(a, b, "label_seq_id");
+
+			if (d == 0)
+				d = compare_optional_int(a, b, "auth_seq_id");
+
+			if (d == 0)
+				d = compare_numbers(a.get<std::string>("id"), b.get<std::string>("id"));
+
+			return d;
+		});
+}
+
+} // namespace cif::mm
--- a/src/pdb/reconstruct.cpp
+++ b/src/pdb/reconstruct.cpp
@@ -144,9 +144,11 @@ void checkEntities(datablock &db)
 			if (comp_id.has_value())
 			{
 				auto compound = cf.create(*comp_id);
-				assert(compound);
 				if (not compound)
-					throw std::runtime_error("missing information for compound " + *comp_id);
+				{
+					std::cerr << "missing information for compound " << *comp_id << "\n";
+					continue;
+				}
 				formula_weight = compound->formula_weight();
 			}
 		}
@@ -416,6 +418,8 @@ void checkAtomRecords(datablock &db)
 	for (int id : db["entity"].find<int>("type"_key == "polymer", "id"))
 		polymer_entities.insert(id);

+	std::set<std::string> missingCompounds;
+
 	for (auto row : atom_site)
 	{
 		residue_key_type k = row.get<std::optional<std::string>,
@@ -446,11 +450,18 @@ void checkAtomRecords(datablock &db)
 		std::string asym_id = get_asym_id(k);
 		std::string comp_id = get_comp_id(k);

+		if (missingCompounds.contains(comp_id))
+			continue;
+
 		bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<int>());
 		auto compound = cf.create(comp_id);

 		if (not compound)
-			throw std::runtime_error("Missing compound information for " + comp_id);
+		{
+			missingCompounds.insert(comp_id);
+			std::cerr << "Missing compound information for " << comp_id << "\n";
+			continue;
+		}

 		auto chem_comp_entry = chem_comp.find_first("id"_key == comp_id);

@@ -590,18 +601,19 @@ void checkAtomAnisotropRecords(datablock &db)
 			row["type_symbol"] = parent["type_symbol"].text();
 		}

-		if (row["pdbx_auth_alt_id"].empty())
+		// Fill in missing items, but only when the parent item that is copied has a value
+		if (row["pdbx_auth_alt_id"].empty() and not parent["pdbx_auth_alt_id"].empty())
 			row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
-		if (row["pdbx_label_seq_id"].empty())
+		if (row["pdbx_label_seq_id"].empty() and not parent["label_seq_id"].empty())
 			row["pdbx_label_seq_id"] = parent["label_seq_id"].text();
-		if (row["pdbx_label_asym_id"].empty())
+		if (row["pdbx_label_asym_id"].empty() and not parent["label_asym_id"].empty())
 			row["pdbx_label_asym_id"] = parent["label_asym_id"].text();
-		if (row["pdbx_label_atom_id"].empty())
+		if (row["pdbx_label_atom_id"].empty() and not parent["label_atom_id"].empty())
 			row["pdbx_label_atom_id"] = parent["label_atom_id"].text();
-		if (row["pdbx_label_comp_id"].empty())
+		if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
 			row["pdbx_label_comp_id"] = parent["label_comp_id"].text();
-		if (row["pdbx_PDB_model_num"].empty())
-			row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
+		// if (row["pdbx_PDB_model_num"].empty() and not parent["pdbx_PDB_model_num"].empty())
+		// 	row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
 	}

 	if (not to_be_deleted.empty())
@@ -811,6 +822,18 @@ void createEntityPoly(datablock &db)

 					non_std_monomer = true;
 				}
+				else
+				{
+					// c_type = "other";
+
+					letter_can = c->one_letter_code();
+					if (letter_can == 0)
+						letter_can = 'X';
+
+					letter = '(' + comp_id + ')';
+
+					non_std_monomer = true;
+				}

 				if (type.empty())
 					type = c_type;
@@ -877,7 +900,7 @@ void createEntityPoly(datablock &db)

 void createEntityPolySeq(datablock &db)
 {
-	if (db.get("entity_poly") == nullptr)
+	if (auto cat = db.get("entity_poly"); cat == nullptr or cat->empty())
 		createEntityPoly(db);

 	using namespace literals;
@@ -928,7 +951,10 @@ void createEntityPolySeq(datablock &db)

 void createPdbxPolySeqScheme(datablock &db)
 {
-	if (db.get("entity_poly_seq") == nullptr)
+	if (auto cat = db.get("entity_poly"); cat == nullptr or cat->empty())
+		createEntityPoly(db);
+
+	if (auto cat = db.get("entity_poly_seq"); cat == nullptr or cat->empty())
 		createEntityPolySeq(db);

 	using namespace literals;
@@ -1065,7 +1091,7 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
 	// ... and any additional datablock will contain compound information
 	cif::compound_source cs(file);

-	if (db.get("atom_site") == nullptr)
+	if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
 		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

 	auto &validator = validator_factory::instance()[dictionary];
@@ -1073,7 +1099,7 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
 	std::string entry_id;

 	// Phenix files do not have an entry record
-	if (db.get("entry") == nullptr)
+	if (auto cat = db.get("entry"); cat == nullptr or cat->empty())
 	{
 		entry_id = db.name();
 		category entry("entry");
@@ -1327,19 +1353,20 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)

 	// Now create any missing categories
 	// Next make sure we have struct_asym records
-	if (db.get("struct_asym") == nullptr)
+	if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
 		createStructAsym(db);

-	if (db.get("entity") == nullptr)
+	if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
 		createEntity(db);

 	// fill in missing formula_weight, e.g.
 	checkEntities(db);

-	if (db.get("pdbx_poly_seq_scheme") == nullptr)
+	if (auto cat = db.get("pdbx_poly_seq_scheme"); cat == nullptr or cat->empty())
 		createPdbxPolySeqScheme(db);

-	if (db.get("ndb_poly_seq_scheme") != nullptr)
+	// Unlike the checks above, compare only when ndb_poly_seq_scheme is present and has rows
+	if (auto cat = db.get("ndb_poly_seq_scheme"); cat != nullptr and not cat->empty())
 		comparePolySeqSchemes(db);

 	// skip unknown categories for now

--- a/src/pdb/validate-pdbx.cpp
+++ b/src/pdb/validate-pdbx.cpp
@@ -189,6 +189,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary, std::erro
 				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
 				{
 					if (pdbx_poly_seq_scheme.count(
+							"entity_id"_key == entity_id and
 							"asym_id"_key == asym_id and
 							"mon_id"_key == mon_id and
 							"seq_id"_key == num and
@@ -202,6 +203,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary, std::erro
 			for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
 			{
 				if (entity_poly_seq.count(
+						"entity_id"_key == entity_id and
 						"mon_id"_key == mon_id and
 						"num"_key == seq_id and
 						"hetero"_key == hetero) != 1)

--- a/src/utilities.cpp
+++ b/src/utilities.cpp
@@ -235,7 +235,7 @@ void progress_bar_impl::print_progress()
 	float progress = static_cast<float>(m_consumed) / m_max_value;
 	
 	if (width < kMinBarWidth)
-		std::cout << (100 * progress) << '%' << std::endl;
+		std::cout << (100 * progress) << "%\n";
 	else
 	{
 		uint32_t bar_width = 7 * width / 10;
@@ -329,7 +329,7 @@ void progress_bar_impl::print_done()
 	if (msg.length() < width)
 		msg += std::string(width - msg.length(), ' ');

-	std::cout << '\r' << msg << std::endl;
+	std::cout << '\r' << msg << '\n';
 }

 progress_bar::progress_bar(int64_t inMax, const std::string &inAction)

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
 # We're using the older version 2 of Catch2

-find_package(Catch2 QUIET)
+if(NOT(Catch2_FOUND OR TARGET Catch2))
+	find_package(Catch2 QUIET)

-if(NOT Catch2_FOUND)
-	FetchContent_Declare(
-		Catch2
-		GIT_REPOSITORY https://github.com/catchorg/Catch2.git
-		GIT_TAG v2.13.9)
+	if(NOT Catch2_FOUND)
+		include(FetchContent)

-	FetchContent_MakeAvailable(Catch2)
+		FetchContent_Declare(
+			Catch2
+			GIT_REPOSITORY https://github.com/catchorg/Catch2.git
+			GIT_TAG v2.13.9)

-	set(Catch2_VERSION "2.13.9")
+		FetchContent_MakeAvailable(Catch2)
+
+		set(Catch2_VERSION "2.13.9")
+	endif()
 endif()

 list(
@@ -49,8 +53,7 @@ foreach(CIFPP_TEST IN LISTS CIFPP_tests)
 		target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=1)
 	endif()

-	target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp
-		Catch2::Catch2)
+	target_link_libraries(${CIFPP_TEST} PRIVATE cifpp::cifpp Catch2::Catch2)
 	target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")

 	if(MSVC)

--- a/test/unit-3d-test.cpp
+++ b/test/unit-3d-test.cpp
@@ -157,7 +157,7 @@ TEST_CASE("dh_q_0")
 	};

 	auto a = cif::dihedral_angle(t[0], t[1], t[2], p);
-	REQUIRE_THAT(a, Catch::Matchers::WithinRel(0, 0.01f));
+	REQUIRE_THAT(a, Catch::Matchers::WithinRel(0.f, 0.01f));

 	auto q = cif::construct_from_angle_axis(90, axis);