backup

fd08678f · Maarten L. Hekkelman · 2e2fc11f · fd08678f · fd08678f · fd08678f
Commit fd08678f authored Apr 20, 2021 by Maarten L. Hekkelman
Hide whitespace changes
Inline Side-by-side

Showing with 375 additions and 40 deletions

include/cif++/Cif++.hpp
+15 -1

src/Cif++.cpp
+40 -4

src/Structure.cpp
+5 -35

test/rename-compound-test.cpp
+30 -0

test/unit-test.cpp
+285 -0

No files found.
--- a/include/cif++/Cif++.hpp
+++ b/include/cif++/Cif++.hpp
@@ -725,6 +725,7 @@ class Row
 	}
 	
 	void assign(const std::vector<Item>& values);
+	void assign(const std::string& name, const std::string& value, bool updateLinked);

 	bool operator==(const Row& rhs) const
 	{
@@ -747,7 +748,6 @@ class Row

  private:

-	void assign(const std::string& name, const std::string& value, bool updateLinked);
 	void assign(size_t column, const std::string& value, bool updateLinked);
 	void assign(const Item& i, bool updateLinked);
 	
@@ -1397,6 +1397,7 @@ class iterator_proxy
 	size_t size() const				{ return std::distance(begin(), end()); }

 	RowType front()					{ return *begin(); }
+	RowType back()					{ return *(std::prev(end())); }

 	Category& category() const		{ return *mCat;}

@@ -1882,6 +1883,19 @@ class Category
 	void sort(std::function<int(const Row&, const Row&)> comparator);

 	// --------------------------------------------------------------------
+	/// Update the value of column \a tag to \a value in the rows that match \a cond,
+	/// making sure the linked categories are updated according to the link.
+	/// That means child categories are updated if the links are absolute
+	/// and unique. If they are not, the child category rows are split.
+
+	void update_value(Condition &&cond, const std::string &tag, const std::string &value)
+	{
+		update_value(RowSet{ *this, std::move(cond) }, tag, value);
+	}
+
+	void update_value(RowSet &&rows, const std::string &tag, const std::string &value);
+
+	// --------------------------------------------------------------------
 	// generate a new, unique ID. Pass it an ID generating function based on
 	// a sequence number. This function will be called until the result is
 	// unique in the context of this category

--- a/src/Cif++.cpp
+++ b/src/Cif++.cpp
@@ -2648,13 +2648,49 @@ void Row::assign(size_t column, const std::string& value, bool skipUpdateLinked)
 				}
 			}

-			if (cif::VERBOSE > 2)
+			auto rows = childCat->find(std::move(cond));
+			if (rows.empty())
+				continue;
+
+			// if (cif::VERBOSE > 2)
+			// {
+			// 	std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
+			// 			  << cond << std::endl;
+			// }
+
+			// Now, suppose there are already rows in the child category that conform to the
+			// new value; in that case we skip this rename.
+
+			Condition cond_n;
+			
+			for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
 			{
-				std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
-						  << cond << std::endl;
+				std::string pk = linked->mParentKeys[ix];
+				std::string ck = linked->mChildKeys[ix];
+
+				// TODO add code to *NOT* test mandatory fields for Empty
+
+				if (pk == iv->mTag)
+					cond_n = std::move(cond_n) && Key(ck) == value;
+				else
+				{
+					const char* value = (*this)[pk].c_str();
+					if (*value == 0)
+						cond_n = std::move(cond_n) && Key(ck) == Empty();
+					else
+						cond_n = std::move(cond_n) && ((Key(ck) == value) or Key(ck) == Empty());
+				}
+			}
+
+			auto rows_n = childCat->find(std::move(cond_n));
+			if (not rows_n.empty())
+			{
+				if (cif::VERBOSE)
+					std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
+				
+				continue;
 			}

-			auto rows = childCat->find(std::move(cond));
 			for (auto& cr: rows)
 				cr.assign(childTag, value, false);
 		}

--- a/src/Structure.cpp
+++ b/src/Structure.cpp
@@ -2249,40 +2249,10 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
 	std::string entityID;
 	std::tie(entityID) = db["struct_asym"].find1<std::string>("id"_key == asymID, { "entity_id" });

-	// First make sure the compound is already known or insert it.
-	// And if the residue is an entity, we must make sure it exists
-	insertCompound(newCompound, res.isEntity());
+	// // First make sure the compound is already known or insert it.
+	// // And if the residue is an entity, we must make sure it exists
+	// insertCompound(newCompound, res.isEntity());

-	// Next, if it is a non-polymer, update the entityID
-
-	if (db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == res.compoundID()))
-	{
-		try
-		{
-			std::tie(entityID) = db["entity"].find1<std::string>("type"_key == "non-polymer" and "pdbx_description"_key == compound->name(), { "id" });
-		}
-		catch (const std::exception& ex)
-		{
-			entityID = db["entity"].getUniqueID([](int i) { return std::to_string(i); });
-			db["entity"].emplace({
-				{ "id", entityID },
-				{ "type", "non-polymer" },
-				{ "src_method", "man" },
-				{ "pdbx_description", compound->name() },
-				{ "formula_weight", compound->formulaWeight() }
-			});
-		}
-
-		if (not db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == newCompound))
-		{
-			db["pdbx_entity_nonpoly"].emplace({
-				{ "entity_id", entityID },
-				{ "name", compound->name() },
-				{ "comp_id", newCompound }
-			});
-		}
-	}
-	
 	auto& atomSites = db["atom_site"];
 	auto atoms = res.atoms();

@@ -2315,9 +2285,9 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
 		if (r.size() != 1)
 			continue;

-		r.front()["label_comp_id"] = newCompound;
+		r.front().assign("label_comp_id", newCompound, false);
 		if (not entityID.empty())
-			r.front()["label_entity_id"] = entityID;
+			r.front().assign("label_entity_id", entityID, false);
 	}
 }


--- a/test/rename-compound-test.cpp
+++ b/test/rename-compound-test.cpp
+#if __has_include("../src/Config.hpp")
+#include "../src/Config.hpp"
+#endif
+#include "../include/cif++/Cif++.hpp"
+#include "../include/cif++/PDB2Cif.hpp"
+#include "../include/cif++/Structure.hpp"
+
+#include <iostream>
+#include <fstream>
+
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
+int main(int argc, char* argv[])
+{
+	cif::VERBOSE = 3;
+	
+	mmcif::CompoundFactory::instance().pushDictionary("RXA.cif");
+
+	mmcif::File f("../examples/1cbs.cif.gz");
+	mmcif::Structure structure(f);
+
+	auto &res = structure.getResidue("B", "REA");
+	structure.changeResidue(res, "RXA", {});
+
+	f.file().save(std::cout);
+	
+	return 0;	
+}
--- a/test/unit-test.cpp
+++ b/test/unit-test.cpp
@@ -1214,6 +1214,291 @@ _test.name
 }

 // --------------------------------------------------------------------
+// rename test
+
+BOOST_AUTO_TEST_CASE(r1)
+{
+	/*
+		Rationale:
+
+		The pdbx_mmcif dictionary contains inconsistent child-parent relations. E.g. atom_site is parent
+		of pdbx_nonpoly_scheme which itself is a parent of pdbx_entity_nonpoly. If I want to rename a residue
+		I cannot update pdbx_nonpoly_scheme since changing a parent changes children, but not vice versa.
+
+		But if I change the comp_id in atom_site, the pdbx_nonpoly_scheme is updated, that's good, and then
+		pdbx_entity_nonpoly is updated and that's bad.
+		
+		The idea is now that if we update a parent and a child that must change as well, we first check
+		if there are more parents of this child that will not change. In that case we have to split the
+		child into two, one with the new value and one with the old. We then of course have to split all
+		children of this split row that are direct children.
+	*/
+
+    const char dict[] = R"(
+data_test_dict.dic
+    _datablock.id	test_dict.dic
+    _datablock.description
+;
+    A test dictionary
+;
+    _dictionary.title           test_dict.dic
+    _dictionary.datablock_id    test_dict.dic
+    _dictionary.version         1.0
+
+     loop_
+    _item_type_list.code
+    _item_type_list.primitive_code
+    _item_type_list.construct
+               code      char
+               '[][_,.;:"&<>()/\{}'`~!@#$%A-Za-z0-9*|+-]*'
+
+               text      char
+               '[][ \n\t()_,.;:"&<>/\{}'`~!@#$%?+=*A-Za-z0-9|^-]*'
+
+               int       numb
+               '[+-]?[0-9]+'
+
+save_cat_1
+    _category.description     'A simple test category'
+    _category.id              cat_1
+    _category.mandatory_code  no
+    _category_key.name        '_cat_1.id'
+    save_
+
+save__cat_1.id
+    _item.name                '_cat_1.id'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_linked.child_name   '_cat_2.parent_id'
+    _item_linked.parent_name  '_cat_1.id'
+    _item_type.code           code
+    save_
+
+save__cat_1.name
+    _item.name                '_cat_1.name'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_1.desc
+    _item.name                '_cat_1.desc'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_type.code           text
+    save_
+
+save_cat_2
+    _category.description     'A second simple test category'
+    _category.id              cat_2
+    _category.mandatory_code  no
+    _category_key.name        '_cat_2.id'
+    save_
+
+save__cat_2.id
+    _item.name                '_cat_2.id'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_2.name
+    _item.name                '_cat_2.name'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_2.num
+    _item.name                '_cat_2.num'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_2.desc
+    _item.name                '_cat_2.desc'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           text
+    save_
+
+save_cat_3
+    _category.description     'A third simple test category'
+    _category.id              cat_3
+    _category.mandatory_code  no
+    _category_key.name        '_cat_3.id'
+    save_
+
+save__cat_3.id
+    _item.name                '_cat_3.id'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_3.name
+    _item.name                '_cat_3.name'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_3.num
+    _item.name                '_cat_3.num'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+loop_
+_pdbx_item_linked_group_list.child_category_id
+_pdbx_item_linked_group_list.link_group_id
+_pdbx_item_linked_group_list.child_name
+_pdbx_item_linked_group_list.parent_name
+_pdbx_item_linked_group_list.parent_category_id
+cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
+cat_2 1 '_cat_2.name' '_cat_3.name' cat_3
+cat_2 1 '_cat_2.num'  '_cat_3.num'  cat_3
+
+    )";
+
+    struct membuf : public std::streambuf
+    {
+        membuf(char* text, size_t length)
+        {
+            this->setg(text, text, text + length);
+        }
+    } buffer(const_cast<char*>(dict), sizeof(dict) - 1);
+
+    std::istream is_dict(&buffer);
+
+    cif::File f;
+    f.loadDictionary(is_dict);
+
+    // --------------------------------------------------------------------
+
+    const char data[] = R"(
+data_test
+loop_
+_cat_1.id
+_cat_1.name
+_cat_1.desc
+1 aap  Aap
+2 noot Noot
+3 mies Mies
+
+loop_
+_cat_2.id
+_cat_2.name
+_cat_2.num
+_cat_2.desc
+1 aap  1 'Een dier'
+2 aap  2 'Een andere aap'
+3 noot 1 'walnoot bijvoorbeeld'
+4 n2   1  hazelnoot
+
+loop_
+_cat_3.id
+_cat_3.name
+_cat_3.num
+1 aap 1
+2 aap 2
+    )";   
+
+	using namespace cif::literals;
+
+    struct data_membuf : public std::streambuf
+    {
+        data_membuf(char* text, size_t length)
+        {
+            this->setg(text, text, text + length);
+        }
+    } data_buffer(const_cast<char*>(data), sizeof(data) - 1);
+
+    std::istream is_data(&data_buffer);
+    f.load(is_data);
+
+    auto& cat1 = f.firstDatablock()["cat_1"];
+    auto& cat2 = f.firstDatablock()["cat_2"];
+	auto& cat3 = f.firstDatablock()["cat_3"];
+
+	cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");
+
+	BOOST_CHECK(cat3.size() == 2);
+	
+	int id, num;
+	std::string name;
+	cif::tie(id, name, num) = cat3.front().get("id", "name", "num");
+	BOOST_CHECK(id == 1);
+	BOOST_CHECK(num == 1);
+	BOOST_CHECK(name == "aapje");
+
+	cif::tie(id, name, num) = cat3.back().get("id", "name", "num");
+	BOOST_CHECK(id == 2);
+	BOOST_CHECK(num == 2);
+	BOOST_CHECK(name == "aap");
+	
+
+
+    // // check a rename in parent and child
+
+    // for (auto r: cat1.find(cif::Key("id") == 1))
+    // {
+    //     r["id"] = 10;
+    //     break;
+    // }
+
+    // BOOST_CHECK(cat1.size() == 3);
+    // BOOST_CHECK(cat2.size() == 4);
+
+    // BOOST_CHECK(cat1.find(cif::Key("id") == 1).size() == 0);
+    // BOOST_CHECK(cat1.find(cif::Key("id") == 10).size() == 1);
+
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 1).size() == 0);
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 10).size() == 2);
+
+    // // check a rename in parent and child, this time only one child should be renamed
+
+    // for (auto r: cat1.find(cif::Key("id") == 2))
+    // {
+    //     r["id"] = 20;
+    //     break;
+    // }
+
+    // BOOST_CHECK(cat1.size() == 3);
+    // BOOST_CHECK(cat2.size() == 4);
+
+    // BOOST_CHECK(cat1.find(cif::Key("id") == 2).size() == 0);
+    // BOOST_CHECK(cat1.find(cif::Key("id") == 20).size() == 1);
+
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2).size() == 1);
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20).size() == 1);
+
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "noot").size() == 0);
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "n2").size() == 1);
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "noot").size() == 1);
+    // BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "n2").size() == 0);
+
+
+
+    // // // --------------------------------------------------------------------
+    
+    // // cat1.erase(cif::Key("id") == 10);
+
+    // // BOOST_CHECK(cat1.size() == 2);
+    // // BOOST_CHECK(cat2.size() == 2);
+
+    // // cat1.erase(cif::Key("id") == 20);
+
+    // // BOOST_CHECK(cat1.size() == 1);
+    // // BOOST_CHECK(cat2.size() == 1);
+
+
+
+}
+
+// --------------------------------------------------------------------

 BOOST_AUTO_TEST_CASE(bondmap_1)
 {