Commit fd08678f by Maarten L. Hekkelman

backup

parent 2e2fc11f
...@@ -725,6 +725,7 @@ class Row ...@@ -725,6 +725,7 @@ class Row
} }
void assign(const std::vector<Item>& values); void assign(const std::vector<Item>& values);
void assign(const std::string& name, const std::string& value, bool updateLinked);
bool operator==(const Row& rhs) const bool operator==(const Row& rhs) const
{ {
...@@ -747,7 +748,6 @@ class Row ...@@ -747,7 +748,6 @@ class Row
private: private:
void assign(const std::string& name, const std::string& value, bool updateLinked);
void assign(size_t column, const std::string& value, bool updateLinked); void assign(size_t column, const std::string& value, bool updateLinked);
void assign(const Item& i, bool updateLinked); void assign(const Item& i, bool updateLinked);
...@@ -1397,6 +1397,7 @@ class iterator_proxy ...@@ -1397,6 +1397,7 @@ class iterator_proxy
size_t size() const { return std::distance(begin(), end()); } size_t size() const { return std::distance(begin(), end()); }
RowType front() { return *begin(); } RowType front() { return *begin(); }
RowType back() { return *(std::prev(end())); }
Category& category() const { return *mCat;} Category& category() const { return *mCat;}
...@@ -1882,6 +1883,19 @@ class Category ...@@ -1882,6 +1883,19 @@ class Category
void sort(std::function<int(const Row&, const Row&)> comparator); void sort(std::function<int(const Row&, const Row&)> comparator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/// Set item \a tag to \a value in every row selected by \a cond, while
/// keeping the linked categories in line with their link definitions.
/// Child categories are updated when the links are absolute and unique;
/// when they are not, the affected child category rows are split.
///
/// This overload only builds the RowSet for \a cond and forwards to the
/// RowSet based overload.
void update_value(Condition &&cond, const std::string &tag, const std::string &value)
{
	RowSet selected{ *this, std::move(cond) };
	update_value(std::move(selected), tag, value);
}
void update_value(RowSet &&rows, const std::string &tag, const std::string &value);
// --------------------------------------------------------------------
// generate a new, unique ID. Pass it an ID generating function based on // generate a new, unique ID. Pass it an ID generating function based on
// a sequence number. This function will be called until the result is // a sequence number. This function will be called until the result is
// unique in the context of this category // unique in the context of this category
......
...@@ -2648,13 +2648,49 @@ void Row::assign(size_t column, const std::string& value, bool skipUpdateLinked) ...@@ -2648,13 +2648,49 @@ void Row::assign(size_t column, const std::string& value, bool skipUpdateLinked)
} }
} }
if (cif::VERBOSE > 2) auto rows = childCat->find(std::move(cond));
if (rows.empty())
continue;
// if (cif::VERBOSE > 2)
// {
// std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
// << cond << std::endl;
// }
// Now, suppose there are already rows in child that conform to the new value,
// we then skip this rename
Condition cond_n;
for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
{ {
std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl std::string pk = linked->mParentKeys[ix];
<< cond << std::endl; std::string ck = linked->mChildKeys[ix];
// TODO add code to *NOT* test mandatory fields for Empty
if (pk == iv->mTag)
cond_n = std::move(cond_n) && Key(ck) == value;
else
{
const char* value = (*this)[pk].c_str();
if (*value == 0)
cond_n = std::move(cond_n) && Key(ck) == Empty();
else
cond_n = std::move(cond_n) && ((Key(ck) == value) or Key(ck) == Empty());
}
}
auto rows_n = childCat->find(std::move(cond_n));
if (not rows_n.empty())
{
if (cif::VERBOSE)
std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
continue;
} }
auto rows = childCat->find(std::move(cond));
for (auto& cr: rows) for (auto& cr: rows)
cr.assign(childTag, value, false); cr.assign(childTag, value, false);
} }
......
...@@ -2249,40 +2249,10 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound ...@@ -2249,40 +2249,10 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
std::string entityID; std::string entityID;
std::tie(entityID) = db["struct_asym"].find1<std::string>("id"_key == asymID, { "entity_id" }); std::tie(entityID) = db["struct_asym"].find1<std::string>("id"_key == asymID, { "entity_id" });
// First make sure the compound is already known or insert it. // // First make sure the compound is already known or insert it.
// And if the residue is an entity, we must make sure it exists // // And if the residue is an entity, we must make sure it exists
insertCompound(newCompound, res.isEntity()); // insertCompound(newCompound, res.isEntity());
// Next, if it is a non-polymer, update the entityID
if (db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == res.compoundID()))
{
try
{
std::tie(entityID) = db["entity"].find1<std::string>("type"_key == "non-polymer" and "pdbx_description"_key == compound->name(), { "id" });
}
catch (const std::exception& ex)
{
entityID = db["entity"].getUniqueID([](int i) { return std::to_string(i); });
db["entity"].emplace({
{ "id", entityID },
{ "type", "non-polymer" },
{ "src_method", "man" },
{ "pdbx_description", compound->name() },
{ "formula_weight", compound->formulaWeight() }
});
}
if (not db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == newCompound))
{
db["pdbx_entity_nonpoly"].emplace({
{ "entity_id", entityID },
{ "name", compound->name() },
{ "comp_id", newCompound }
});
}
}
auto& atomSites = db["atom_site"]; auto& atomSites = db["atom_site"];
auto atoms = res.atoms(); auto atoms = res.atoms();
...@@ -2315,9 +2285,9 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound ...@@ -2315,9 +2285,9 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
if (r.size() != 1) if (r.size() != 1)
continue; continue;
r.front()["label_comp_id"] = newCompound; r.front().assign("label_comp_id", newCompound, false);
if (not entityID.empty()) if (not entityID.empty())
r.front()["label_entity_id"] = entityID; r.front().assign("label_entity_id", entityID, false);
} }
} }
......
#if __has_include("../src/Config.hpp")
#include "../src/Config.hpp"
#endif
#include "../include/cif++/Cif++.hpp"
#include "../include/cif++/PDB2Cif.hpp"
#include "../include/cif++/Structure.hpp"
#include <exception>
#include <fstream>
#include <iostream>
#include <boost/program_options.hpp>
namespace po = boost::program_options;
/// Scratch driver for Structure::changeResidue.
///
/// Pushes "RXA.cif" onto the compound factory, loads
/// "../examples/1cbs.cif.gz", looks up a residue with getResidue("B", "REA"),
/// changes it to compound "RXA" and writes the resulting file to stdout.
///
/// \param argc unused, no command line options are parsed here
/// \param argv unused
/// \return 0 on success, 1 when any of the steps throws a std::exception
int main(int argc, char* argv[])
{
	// The command line is not inspected; silence unused-parameter warnings.
	(void)argc;
	(void)argv;

	try
	{
		cif::VERBOSE = 3;

		mmcif::CompoundFactory::instance().pushDictionary("RXA.cif");

		mmcif::File f("../examples/1cbs.cif.gz");
		mmcif::Structure structure(f);

		auto &res = structure.getResidue("B", "REA");
		structure.changeResidue(res, "RXA", {});

		f.file().save(std::cout);
	}
	catch (const std::exception& ex)
	{
		// Without this handler a failure (missing file, unknown residue, ...)
		// would end in std::terminate without any diagnostic.
		std::cerr << "error: " << ex.what() << std::endl;
		return 1;
	}

	return 0;
}
...@@ -1214,6 +1214,291 @@ _test.name ...@@ -1214,6 +1214,291 @@ _test.name
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// rename test
// Test case r1: loads a small three-category dictionary plus matching data,
// calls Category::update_value on cat_3 and checks that only the selected
// cat_3 row received the new name.
BOOST_AUTO_TEST_CASE(r1)
{
/*
Rationale:
The pdbx_mmcif dictionary contains inconsistent child-parent relations. E.g. atom_site is parent
of pdbx_nonpoly_scheme which itself is a parent of pdbx_entity_nonpoly. If I want to rename a residue
I cannot update pdbx_nonpoly_scheme since changing a parent changes children, but not vice versa.
But if I change the comp_id in atom_site, the pdbx_nonpoly_scheme is updated, that's good, and then
pdbx_entity_nonpoly is updated as well and that's bad.
The idea is now that if we update a parent and a child that must change as well, we first check
if there are more parents of this child that will not change. In that case we have to split the
child into two, one with the new value and one with the old. We then of course have to split all
children of this split row that are direct children.
*/
// Dictionary fixture: defines the categories cat_1, cat_2 and cat_3. The
// _pdbx_item_linked_group_list loop at the end lists cat_2.name as parent of
// cat_1.name, and cat_3.name / cat_3.num as parents of cat_2.name / cat_2.num.
// NOTE(review): save__cat_1.id still carries an _item_linked entry pointing at
// '_cat_2.parent_id', an item this dictionary does not define -- confirm
// whether that leftover is intentional.
const char dict[] = R"(
data_test_dict.dic
_datablock.id test_dict.dic
_datablock.description
;
A test dictionary
;
_dictionary.title test_dict.dic
_dictionary.datablock_id test_dict.dic
_dictionary.version 1.0
loop_
_item_type_list.code
_item_type_list.primitive_code
_item_type_list.construct
code char
'[][_,.;:"&<>()/\{}'`~!@#$%A-Za-z0-9*|+-]*'
text char
'[][ \n\t()_,.;:"&<>/\{}'`~!@#$%?+=*A-Za-z0-9|^-]*'
int numb
'[+-]?[0-9]+'
save_cat_1
_category.description 'A simple test category'
_category.id cat_1
_category.mandatory_code no
_category_key.name '_cat_1.id'
save_
save__cat_1.id
_item.name '_cat_1.id'
_item.category_id cat_1
_item.mandatory_code yes
_item_linked.child_name '_cat_2.parent_id'
_item_linked.parent_name '_cat_1.id'
_item_type.code code
save_
save__cat_1.name
_item.name '_cat_1.name'
_item.category_id cat_1
_item.mandatory_code yes
_item_type.code code
save_
save__cat_1.desc
_item.name '_cat_1.desc'
_item.category_id cat_1
_item.mandatory_code yes
_item_type.code text
save_
save_cat_2
_category.description 'A second simple test category'
_category.id cat_2
_category.mandatory_code no
_category_key.name '_cat_2.id'
save_
save__cat_2.id
_item.name '_cat_2.id'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code int
save_
save__cat_2.name
_item.name '_cat_2.name'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code code
save_
save__cat_2.num
_item.name '_cat_2.num'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code int
save_
save__cat_2.desc
_item.name '_cat_2.desc'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code text
save_
save_cat_3
_category.description 'A third simple test category'
_category.id cat_3
_category.mandatory_code no
_category_key.name '_cat_3.id'
save_
save__cat_3.id
_item.name '_cat_3.id'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code int
save_
save__cat_3.name
_item.name '_cat_3.name'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code code
save_
save__cat_3.num
_item.name '_cat_3.num'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code int
save_
loop_
_pdbx_item_linked_group_list.child_category_id
_pdbx_item_linked_group_list.link_group_id
_pdbx_item_linked_group_list.child_name
_pdbx_item_linked_group_list.parent_name
_pdbx_item_linked_group_list.parent_category_id
cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
cat_2 1 '_cat_2.name' '_cat_3.name' cat_3
cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
)";
// Minimal streambuf that exposes the dictionary literal as its get area so
// it can be read through a std::istream. setg() takes non-const char*, hence
// the const_cast; sizeof(dict) - 1 leaves out the terminating NUL.
struct membuf : public std::streambuf
{
membuf(char* text, size_t length)
{
this->setg(text, text, text + length);
}
} buffer(const_cast<char*>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer);
// The dictionary is loaded into the file before the data is read below.
cif::File f;
f.loadDictionary(is_dict);
// --------------------------------------------------------------------
// Data fixture: three cat_1 rows, four cat_2 rows and two cat_3 rows. The
// two cat_3 rows share the name 'aap' and differ only in id and num.
const char data[] = R"(
data_test
loop_
_cat_1.id
_cat_1.name
_cat_1.desc
1 aap Aap
2 noot Noot
3 mies Mies
loop_
_cat_2.id
_cat_2.name
_cat_2.num
_cat_2.desc
1 aap 1 'Een dier'
2 aap 2 'Een andere aap'
3 noot 1 'walnoot bijvoorbeeld'
4 n2 1 hazelnoot
loop_
_cat_3.id
_cat_3.name
_cat_3.num
1 aap 1
2 aap 2
)";
// Brings the "..."_key literal used in the condition below into scope.
using namespace cif::literals;
// Same read-only streambuf adaptor as membuf above, this time over the data
// literal.
struct data_membuf : public std::streambuf
{
data_membuf(char* text, size_t length)
{
this->setg(text, text, text + length);
}
} data_buffer(const_cast<char*>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
// cat1 and cat2 are only referenced by the disabled checks further down.
auto& cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"];
auto& cat3 = f.firstDatablock()["cat_3"];
// Change the name of the single cat_3 row with name 'aap' and num 1.
cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");
// The update must not add or remove rows in cat_3.
BOOST_CHECK(cat3.size() == 2);
int id, num;
std::string name;
// First row: matched the condition, so it must carry the new name.
cif::tie(id, name, num) = cat3.front().get("id", "name", "num");
BOOST_CHECK(id == 1);
BOOST_CHECK(num == 1);
BOOST_CHECK(name == "aapje");
// Last row: num is 2, so it did not match and must keep the old name.
cif::tie(id, name, num) = cat3.back().get("id", "name", "num");
BOOST_CHECK(id == 2);
BOOST_CHECK(num == 2);
BOOST_CHECK(name == "aap");
// Disabled checks below.
// NOTE(review): they refer to the items parent_id and name2, which the data
// fixture above does not contain -- they look like they were written for a
// different fixture and need adapting before they can be re-enabled.
// // check a rename in parent and child
// for (auto r: cat1.find(cif::Key("id") == 1))
// {
// r["id"] = 10;
// break;
// }
// BOOST_CHECK(cat1.size() == 3);
// BOOST_CHECK(cat2.size() == 4);
// BOOST_CHECK(cat1.find(cif::Key("id") == 1).size() == 0);
// BOOST_CHECK(cat1.find(cif::Key("id") == 10).size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 1).size() == 0);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 10).size() == 2);
// // check a rename in parent and child, this time only one child should be renamed
// for (auto r: cat1.find(cif::Key("id") == 2))
// {
// r["id"] = 20;
// break;
// }
// BOOST_CHECK(cat1.size() == 3);
// BOOST_CHECK(cat2.size() == 4);
// BOOST_CHECK(cat1.find(cif::Key("id") == 2).size() == 0);
// BOOST_CHECK(cat1.find(cif::Key("id") == 20).size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2).size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20).size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "noot").size() == 0);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "n2").size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "noot").size() == 1);
// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "n2").size() == 0);
// // // --------------------------------------------------------------------
// // cat1.erase(cif::Key("id") == 10);
// // BOOST_CHECK(cat1.size() == 2);
// // BOOST_CHECK(cat2.size() == 2);
// // cat1.erase(cif::Key("id") == 20);
// // BOOST_CHECK(cat1.size() == 1);
// // BOOST_CHECK(cat2.size() == 1);
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(bondmap_1) BOOST_AUTO_TEST_CASE(bondmap_1)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment