Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
fd08678f
Unverified
Commit
fd08678f
authored
Apr 20, 2021
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
backup
parent
2e2fc11f
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
375 additions
and
40 deletions
+375
-40
include/cif++/Cif++.hpp
+15
-1
src/Cif++.cpp
+40
-4
src/Structure.cpp
+5
-35
test/rename-compound-test.cpp
+30
-0
test/unit-test.cpp
+285
-0
No files found.
include/cif++/Cif++.hpp
View file @
fd08678f
...
...
@@ -725,6 +725,7 @@ class Row
}
// Overload of assign() taking a list of Item objects.
// NOTE(review): presumably stores every item of `values` in this row; the
// definition is not visible here — confirm before relying on this.
void
assign
(
const
std
::
vector
<
Item
>&
values
);
// Store `value` in the column called `name` of this row.
// Structure::changeResidue calls this overload directly, e.g.
// assign("label_comp_id", newCompound, false), in place of row["..."] = value.
// NOTE(review): the definition of the size_t overload in src/Cif++.cpp names
// its bool parameter skipUpdateLinked, the opposite sense of the name used in
// this declaration. Confirm which meaning the flag has before passing true.
void
assign
(
const
std
::
string
&
name
,
const
std
::
string
&
value
,
bool
updateLinked
);
bool
operator
==
(
const
Row
&
rhs
)
const
{
...
...
@@ -747,7 +748,6 @@ class Row
private
:
// Store `value` in the column called `name` of this row (declaration in the
// private section of Row).
// NOTE(review): the definition of the size_t overload in src/Cif++.cpp names
// its bool parameter skipUpdateLinked, the opposite sense of the name used in
// this declaration. Confirm which meaning the flag has before passing true.
void
assign
(
const
std
::
string
&
name
,
const
std
::
string
&
value
,
bool
updateLinked
);
// Store `value` in the column with index `column` of this row.
// The visible part of the definition (src/Cif++.cpp) propagates the new value
// to rows in linked child categories, leaving a child category untouched when
// it already contains rows that match the new value.
// The flag is named skipUpdateLinked here to agree with that definition; the
// previous declaration called it updateLinked, which reads as the opposite.
// NOTE(review): presumably passing true suppresses the propagation to linked
// categories — confirm against the full definition body.
void assign(size_t column, const std::string& value, bool skipUpdateLinked);
// Overload of assign() taking a single Item.
// NOTE(review): presumably stores the item's value in the column the item
// names; the definition is not visible here. The bool flag shares its name
// with the other overloads, whose visible definition calls it
// skipUpdateLinked — confirm the intended sense.
void
assign
(
const
Item
&
i
,
bool
updateLinked
);
...
...
@@ -1397,6 +1397,7 @@ class iterator_proxy
/// Number of rows in this proxy, computed as std::distance(begin(), end()).
size_t size() const
{
	const auto count = std::distance(begin(), end());
	return count;
}
/// First element of the range, returned by value.
/// No emptiness check is made: the caller must make sure the range is not empty.
RowType front()
{
	auto first = begin();
	return *first;
}
/// Last element of the range, returned by value. It is found by stepping one
/// position back from end().
/// No emptiness check is made: the caller must make sure the range is not empty.
RowType back()
{
	auto last = std::prev(end());
	return *last;
}
/// The category this proxy refers to (dereferences the stored mCat pointer).
Category& category() const
{
	Category& owner = *mCat;
	return owner;
}
...
...
@@ -1882,6 +1883,19 @@ class Category
void
sort
(
std
::
function
<
int
(
const
Row
&
,
const
Row
&
)
>
comparator
);
// --------------------------------------------------------------------
/// Set column \a tag to \a value in every row of this category that matches
/// \a cond, keeping the linked categories consistent with the change as
/// dictated by their links: child categories are updated when the links are
/// absolute and unique, otherwise the child category rows are split.
/// This overload only builds the RowSet selected by \a cond and hands it to
/// the RowSet overload.
void update_value(Condition&& cond, const std::string& tag, const std::string& value)
{
	RowSet selection{ *this, std::move(cond) };
	update_value(std::move(selection), tag, value);
}
// Overload of update_value() that works on an already selected set of rows;
// the Condition overload forwards to this one. Only declared here.
// NOTE(review): presumably sets column `tag` to `value` in each row of `rows`
// and updates or splits linked child rows — confirm against the definition.
void
update_value
(
RowSet
&&
rows
,
const
std
::
string
&
tag
,
const
std
::
string
&
value
);
// --------------------------------------------------------------------
// generate a new, unique ID. Pass it an ID generating function based on
// a sequence number. This function will be called until the result is
// unique in the context of this category
...
...
src/Cif++.cpp
View file @
fd08678f
...
...
@@ -2648,13 +2648,49 @@ void Row::assign(size_t column, const std::string& value, bool skipUpdateLinked)
}
}
if
(
cif
::
VERBOSE
>
2
)
auto
rows
=
childCat
->
find
(
std
::
move
(
cond
));
if
(
rows
.
empty
())
continue
;
// if (cif::VERBOSE > 2)
// {
// std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
// << cond << std::endl;
// }
// Now, suppose there are already rows in child that conform to the new value,
// we then skip this rename
Condition
cond_n
;
for
(
size_t
ix
=
0
;
ix
<
linked
->
mParentKeys
.
size
();
++
ix
)
{
std
::
cerr
<<
"Parent: "
<<
linked
->
mParentCategory
<<
" Child: "
<<
linked
->
mChildCategory
<<
std
::
endl
<<
cond
<<
std
::
endl
;
std
::
string
pk
=
linked
->
mParentKeys
[
ix
];
std
::
string
ck
=
linked
->
mChildKeys
[
ix
];
// TODO add code to *NOT* test mandatory fields for Empty
if
(
pk
==
iv
->
mTag
)
cond_n
=
std
::
move
(
cond_n
)
&&
Key
(
ck
)
==
value
;
else
{
const
char
*
value
=
(
*
this
)[
pk
].
c_str
();
if
(
*
value
==
0
)
cond_n
=
std
::
move
(
cond_n
)
&&
Key
(
ck
)
==
Empty
();
else
cond_n
=
std
::
move
(
cond_n
)
&&
((
Key
(
ck
)
==
value
)
or
Key
(
ck
)
==
Empty
());
}
}
auto
rows_n
=
childCat
->
find
(
std
::
move
(
cond_n
));
if
(
not
rows_n
.
empty
())
{
if
(
cif
::
VERBOSE
)
std
::
cerr
<<
"Will not rename in child category since there are already rows that link to the parent"
<<
std
::
endl
;
continue
;
}
auto
rows
=
childCat
->
find
(
std
::
move
(
cond
));
for
(
auto
&
cr
:
rows
)
cr
.
assign
(
childTag
,
value
,
false
);
}
...
...
src/Structure.cpp
View file @
fd08678f
...
...
@@ -2249,40 +2249,10 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
std
::
string
entityID
;
std
::
tie
(
entityID
)
=
db
[
"struct_asym"
].
find1
<
std
::
string
>
(
"id"
_key
==
asymID
,
{
"entity_id"
});
// First make sure the compound is already known or insert it.
// And if the residue is an entity, we must make sure it exists
insertCompound
(
newCompound
,
res
.
isEntity
());
//
//
First make sure the compound is already known or insert it.
//
//
And if the residue is an entity, we must make sure it exists
//
insertCompound(newCompound, res.isEntity());
// Next, if it is a non-polymer, update the entityID
if
(
db
[
"pdbx_entity_nonpoly"
].
exists
(
"entity_id"
_key
==
entityID
and
"comp_id"
_key
==
res
.
compoundID
()))
{
try
{
std
::
tie
(
entityID
)
=
db
[
"entity"
].
find1
<
std
::
string
>
(
"type"
_key
==
"non-polymer"
and
"pdbx_description"
_key
==
compound
->
name
(),
{
"id"
});
}
catch
(
const
std
::
exception
&
ex
)
{
entityID
=
db
[
"entity"
].
getUniqueID
([](
int
i
)
{
return
std
::
to_string
(
i
);
});
db
[
"entity"
].
emplace
({
{
"id"
,
entityID
},
{
"type"
,
"non-polymer"
},
{
"src_method"
,
"man"
},
{
"pdbx_description"
,
compound
->
name
()
},
{
"formula_weight"
,
compound
->
formulaWeight
()
}
});
}
if
(
not
db
[
"pdbx_entity_nonpoly"
].
exists
(
"entity_id"
_key
==
entityID
and
"comp_id"
_key
==
newCompound
))
{
db
[
"pdbx_entity_nonpoly"
].
emplace
({
{
"entity_id"
,
entityID
},
{
"name"
,
compound
->
name
()
},
{
"comp_id"
,
newCompound
}
});
}
}
auto
&
atomSites
=
db
[
"atom_site"
];
auto
atoms
=
res
.
atoms
();
...
...
@@ -2315,9 +2285,9 @@ void Structure::changeResidue(const Residue& res, const std::string& newCompound
if
(
r
.
size
()
!=
1
)
continue
;
r
.
front
()
[
"label_comp_id"
]
=
newCompound
;
r
.
front
()
.
assign
(
"label_comp_id"
,
newCompound
,
false
)
;
if
(
not
entityID
.
empty
())
r
.
front
()
[
"label_entity_id"
]
=
entityID
;
r
.
front
()
.
assign
(
"label_entity_id"
,
entityID
,
false
)
;
}
}
...
...
test/rename-compound-test.cpp
0 → 100644
View file @
fd08678f
#if __has_include("../src/Config.hpp")
#include "../src/Config.hpp"
#endif
#include "../include/cif++/Cif++.hpp"
#include "../include/cif++/PDB2Cif.hpp"
#include "../include/cif++/Structure.hpp"
#include <iostream>
#include <fstream>
#include <boost/program_options.hpp>
namespace
po
=
boost
::
program_options
;
// Manual test driver for renaming a compound. It registers the RXA.cif
// compound dictionary, loads ../examples/1cbs.cif.gz, changes residue REA in
// asym B into RXA and writes the resulting file to standard output.
// The command line arguments are not used.
int main(int /*argc*/, char* /*argv*/[])
{
	// Level 3 enables the diagnostic output guarded by cif::VERBOSE checks.
	cif::VERBOSE = 3;

	mmcif::CompoundFactory::instance().pushDictionary("RXA.cif");

	mmcif::File f("../examples/1cbs.cif.gz");
	mmcif::Structure structure(f);

	auto& target = structure.getResidue("B", "REA");
	structure.changeResidue(target, "RXA", {});

	// Dump the modified data so the result can be inspected or diffed.
	f.file().save(std::cout);

	return 0;
}
test/unit-test.cpp
View file @
fd08678f
...
...
@@ -1214,6 +1214,291 @@ _test.name
}
// --------------------------------------------------------------------
// rename test
BOOST_AUTO_TEST_CASE(r1)
{
	/*
		Rationale:

		The pdbx_mmcif dictionary contains inconsistent child-parent relations. E.g. atom_site is parent
		of pdbx_nonpoly_scheme which itself is a parent of pdbx_entity_nonpoly. If I want to rename a residue
		I cannot update pdbx_nonpoly_scheme since changing a parent changes children, but not vice versa.

		But if I change the comp_id in atom_site, the pdbx_nonpoly_scheme is updated, that's good, and then
		pdbx_entity_nonpoly is updated and that's bad.

		The idea is now that if we update a parent and a child that must change as well, we first check
		if there are more parents of this child that will not change. In that case we have to split the
		child into two, one with the new value and one with the old. We then of course have to split all
		children of this split row that are direct children.
	*/

	// Read-only stream buffer over a caller-owned character array. The whole
	// array is installed as the get area, so it can be read through std::istream.
	struct membuf : public std::streambuf
	{
		membuf(char* text, size_t length)
		{
			this->setg(text, text, text + length);
		}
	};

	// Test dictionary: three categories (cat_1, cat_2, cat_3) and a linked group list.
	const char dict[] = R"(
data_test_dict.dic
_datablock.id test_dict.dic
_datablock.description
;
A test dictionary
;
_dictionary.title test_dict.dic
_dictionary.datablock_id test_dict.dic
_dictionary.version 1.0
loop_
_item_type_list.code
_item_type_list.primitive_code
_item_type_list.construct
code char
'[][_,.;:"&<>()/\{}'`~!@#$%A-Za-z0-9*|+-]*'
text char
'[][ \n\t()_,.;:"&<>/\{}'`~!@#$%?+=*A-Za-z0-9|^-]*'
int numb
'[+-]?[0-9]+'
save_cat_1
_category.description 'A simple test category'
_category.id cat_1
_category.mandatory_code no
_category_key.name '_cat_1.id'
save_
save__cat_1.id
_item.name '_cat_1.id'
_item.category_id cat_1
_item.mandatory_code yes
_item_linked.child_name '_cat_2.parent_id'
_item_linked.parent_name '_cat_1.id'
_item_type.code code
save_
save__cat_1.name
_item.name '_cat_1.name'
_item.category_id cat_1
_item.mandatory_code yes
_item_type.code code
save_
save__cat_1.desc
_item.name '_cat_1.desc'
_item.category_id cat_1
_item.mandatory_code yes
_item_type.code text
save_
save_cat_2
_category.description 'A second simple test category'
_category.id cat_2
_category.mandatory_code no
_category_key.name '_cat_2.id'
save_
save__cat_2.id
_item.name '_cat_2.id'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code int
save_
save__cat_2.name
_item.name '_cat_2.name'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code code
save_
save__cat_2.num
_item.name '_cat_2.num'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code int
save_
save__cat_2.desc
_item.name '_cat_2.desc'
_item.category_id cat_2
_item.mandatory_code yes
_item_type.code text
save_
save_cat_3
_category.description 'A third simple test category'
_category.id cat_3
_category.mandatory_code no
_category_key.name '_cat_3.id'
save_
save__cat_3.id
_item.name '_cat_3.id'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code int
save_
save__cat_3.name
_item.name '_cat_3.name'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code code
save_
save__cat_3.num
_item.name '_cat_3.num'
_item.category_id cat_3
_item.mandatory_code yes
_item_type.code int
save_
loop_
_pdbx_item_linked_group_list.child_category_id
_pdbx_item_linked_group_list.link_group_id
_pdbx_item_linked_group_list.child_name
_pdbx_item_linked_group_list.parent_name
_pdbx_item_linked_group_list.parent_category_id
cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
cat_2 1 '_cat_2.name' '_cat_3.name' cat_3
cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
)";

	// sizeof - 1 leaves the terminating NUL of the literal out of the stream.
	membuf dictBuffer(const_cast<char*>(dict), sizeof(dict) - 1);
	std::istream dictStream(&dictBuffer);

	cif::File f;
	f.loadDictionary(dictStream);

	// --------------------------------------------------------------------

	// Test data: cat_3 holds two rows named "aap" that differ only in num.
	const char data[] = R"(
data_test
loop_
_cat_1.id
_cat_1.name
_cat_1.desc
1 aap Aap
2 noot Noot
3 mies Mies
loop_
_cat_2.id
_cat_2.name
_cat_2.num
_cat_2.desc
1 aap 1 'Een dier'
2 aap 2 'Een andere aap'
3 noot 1 'walnoot bijvoorbeeld'
4 n2 1 hazelnoot
loop_
_cat_3.id
_cat_3.name
_cat_3.num
1 aap 1
2 aap 2
)";

	using namespace cif::literals;

	membuf dataBuffer(const_cast<char*>(data), sizeof(data) - 1);
	std::istream dataStream(&dataBuffer);

	f.load(dataStream);

	// cat1 and cat2 are only referenced by the disabled checks further down.
	auto& cat1 = f.firstDatablock()["cat_1"];
	auto& cat2 = f.firstDatablock()["cat_2"];
	auto& cat3 = f.firstDatablock()["cat_3"];

	// Rename only the (aap, 1) row of cat_3; the (aap, 2) row must stay as it is.
	cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");

	BOOST_CHECK(cat3.size() == 2);

	int rowID, rowNum;
	std::string rowName;

	// The first row carries the new name ...
	cif::tie(rowID, rowName, rowNum) = cat3.front().get("id", "name", "num");
	BOOST_CHECK(rowID == 1);
	BOOST_CHECK(rowNum == 1);
	BOOST_CHECK(rowName == "aapje");

	// ... and the last row is unchanged.
	cif::tie(rowID, rowName, rowNum) = cat3.back().get("id", "name", "num");
	BOOST_CHECK(rowID == 2);
	BOOST_CHECK(rowNum == 2);
	BOOST_CHECK(rowName == "aap");

	// Disabled checks, kept for reference:

	// // check a rename in parent and child
	// for (auto r: cat1.find(cif::Key("id") == 1))
	// {
	// r["id"] = 10;
	// break;
	// }
	// BOOST_CHECK(cat1.size() == 3);
	// BOOST_CHECK(cat2.size() == 4);
	// BOOST_CHECK(cat1.find(cif::Key("id") == 1).size() == 0);
	// BOOST_CHECK(cat1.find(cif::Key("id") == 10).size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 1).size() == 0);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 10).size() == 2);
	// // check a rename in parent and child, this time only one child should be renamed
	// for (auto r: cat1.find(cif::Key("id") == 2))
	// {
	// r["id"] = 20;
	// break;
	// }
	// BOOST_CHECK(cat1.size() == 3);
	// BOOST_CHECK(cat2.size() == 4);
	// BOOST_CHECK(cat1.find(cif::Key("id") == 2).size() == 0);
	// BOOST_CHECK(cat1.find(cif::Key("id") == 20).size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2).size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20).size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "noot").size() == 0);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2 and cif::Key("name2") == "n2").size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "noot").size() == 1);
	// BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "n2").size() == 0);
	// // // --------------------------------------------------------------------
	// // cat1.erase(cif::Key("id") == 10);
	// // BOOST_CHECK(cat1.size() == 2);
	// // BOOST_CHECK(cat2.size() == 2);
	// // cat1.erase(cif::Key("id") == 20);
	// // BOOST_CHECK(cat1.size() == 1);
	// // BOOST_CHECK(cat2.size() == 1);
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE
(
bondmap_1
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment