Commit 82086a93 by Maarten L. Hekkelman

PDBx validation and reconstruction code, take 1

parent abd97cc1
...@@ -300,6 +300,7 @@ set(project_sources ...@@ -300,6 +300,7 @@ set(project_sources
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp ${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp ${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp ${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${PROJECT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
) )
set(project_headers set(project_headers
...@@ -562,7 +563,8 @@ if(BUILD_TESTING) ...@@ -562,7 +563,8 @@ if(BUILD_TESTING)
model model
rename-compound rename-compound
sugar sugar
spinner) spinner
validate-pdbx)
foreach(CIFPP_TEST IN LISTS CIFPP_tests) foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test") set(CIFPP_TEST "${CIFPP_TEST}-test")
......
/*- /*-
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute * Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* 1. Redistributions of source code must retain the above copyright notice, this * 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer * list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice, * 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation * this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution. * and/or other materials provided with the distribution.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
...@@ -30,13 +30,13 @@ ...@@ -30,13 +30,13 @@
/** /**
* @file pdb.hpp * @file pdb.hpp
* *
* This file presents the API to read and write files in the * This file presents the API to read and write files in the
* legacy and ancient PDB format. * legacy and ancient PDB format.
* *
* The code works on the basis of best effort since it is * The code works on the basis of best effort since it is
* impossible to have correct round trip fidelity. * impossible to have correct round trip fidelity.
* *
*/ */
namespace cif::pdb namespace cif::pdb
...@@ -81,7 +81,7 @@ inline void write(std::ostream &os, const file &f) ...@@ -81,7 +81,7 @@ inline void write(std::ostream &os, const file &f)
/** @brief Write out the data in @a db to file @a file /** @brief Write out the data in @a db to file @a file
* in legacy PDB format or mmCIF format, depending on the * in legacy PDB format or mmCIF format, depending on the
* filename extension. * filename extension.
* *
* If extension of @a file is *.gz* the resulting file will * If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format. * be written in gzip compressed format.
*/ */
...@@ -90,7 +90,7 @@ void write(const std::filesystem::path &file, const datablock &db); ...@@ -90,7 +90,7 @@ void write(const std::filesystem::path &file, const datablock &db);
/** @brief Write out the data in @a f to file @a file /** @brief Write out the data in @a f to file @a file
* in legacy PDB format or mmCIF format, depending on the * in legacy PDB format or mmCIF format, depending on the
* filename extension. * filename extension.
* *
* If extension of @a file is *.gz* the resulting file will * If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format. * be written in gzip compressed format.
*/ */
...@@ -102,13 +102,30 @@ inline void write(const std::filesystem::path &p, const file &f) ...@@ -102,13 +102,30 @@ inline void write(const std::filesystem::path &p, const file &f)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/** \brief Reconstruct all missing categories for an assumed PDBx file. /** \brief Reconstruct all missing categories for an assumed PDBx file.
* *
* Some people believe that simply dumping some atom records is enough. * Some people believe that simply dumping some atom records is enough.
* *
* \param db The cif::datablock that hopefully contains some valid data * \param pdbx_file The cif::file that hopefully contains some valid data
* \param dictionary The mmcif dictionary to use
*/ */
void reconstruct_pdbx(datablock &db); void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Only the first datablock of the file is inspected.
*
* A failed check does not raise an error towards the caller, the function
* simply returns false. Use the common \ref cif::VERBOSE flag to turn on
* diagnostic messages describing the check that failed.
*
* \param pdbx_file The input file
* \param dictionary The mmcif dictionary to use
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Other I/O related routines // Other I/O related routines
...@@ -117,7 +134,7 @@ void reconstruct_pdbx(datablock &db); ...@@ -117,7 +134,7 @@ void reconstruct_pdbx(datablock &db);
* *
* The line returned should be compatible with the legacy PDB * The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program. * format and is e.g. used in the DSSP program.
* *
* @param data The datablock to use as source for the requested data * @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned * @param truncate_at The maximum length of the line returned
*/ */
...@@ -127,7 +144,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca ...@@ -127,7 +144,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca
* *
* The line returned should be compatible with the legacy PDB * The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program. * format and is e.g. used in the DSSP program.
* *
* @param data The datablock to use as source for the requested data * @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned * @param truncate_at The maximum length of the line returned
*/ */
...@@ -137,7 +154,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca ...@@ -137,7 +154,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca
* *
* The line returned should be compatible with the legacy PDB * The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program. * format and is e.g. used in the DSSP program.
* *
* @param data The datablock to use as source for the requested data * @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned * @param truncate_at The maximum length of the line returned
*/ */
...@@ -147,12 +164,11 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca ...@@ -147,12 +164,11 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca
* *
* The line returned should be compatible with the legacy PDB * The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program. * format and is e.g. used in the DSSP program.
* *
* @param data The datablock to use as source for the requested data * @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned * @param truncate_at The maximum length of the line returned
*/ */
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127); std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace pdbx } // namespace cif::pdb
...@@ -6478,6 +6478,10 @@ file read(std::istream &is) ...@@ -6478,6 +6478,10 @@ file read(std::istream &is)
{ {
std::throw_with_nested(std::runtime_error("Since the file did not start with a valid PDB HEADER line mmCIF was assumed, but that failed.")); std::throw_with_nested(std::runtime_error("Since the file did not start with a valid PDB HEADER line mmCIF was assumed, but that failed."));
} }
// Since we're using the cif::pdb way of reading the file, the data may need
// reconstruction
reconstruct_pdbx(result);
} }
} }
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++.hpp"
namespace cif::pdb
{
/** \brief Build the condition that selects, in \a parentCat, the parent
 * row(s) of the child row \a rh.
 *
 * All child-to-parent links the validator knows for the category of \a rh
 * are examined, those pointing to another parent category are ignored.
 * For each remaining link the non-empty child key values are combined with
 * AND into a condition on the corresponding parent keys; the conditions of
 * the separate links are then combined with OR.
 *
 * \param validator The validator supplying the link definitions
 * \param rh        The child row
 * \param parentCat The category in which the parent should be looked up
 * \result The resulting condition, this is an empty condition when no link
 *         contributed anything. When no link exists at all and cif::VERBOSE
 *         is positive a warning is written to std::cerr.
 */
condition get_parents_condition(const validator &validator, row_handle rh, const category &parentCat)
{
	const auto childName = rh.get_category().name();
	const auto parentName = parentCat.name();

	condition result;
	bool linkSeen = false;

	for (const auto &link : validator.get_links_for_child(childName))
	{
		// only links that lead to the requested parent category are relevant
		if (link->m_parent_category != parentName)
			continue;

		linkSeen = true;

		// child and parent keys are parallel arrays, walk them by index
		condition linkCond;
		const auto keyCount = link->m_child_keys.size();

		for (size_t k = 0; k < keyCount; ++k)
		{
			auto childValue = rh[link->m_child_keys[k]];

			// an empty child value puts no constraint on the parent
			if (not childValue.empty())
				linkCond = std::move(linkCond) and key(link->m_parent_keys[k]) == childValue.text();
		}

		result = std::move(result) or std::move(linkCond);
	}

	if (not linkSeen and cif::VERBOSE > 0)
		std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';

	return result;
}
/** \brief Check whether the polymer related categories in the first
 * datablock of \a file are consistent with each other.
 *
 * The checks performed, in order:
 *
 * - the file, its first datablock and atom_site are not empty
 * - each polymer residue in atom_site (known peptide with a label_seq_id)
 *   has exactly one parent record in pdbx_poly_seq_scheme
 * - entity, entity_poly, entity_poly_seq and struct_asym are not empty
 * - each polymer entity has exactly one entity_poly record
 * - each entity_poly_seq record has exactly one pdbx_poly_seq_scheme record
 *   for every asym of the entity, and vice versa
 * - the hetero flag is set if and only if more than one monomer is listed
 *   for a sequence position
 * - no atom_site record of a polymer has a label_comp_id that is not listed
 *   for its sequence position
 *
 * Any exception derived from std::exception thrown while checking results
 * in a return value of false, the message is written to std::clog when
 * cif::VERBOSE is positive.
 *
 * \param file       The file to check
 * \param dictionary The name of the dictionary used to look up the validator
 * \result true if all checks passed, false otherwise
 */
bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
{
	using namespace cif::literals;

	auto &cf = cif::compound_factory::instance();
	auto &validator = cif::validator_factory::instance().operator[](dictionary);

	try
	{
		if (file.empty())
			throw validation_error("Empty file");

		auto &db = file.front();

		if (db.empty())
			throw validation_error("Empty datablock");

		auto &atom_site = db["atom_site"];
		if (atom_site.empty())
			throw validation_error("Empty or missing atom_site category");

		auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];

		// Atoms of one residue are stored consecutively, so a residue needs to
		// be checked only once. A residue is identified by both its asym and
		// its seq_id: comparing seq_id alone would skip the first residue of a
		// chain when the previous row, in another chain, has the same seq_id.
		std::string last_asym_id;
		int last_seq_id = -1;

		for (auto r : atom_site)
		{
			auto seq_id = r.get<std::optional<int>>("label_seq_id");
			if (not seq_id.has_value()) // not a residue in a polymer
				continue;

			auto asym_id = r.get<std::string>("label_asym_id");
			if (*seq_id == last_seq_id and asym_id == last_asym_id)
				continue;

			last_seq_id = *seq_id;
			last_asym_id = std::move(asym_id);

			auto comp_id = r.get<std::string>("label_comp_id");
			if (not cf.is_known_peptide(comp_id))
				continue;

			auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
			if (p.size() != 1)
				throw validation_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record");
		}

		auto &entity = db["entity"];
		if (entity.empty())
			throw validation_error("Entity category is missing or empty");

		auto &entity_poly = db["entity_poly"];
		if (entity_poly.empty())
			throw validation_error("Entity_poly category is missing or empty");

		auto &entity_poly_seq = db["entity_poly_seq"];
		if (entity_poly_seq.empty())
			throw validation_error("Entity_poly_seq category is missing or empty");

		auto &struct_asym = db["struct_asym"];
		if (struct_asym.empty())
			throw validation_error("struct_asym category is missing or empty");

		for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
		{
			if (entity_poly.count("entity_id"_key == entity_id) != 1)
				throw validation_error("There should be exactly one entity_poly record per polymer entity");

			// const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");

			// The monomers listed for each sequence position of this entity
			std::map<int, std::set<std::string>> mon_per_seq_id;

			// entity_poly_seq -> pdbx_poly_seq_scheme, for each asym of the entity
			for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
			{
				mon_per_seq_id[num].emplace(mon_id);

				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
				{
					if (pdbx_poly_seq_scheme.count(
							"asym_id"_key == asym_id and
							"mon_id"_key == mon_id and
							"seq_id"_key == num and
							"hetero"_key == hetero) != 1)
					{
						throw validation_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
					}
				}
			}

			// pdbx_poly_seq_scheme -> entity_poly_seq
			for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
			{
				// The lookup must be limited to the current entity, otherwise
				// another entity having the same monomer at the same position
				// results in a count of two for a perfectly valid file.
				if (entity_poly_seq.count(
						"entity_id"_key == entity_id and
						"mon_id"_key == mon_id and
						"num"_key == seq_id and
						"hetero"_key == hetero) != 1)
				{
					throw validation_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
				}

				// hetero should be set if, and only if, there are alternatives
				if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
					throw validation_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
			}

			// atom_site should not contain a residue at a sequence position
			// with a compound other than the ones listed for that position
			for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
			{
				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
				{
					condition cond;

					for (auto mon_id : mon_ids)
					{
						if (cond)
							cond = std::move(cond) or "label_comp_id"_key == mon_id;
						else
							cond = "label_comp_id"_key == mon_id;
					}

					cond = "label_entity_id"_key == entity_id and
					       "label_asym_id"_key == asym_id and
					       "label_seq_id"_key == seq_id and not std::move(cond);

					if (atom_site.exists(std::move(cond)))
						throw validation_error("An atom_site record exists that has no parent in the poly seq scheme categories");
				}
			}
		}

		return true;
	}
	catch (const std::exception &ex)
	{
		if (cif::VERBOSE > 0)
			std::clog << ex.what() << '\n';
	}

	return false;
}
} // namespace cif::pdb
\ No newline at end of file
...@@ -13,10 +13,11 @@ int main(int argc, char *argv[]) ...@@ -13,10 +13,11 @@ int main(int argc, char *argv[])
// Build a new parser on top of Catch2's // Build a new parser on top of Catch2's
using namespace Catch::clara; using namespace Catch::clara;
auto cli = session.cli() // Get Catch2's command line parser auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string | Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
["-D"]["--data-dir"] // the option names it will respond to ["-D"]["--data-dir"] // the option names it will respond to
("The directory containing the data files"); // description string for the help output ("The directory containing the data files") // description string for the help output
| Opt(cif::VERBOSE, "verbose")["-v"]["--cif-verbose"]("Flag for cif::VERBOSE");
// Now pass the new composite back to Catch2 so it uses that // Now pass the new composite back to Catch2 so it uses that
session.cli(cli); session.cli(cli);
...@@ -34,6 +35,5 @@ int main(int argc, char *argv[]) ...@@ -34,6 +35,5 @@ int main(int argc, char *argv[])
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif"); cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
return session.run(); return session.run();
} }
\ No newline at end of file
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <cif++.hpp>
#include <stdexcept>
// --------------------------------------------------------------------
// User defined literal turning a string literal containing mmCIF text into a
// cif::file. The characters of the literal are parsed in place through a
// stream buffer, no copy of the text is made.
cif::file operator""_cf(const char *text, size_t length)
{
	// Minimal stream buffer whose get area is the literal's character range
	class literal_buf : public std::streambuf
	{
	  public:
		literal_buf(const char *first, size_t count)
		{
			// setg takes non-const pointers, the data is only read
			char *begin = const_cast<char *>(first);
			setg(begin, begin, begin + count);
		}
	};

	literal_buf source(text, length);
	std::istream input(&source);

	return cif::file(input);
}
// --------------------------------------------------------------------
// Exercises cif::pdb::is_valid_pdbx_file on a minimal, hand written datablock
// (five residues of a single polymer entity in one asym, only the CA atoms).
// The fixture is parsed at the start of the test case, each SECTION below then
// applies one modification to it and checks the verdict of the validator.
TEST_CASE("test-1")
{
auto f = R"(data_1CBS
#
_entry.id 1CBS
#
_entity.id 1
_entity.type polymer
#
_entity_poly.entity_id 1
_entity_poly.type 'polypeptide(L)'
_entity_poly.nstd_linkage no
_entity_poly.nstd_monomer no
_entity_poly.pdbx_seq_one_letter_code
;PNFSG
;
_entity_poly.pdbx_seq_one_letter_code_can
;PNFSG
;
_entity_poly.pdbx_strand_id A
_entity_poly.pdbx_target_identifier ?
#
loop_
_entity_poly_seq.entity_id
_entity_poly_seq.num
_entity_poly_seq.mon_id
_entity_poly_seq.hetero
1 1 PRO n
1 2 ASN n
1 3 PHE n
1 4 SER n
1 5 GLY n
#
loop_
_struct_asym.id
_struct_asym.pdbx_blank_PDB_chainid_flag
_struct_asym.pdbx_modified
_struct_asym.entity_id
_struct_asym.details
A N N 1 ?
#
loop_
_atom_type.symbol
C
N
O
S
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 2 C CA . PRO A 1 1 ? 18.150 13.525 43.680 1.00 28.82 ? 1 PRO A CA 1
ATOM 9 C CA . ASN A 1 2 ? 20.576 16.457 43.578 1.00 20.79 ? 2 ASN A CA 1
ATOM 17 C CA . PHE A 1 3 ? 21.144 17.838 40.087 1.00 12.62 ? 3 PHE A CA 1
ATOM 28 C CA . SER A 1 4 ? 23.170 20.780 41.464 1.00 11.30 ? 4 SER A CA 1
ATOM 34 C CA . GLY A 1 5 ? 26.628 21.486 40.103 1.00 10.86 ? 5 GLY A CA 1
#
loop_
_pdbx_poly_seq_scheme.asym_id
_pdbx_poly_seq_scheme.entity_id
_pdbx_poly_seq_scheme.seq_id
_pdbx_poly_seq_scheme.mon_id
_pdbx_poly_seq_scheme.ndb_seq_num
_pdbx_poly_seq_scheme.pdb_seq_num
_pdbx_poly_seq_scheme.auth_seq_num
_pdbx_poly_seq_scheme.pdb_mon_id
_pdbx_poly_seq_scheme.auth_mon_id
_pdbx_poly_seq_scheme.pdb_strand_id
_pdbx_poly_seq_scheme.pdb_ins_code
_pdbx_poly_seq_scheme.hetero
A 1 1 PRO 1 1 1 PRO PRO A . n
A 1 2 ASN 2 2 2 ASN ASN A . n
A 1 3 PHE 3 3 3 PHE PHE A . n
A 1 4 SER 4 4 4 SER SER A . n
A 1 5 GLY 5 5 5 GLY GLY A . n
#
)"_cf;
// Baseline: the unmodified fixture should be accepted
SECTION("Plain file")
{
REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
// Removing the only atom of residue 1 should not invalidate the file: the
// sequence records do not need to have atoms in atom_site
SECTION("Delete one atom_site")
{
auto &db = f.front();
auto n = db["atom_site"].erase(cif::key("id") == 2);
REQUIRE(n == 1);
REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
// Residue 2 still has an atom and an entity_poly_seq record, but now lacks
// its pdbx_poly_seq_scheme record
SECTION("Delete a pdbx_poly_seq_scheme record")
{
auto &db = f.front();
auto n = db["pdbx_poly_seq_scheme"].erase(cif::key("seq_id") == 2);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// The pdbx_poly_seq_scheme record for position 2 is left without a matching
// entity_poly_seq record
SECTION("Delete an entity_poly_seq record")
{
auto &db = f.front();
auto n = db["entity_poly_seq"].erase(cif::key("num") == 2);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// entity_poly is emptied this way, which the validator should reject
SECTION("Delete an entity_poly record")
{
auto &db = f.front();
auto n = db["entity_poly"].erase(cif::key("entity_id") == 1);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// Atom 9 belongs to ASN 2; renaming its compound to ALA makes it disagree
// with the sequence records, which still list ASN for position 2
SECTION("Mutate an atom_site record")
{
auto &db = f.front();
auto r = db["atom_site"].find1(cif::key("id") == 9);
r.assign({
{ "label_comp_id", "ALA" },
{ "auth_comp_id", "ALA" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
// A second monomer (ALA) is added for position 1 in both sequence categories
// while all hetero flags remain "n"; having more than one monomer at a
// position without the hetero flag being set should be rejected
SECTION("Hetero consistency")
{
auto &db = f.front();
db["entity_poly_seq"].emplace({ //
{ "entity_id", 1 },
{ "num", 1 },
{ "mon_id", "ALA" },
{ "hetero", "n" }
});
db["pdbx_poly_seq_scheme"].emplace({ //
{ "asym_id", "A" },
{ "entity_id", "1" },
{ "seq_id", "1" },
{ "mon_id", "ALA" },
{ "ndb_seq_num", "1" },
{ "pdb_seq_num", "1" },
{ "auth_seq_num", "1" },
{ "pdb_mon_id", "ALA" },
{ "auth_mon_id", "ALA" },
{ "pdb_strand_id", "A" },
{ "pdb_ins_code", "." },
{ "hetero", "n" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment