Commit 25a90e3b by Maarten L. Hekkelman

split out pdbx code

fix dangling memory reference
parent 2f62759d
...@@ -159,7 +159,7 @@ if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN) ...@@ -159,7 +159,7 @@ if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
# Test to see if the submodule was loaded # Test to see if the submodule was loaded
find_file(HAVE_BOOST_REGEX_HPP regex.hpp PATHS ${PROJECT_SOURCE_DIR}/regex/include/boost NO_DEFAULT_PATH) find_file(HAVE_BOOST_REGEX_HPP regex.hpp PATHS ${PROJECT_SOURCE_DIR}/regex/include/boost NO_DEFAULT_PATH)
if (NOT HAVE_BOOST_REGEX_HPP) if(NOT HAVE_BOOST_REGEX_HPP)
message(FATAL_ERROR "The submodule regex was not loaded, please run git submodule update --init ") message(FATAL_ERROR "The submodule regex was not loaded, please run git submodule update --init ")
endif() endif()
...@@ -183,85 +183,42 @@ list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY}) ...@@ -183,85 +183,42 @@ list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
include(VersionString) include(VersionString)
write_version_header("LibCIFPP") write_version_header("LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
	# The generator reads syminfo.lib from the directory named by CLIBD. Accept
	# CLIBD either as a CMake variable or, as a fallback, from the environment.
	# (The previous `set($ENV{CLIBD} ${CLIBD})` set a variable *named by* the
	# environment value instead of reading it.)
	if(NOT CLIBD AND NOT "$ENV{CLIBD}" STREQUAL "")
		set(CLIBD "$ENV{CLIBD}")
	endif()

	if(NOT CLIBD)
		message(FATAL_ERROR "CIFPP_RECREATE_SYMOP_DATA is set but CLIBD is not; CLIBD must name the directory containing syminfo.lib")
	endif()

	# The tool to create the table
	add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
	target_link_libraries(symop-map-generator Threads::Threads ${CIFPP_REQUIRED_LIBRARIES})

	# Rule that regenerates the table. The dependencies are attached to the
	# rule itself, so the header is rebuilt whenever the generator or
	# syminfo.lib changes. The former `add_custom_target(OUTPUT ...)` call
	# only created a target literally named "OUTPUT" and is dropped.
	add_custom_command(
		OUTPUT "${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp"
		COMMAND $<TARGET_FILE:symop-map-generator> "${CLIBD}/syminfo.lib" "${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp"
		DEPENDS symop-map-generator "${CLIBD}/syminfo.lib"
		VERBATIM
	)
endif()
# Sources # Sources
set(project_sources set(project_sources
${PROJECT_SOURCE_DIR}/src/cif/category.cpp ${PROJECT_SOURCE_DIR}/src/category.cpp
${PROJECT_SOURCE_DIR}/src/cif/condition.cpp ${PROJECT_SOURCE_DIR}/src/condition.cpp
${PROJECT_SOURCE_DIR}/src/cif/datablock.cpp ${PROJECT_SOURCE_DIR}/src/datablock.cpp
${PROJECT_SOURCE_DIR}/src/cif/dictionary_parser.cpp ${PROJECT_SOURCE_DIR}/src/dictionary_parser.cpp
${PROJECT_SOURCE_DIR}/src/cif/file.cpp ${PROJECT_SOURCE_DIR}/src/file.cpp
${PROJECT_SOURCE_DIR}/src/cif/item.cpp ${PROJECT_SOURCE_DIR}/src/item.cpp
${PROJECT_SOURCE_DIR}/src/cif/parser.cpp ${PROJECT_SOURCE_DIR}/src/parser.cpp
${PROJECT_SOURCE_DIR}/src/cif/row.cpp ${PROJECT_SOURCE_DIR}/src/row.cpp
${PROJECT_SOURCE_DIR}/src/cif/validate.cpp ${PROJECT_SOURCE_DIR}/src/validate.cpp
# ${PROJECT_SOURCE_DIR}/src/pdb/Cif2PDB.cpp
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2Cif.cpp
# ${PROJECT_SOURCE_DIR}/src/pdb/PDB2CifRemark3.cpp
${PROJECT_SOURCE_DIR}/src/structure/AtomType.cpp
# ${PROJECT_SOURCE_DIR}/src/structure/BondMap.cpp
${PROJECT_SOURCE_DIR}/src/structure/Compound.cpp
# ${PROJECT_SOURCE_DIR}/src/structure/Structure.cpp
${PROJECT_SOURCE_DIR}/src/structure/Symmetry.cpp
${PROJECT_SOURCE_DIR}/src/structure/TlsParser.cpp
# ${PROJECT_SOURCE_DIR}/src/point.cpp
${PROJECT_SOURCE_DIR}/src/text.cpp ${PROJECT_SOURCE_DIR}/src/text.cpp
${PROJECT_SOURCE_DIR}/src/utilities.cpp ${PROJECT_SOURCE_DIR}/src/utilities.cpp
) )
set(project_headers set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/cif.hpp ${PROJECT_SOURCE_DIR}/include/cif++.hpp
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp ${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/item.hpp ${PROJECT_SOURCE_DIR}/include/cif++/item.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/datablock.hpp ${PROJECT_SOURCE_DIR}/include/cif++/datablock.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/file.hpp ${PROJECT_SOURCE_DIR}/include/cif++/file.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/writer.hpp ${PROJECT_SOURCE_DIR}/include/cif++/validate.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/validate.hpp ${PROJECT_SOURCE_DIR}/include/cif++/list.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/list.hpp ${PROJECT_SOURCE_DIR}/include/cif++/iterator.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/iterator.hpp ${PROJECT_SOURCE_DIR}/include/cif++/parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/parser.hpp ${PROJECT_SOURCE_DIR}/include/cif++/forward_decl.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/forward_decl.hpp ${PROJECT_SOURCE_DIR}/include/cif++/dictionary_parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/dictionary_parser.hpp ${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/condition.hpp ${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/category.hpp ${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/row.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/AtomType.hpp # ${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/BondMap.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/TlsParser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Symmetry.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Structure.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2Cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2CifRemark3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/Cif2PDB.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
) )
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/structure/SymOpTable_data.hpp) add_library(cifpp ${project_sources} ${project_headers})
add_library(cifpp::cifpp ALIAS cifpp) add_library(cifpp::cifpp ALIAS cifpp)
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
...@@ -282,11 +239,6 @@ if(BOOST_REGEX_STANDALONE) ...@@ -282,11 +239,6 @@ if(BOOST_REGEX_STANDALONE)
target_include_directories(cifpp PRIVATE regex/include) target_include_directories(cifpp PRIVATE regex/include)
endif() endif()
# if(BOOST_REGEX_STANDALONE)
# target_link_libraries(cifpp PRIVATE Boost::regex)
# endif()
# target_link_libraries(cifpp PRIVATE)
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup) target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
...@@ -325,8 +277,8 @@ if(UNIX) ...@@ -325,8 +277,8 @@ if(UNIX)
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}") target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif() endif()
generate_export_header(cifpp # generate_export_header(cifpp
EXPORT_FILE_NAME cif++/Cif++Export.hpp) # EXPORT_FILE_NAME cif++/Cif++Export.hpp)
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}) set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
...@@ -363,20 +315,16 @@ install( ...@@ -363,20 +315,16 @@ install(
COMPONENT Devel COMPONENT Devel
) )
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/cif++/Cif++Export.hpp"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
COMPONENT Devel
)
install(FILES install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic ${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF} ${COMPONENTS_CIF}
DESTINATION ${SHARE_INSTALL_DIR} DESTINATION ${SHARE_INSTALL_DIR}
) )
configure_package_config_file(Config.cmake.in configure_package_config_file(
${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
...@@ -426,12 +374,6 @@ if(CIFPP_BUILD_TESTS) ...@@ -426,12 +374,6 @@ if(CIFPP_BUILD_TESTS)
find_package(Boost REQUIRED) find_package(Boost REQUIRED)
list(APPEND CIFPP_tests list(APPEND CIFPP_tests
# pdb2cif
# rename-compound
# structure
# sugar
# unit
unit-v2) unit-v2)
foreach(CIFPP_TEST IN LISTS CIFPP_tests) foreach(CIFPP_TEST IN LISTS CIFPP_tests)
......
#include <iostream> #include <iostream>
#include <filesystem> #include <filesystem>
#include <cif++/cif.hpp> #include <cif++.hpp>
namespace fs = std::filesystem; namespace fs = std::filesystem;
......
...@@ -27,5 +27,5 @@ ...@@ -27,5 +27,5 @@
#pragma once #pragma once
#include <cif++/utilities.hpp> #include <cif++/utilities.hpp>
#include <cif++/cif/file.hpp> #include <cif++/file.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
...@@ -28,12 +28,12 @@ ...@@ -28,12 +28,12 @@
#include <array> #include <array>
#include <cif++/cif/forward_decl.hpp> #include <cif++/forward_decl.hpp>
#include <cif++/cif/condition.hpp> #include <cif++/condition.hpp>
#include <cif++/cif/iterator.hpp> #include <cif++/iterator.hpp>
#include <cif++/cif/row.hpp> #include <cif++/row.hpp>
#include <cif++/cif/validate.hpp> #include <cif++/validate.hpp>
// TODO: implement all of: // TODO: implement all of:
// https://en.cppreference.com/w/cpp/named_req/Container // https://en.cppreference.com/w/cpp/named_req/Container
......
// void write(std::ostream &os, const std::vector<size_t> &order, bool includeEmptyColumns)
// {
// if (empty())
// return;
// // If there are multiple rows in this category, we need a _loop
// if (size() == 1)
// {
// os << "loop_" << std::endl;
// std::vector<size_t> columnWidths;
// for (auto cix : order)
// {
// auto &col = mColumns[cix];
// os << '_' << mName << '.' << col.mName << ' ' << std::endl;
// columnWidths.push_back(2);
// }
// for (auto Row = mHead; Row != nullptr; Row = Row->mNext)
// {
// for (auto v = Row->mValues; v != nullptr; v = v->mNext)
// {
// if (strchr(v->mText, '\n') == nullptr)
// {
// size_t l = strlen(v->mText);
// if (not isUnquotedString(v->mText))
// l += 2;
// if (l > 132)
// continue;
// if (columnWidths[v->mColumnIndex] < l + 1)
// columnWidths[v->mColumnIndex] = l + 1;
// }
// }
// }
// for (auto Row = mHead; Row != nullptr; Row = Row->mNext) // loop over rows
// {
// size_t offset = 0;
// for (size_t cix : order)
// {
// size_t w = columnWidths[cix];
// std::string s;
// for (auto iv = Row->mValues; iv != nullptr; iv = iv->mNext)
// {
// if (iv->mColumnIndex == cix)
// {
// s = iv->mText;
// break;
// }
// }
// if (s.empty())
// s = "?";
// size_t l = s.length();
// if (not isUnquotedString(s.c_str()))
// l += 2;
// if (l < w)
// l = w;
// if (offset + l > 132 and offset > 0)
// {
// os << std::endl;
// offset = 0;
// }
// offset = detail::writeValue(os, s, offset, w);
// if (offset > 132)
// {
// os << std::endl;
// offset = 0;
// }
// }
// if (offset > 0)
// os << std::endl;
// }
// }
// else
// {
// // first find the indent level
// size_t l = 0;
// for (auto &col : mColumns)
// {
// std::string tag = '_' + mName + '.' + col.mName;
// if (l < tag.length())
// l = tag.length();
// }
// l += 3;
// for (size_t cix : order)
// {
// auto &col = mColumns[cix];
// os << '_' << mName << '.' << col.mName << std::string(l - col.mName.length() - mName.length() - 2, ' ');
// std::string s;
// for (auto iv = mHead->mValues; iv != nullptr; iv = iv->mNext)
// {
// if (iv->mColumnIndex == cix)
// {
// s = iv->mText;
// break;
// }
// }
// if (s.empty())
// s = "?";
// size_t offset = l;
// if (s.length() + l >= kMaxLineLength)
// {
// os << std::endl;
// offset = 0;
// }
// if (detail::writeValue(os, s, offset, 1) != 0)
// os << std::endl;
// }
// }
// os << "# " << std::endl;
// }
/// \brief Write this category to \a os in a simple one-item-per-line form.
///
/// The output starts with a line holding '#' followed by the category name;
/// after that every field of every row is written on a line of its own as
/// `_<category>.<field name> <field value>`.
void write(std::ostream &os) const
{
	os << '#' << m_name << std::endl;

	for (auto &row : *this)
	{
		for (auto &field : row)
		{
			os << '_' << m_name << '.' << field.name();
			os << ' ' << field.value() << std::endl;
		}
	}
}
// void Category::write(std::ostream &os, const std::vector<std::string> &columns)
// {
// // make sure all columns are present
// for (auto &c : columns)
// addColumn(c);
// std::vector<size_t> order;
// order.reserve(mColumns.size());
// for (auto &c : columns)
// order.push_back(getColumnIndex(c));
// for (size_t i = 0; i < mColumns.size(); ++i)
// {
// if (std::find(order.begin(), order.end(), i) == order.end())
// order.push_back(i);
// }
// write(os, order, true);
// }
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <iostream> #include <iostream>
#include <regex> #include <regex>
#include <cif++/cif/row.hpp> #include <cif++/row.hpp>
namespace cif namespace cif
{ {
......
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
#pragma once #pragma once
#include <cif++/cif/forward_decl.hpp> #include <cif++/forward_decl.hpp>
#include <cif++/cif/category.hpp> #include <cif++/category.hpp>
namespace cif namespace cif
{ {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/cif/validate.hpp> #include <cif++/validate.hpp>
namespace cif namespace cif
{ {
......
...@@ -28,8 +28,8 @@ ...@@ -28,8 +28,8 @@
#include <list> #include <list>
#include <cif++/cif/datablock.hpp> #include <cif++/datablock.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
namespace cif namespace cif
{ {
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
#include <cif++/text.hpp> #include <cif++/text.hpp>
#include <cif++/cif/forward_decl.hpp> #include <cif++/forward_decl.hpp>
namespace cif namespace cif
{ {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/cif/row.hpp> #include <cif++/row.hpp>
namespace cif namespace cif
{ {
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#include <map> #include <map>
#include <cif++/cif/row.hpp> #include <cif++/row.hpp>
namespace cif namespace cif
{ {
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/Cif++.hpp>
/// \brief Write the content of \a data to \a os as a PDB file.
/// NOTE(review): description inferred from the name; the implementation is not part of this header.
void WritePDBFile(std::ostream &os, const cif::Datablock &data);
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void WritePDBHeaderLines(std::ostream &os, const cif::Datablock &data);
// The four functions below each return one PDB record as a string instead of
// writing it to a stream. \a truncate_at defaults to 127.
// NOTE(review): presumably truncate_at is the maximum length of the returned
// line -- confirm against the implementation.
std::string GetPDBHEADERLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBCOMPNDLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBSOURCELine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
std::string GetPDBAUTHORLine(const cif::Datablock &data, std::string::size_type truncate_at = 127);
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/Cif++.hpp>
// --------------------------------------------------------------------
/// \brief A single record of a PDB file, linked to other records through mNext.
///
/// NOTE(review): mValue is declared with one element while a placement form of
/// operator new taking an extra vLen argument is provided; presumably the value
/// text lives in extra storage allocated directly behind the struct. If so,
/// mValue must stay the last data member -- confirm in the implementation.
struct PDBRecord
{
// Pointer to another record; presumably the next one in file order.
PDBRecord *mNext;
// Line number passed to the constructor as lineNr (presumably the line in the PDB file).
uint32_t mLineNr;
// Record name, fixed size buffer of 11 chars.
char mName[11];
// NOTE(review): presumably the length of the text stored in mValue -- confirm.
size_t mVlen;
// First character of the value text, see the note on the struct.
char mValue[1];
PDBRecord(uint32_t lineNr, const std::string &name, const std::string &value);
~PDBRecord();
// Class specific allocation functions; the two-argument forms receive vLen.
void *operator new(size_t);
void *operator new(size_t size, size_t vLen);
void operator delete(void *p);
void operator delete(void *p, size_t vLen);
// NOTE(review): presumably tests whether this record has the given name.
bool is(const char *name) const;
// Accessors taking a column or a column range. NOTE(review): judging by the
// return types these yield a char, a string, an int and a string
// respectively, taken from the value text; the exact column convention
// (0- or 1-based, inclusive or exclusive) is not visible here.
char vC(size_t column);
std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max());
int vI(int columnFirst, int columnLast);
std::string vF(size_t columnFirst, size_t columnLast);
};
// --------------------------------------------------------------------
/// \brief Read a PDB file from \a pdbFile and store the result in \a cifFile.
/// NOTE(review): description inferred from the name and parameter types; the implementation is not part of this header.
void ReadPDBFile(std::istream &pdbFile, cif::File &cifFile);
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/PDB2Cif.hpp>
// --------------------------------------------------------------------
struct TemplateLine;
/// \brief Base class for parsers of the REMARK 3 section of a PDB file.
///
/// The only public entry point is the static parse(); instances can only be
/// created by derived classes since the constructor is protected.
/// NOTE(review): presumably each derived class handles the REMARK 3 layout of
/// one refinement program, described by an array of TemplateLine -- confirm.
class Remark3Parser
{
public:
virtual ~Remark3Parser() {}
// Static entry point taking the experimental method, a PDB record and the
// datablock to work on. NOTE(review): the meaning of the bool result is not
// visible here; presumably it signals whether parsing succeeded.
static bool parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db);
virtual std::string program();
virtual std::string version();
protected:
Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
// Instance level parse, overridable. NOTE(review): the returned float is
// presumably a score for how well the template matched -- confirm.
virtual float parse();
std::string nextLine();
bool match(const char* expr, int nextState);
void storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
void storeRefineLsRestr(const char* type, std::initializer_list<const char*> values);
void updateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
// Hook for derived classes, does nothing by default.
virtual void fixup() {}
std::string mName;
std::string mExpMethod;
PDBRecord* mRec;
// Held by value although the constructor receives a reference.
// NOTE(review): confirm that working on a separate Datablock is intended.
cif::Datablock mDb;
std::string mLine;
std::smatch mM;
uint32_t mState;
const TemplateLine* mTemplate;
uint32_t mTemplateCount;
std::regex mProgramVersion;
};
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/cif/item.hpp> #include <cif++/item.hpp>
namespace cif namespace cif
{ {
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <cstdint>
#include <stdexcept>
#include <string>
namespace mmcif
{
/// \brief The known atom types.
///
/// Every enumerator has an explicit value. The enumerators are listed here in
/// ascending order of that value, followed by the separate entry for deuterium.
enum AtomType : uint8_t
{
	Nn = 0,   // Unknown

	H = 1,    // Hydrogen
	He = 2,   // Helium

	Li = 3,   // Lithium
	Be = 4,   // Beryllium
	B = 5,    // Boron
	C = 6,    // Carbon
	N = 7,    // Nitrogen
	O = 8,    // Oxygen
	F = 9,    // Fluorine
	Ne = 10,  // Neon

	Na = 11,  // Sodium
	Mg = 12,  // Magnesium
	Al = 13,  // Aluminium
	Si = 14,  // Silicon
	P = 15,   // Phosphorus
	S = 16,   // Sulfur
	Cl = 17,  // Chlorine
	Ar = 18,  // Argon

	K = 19,   // Potassium
	Ca = 20,  // Calcium
	Sc = 21,  // Scandium
	Ti = 22,  // Titanium
	V = 23,   // Vanadium
	Cr = 24,  // Chromium
	Mn = 25,  // Manganese
	Fe = 26,  // Iron
	Co = 27,  // Cobalt
	Ni = 28,  // Nickel
	Cu = 29,  // Copper
	Zn = 30,  // Zinc
	Ga = 31,  // Gallium
	Ge = 32,  // Germanium
	As = 33,  // Arsenic
	Se = 34,  // Selenium
	Br = 35,  // Bromine
	Kr = 36,  // Krypton

	Rb = 37,  // Rubidium
	Sr = 38,  // Strontium
	Y = 39,   // Yttrium
	Zr = 40,  // Zirconium
	Nb = 41,  // Niobium
	Mo = 42,  // Molybdenum
	Tc = 43,  // Technetium
	Ru = 44,  // Ruthenium
	Rh = 45,  // Rhodium
	Pd = 46,  // Palladium
	Ag = 47,  // Silver
	Cd = 48,  // Cadmium
	In = 49,  // Indium
	Sn = 50,  // Tin
	Sb = 51,  // Antimony
	Te = 52,  // Tellurium
	I = 53,   // Iodine
	Xe = 54,  // Xenon

	Cs = 55,  // Caesium
	Ba = 56,  // Barium
	La = 57,  // Lanthanum
	Ce = 58,  // Cerium
	Pr = 59,  // Praseodymium
	Nd = 60,  // Neodymium
	Pm = 61,  // Promethium
	Sm = 62,  // Samarium
	Eu = 63,  // Europium
	Gd = 64,  // Gadolinium
	Tb = 65,  // Terbium
	Dy = 66,  // Dysprosium
	Ho = 67,  // Holmium
	Er = 68,  // Erbium
	Tm = 69,  // Thulium
	Yb = 70,  // Ytterbium
	Lu = 71,  // Lutetium
	Hf = 72,  // Hafnium
	Ta = 73,  // Tantalum
	W = 74,   // Tungsten
	Re = 75,  // Rhenium
	Os = 76,  // Osmium
	Ir = 77,  // Iridium
	Pt = 78,  // Platinum
	Au = 79,  // Gold
	Hg = 80,  // Mercury
	Tl = 81,  // Thallium
	Pb = 82,  // Lead
	Bi = 83,  // Bismuth
	Po = 84,  // Polonium
	At = 85,  // Astatine
	Rn = 86,  // Radon

	Fr = 87,  // Francium
	Ra = 88,  // Radium
	Ac = 89,  // Actinium
	Th = 90,  // Thorium
	Pa = 91,  // Protactinium
	U = 92,   // Uranium
	Np = 93,  // Neptunium
	Pu = 94,  // Plutonium
	Am = 95,  // Americium
	Cm = 96,  // Curium
	Bk = 97,  // Berkelium
	Cf = 98,  // Californium
	Es = 99,  // Einsteinium
	Fm = 100, // Fermium
	Md = 101, // Mendelevium
	No = 102, // Nobelium
	Lr = 103, // Lawrencium
	Rf = 104, // Rutherfordium
	Db = 105, // Dubnium
	Sg = 106, // Seaborgium
	Bh = 107, // Bohrium
	Hs = 108, // Hassium
	Mt = 109, // Meitnerium
	Ds = 110, // Darmstadtium
	Rg = 111, // Roentgenium
	Cn = 112, // Copernicium
	Nh = 113, // Nihonium
	Fl = 114, // Flerovium
	Mc = 115, // Moscovium
	Lv = 116, // Livermorium
	Ts = 117, // Tennessine
	Og = 118, // Oganesson

	D = 129,  // Deuterium
};
// --------------------------------------------------------------------
// AtomTypeInfo
/// \brief The kinds of radius stored per atom type.
///
/// The enumerators have consecutive values starting at zero; they are used
/// as index into AtomTypeInfo::radii.
enum class RadiusType
{
Calculated,
Empirical,
CovalentEmpirical,
SingleBond,
DoubleBond,
TripleBond,
VanderWaals,
// Not a radius type: being the last enumerator, its value equals the number of radius types above.
TypeCount
};
/// The number of radius types, used as the size of AtomTypeInfo::radii.
constexpr size_t RadiusTypeCount = static_cast<size_t>(RadiusType::TypeCount);
/// \brief Selects which of the two ionic radii is requested.
enum class IonicRadiusType
{
	Effective,
	Crystal
};
/// \brief The static properties recorded for one atom type.
struct AtomTypeInfo
{
// The atom type this record describes.
AtomType type;
std::string name;
std::string symbol;
float weight;
// True when this atom type is a metal.
bool metal;
// One radius per RadiusType, indexed by the enumerator value.
// AtomTypeTraits::radius() divides the stored number by 100 before
// returning it. NOTE(review): presumably stored in picometers and
// returned in angstrom -- confirm.
float radii[RadiusTypeCount];
};
// Table of AtomTypeInfo records, defined elsewhere.
// NOTE(review): its size and ordering are not visible in this header.
extern const AtomTypeInfo kKnownAtoms[];
// --------------------------------------------------------------------
// AtomTypeTraits
/// \brief Accessor class for the static properties of one atom type.
///
/// An instance holds a pointer to an AtomTypeInfo record; the simple
/// accessors return the fields of that record.
class AtomTypeTraits
{
  public:
	/// \brief Construct the traits for atom type \a a
	AtomTypeTraits(AtomType a);

	/// \brief Construct the traits for the atom type with symbol \a symbol
	AtomTypeTraits(const std::string &symbol);

	AtomType type() const { return mInfo->type; }
	std::string name() const { return mInfo->name; }
	std::string symbol() const { return mInfo->symbol; }
	float weight() const { return mInfo->weight; }
	bool isMetal() const { return mInfo->metal; }

	static bool isElement(const std::string &symbol);
	static bool isMetal(const std::string &symbol);

	/// \brief Return the radius of the requested kind
	///
	/// \param type The kind of radius requested, single bond by default
	/// \return     The stored radius divided by 100
	/// \throws std::invalid_argument when \a type is not one of the radius
	///         types preceding RadiusType::TypeCount
	float radius(RadiusType type = RadiusType::SingleBond) const
	{
		// Compare as unsigned index, this way a negative enum value is
		// rejected as well instead of indexing in front of the array.
		const size_t index = static_cast<size_t>(type);
		if (index >= static_cast<size_t>(RadiusType::TypeCount))
			throw std::invalid_argument("invalid radius requested");
		return mInfo->radii[index] / 100.f;
	}

	/// \brief Return the radius for a charged version of this atom in a solid crystal
	///
	/// \param charge  The charge of the ion
	/// \return        The radius of the ion
	float crystal_ionic_radius(int charge) const;

	/// \brief Return the radius for a charged version of this atom in a non-solid environment
	///
	/// \param charge  The charge of the ion
	/// \return        The radius of the ion
	float effective_ionic_radius(int charge) const;

	/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
	///
	/// \param charge  The charge of the ion
	/// \param type    Which of the two ionic radii to return
	/// \return        The radius of the ion
	float ionic_radius(int charge, IonicRadiusType type = IonicRadiusType::Effective) const
	{
		return type == IonicRadiusType::Effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
	}

	// data type encapsulating the Waasmaier & Kirfel scattering factors
	// in a simplified form (only a and b).
	// Added the electron scattering factors as well
	struct SFData
	{
		double a[6], b[6];
	};

	// to get the Cval and Siva values, use this constant as charge:
	enum
	{
		kWKSFVal = -99
	};

	const SFData &wksf(int charge = 0) const;
	const SFData &elsf() const;

  private:
	// The record all accessors read from
	const struct AtomTypeInfo *mInfo;
};
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <filesystem>
#include <stdexcept>
#include <unordered_map>
#include <cif++/structure/Structure.hpp>
namespace mmcif
{
/// \brief The exception type declared for errors related to BondMap.
///
/// A plain std::runtime_error carrying the message passed to the constructor.
class BondMapException : public std::runtime_error
{
  public:
	BondMapException(const std::string &msg)
		: std::runtime_error(msg)
	{
	}
};
/// \brief Lookup structure telling whether two atoms of a structure are bonded.
///
/// Atoms are mapped to a 32 bit number through their id (member index), a
/// pair of such numbers is packed into a single 64 bit key and the keys are
/// kept in the sets bond and bond_1_4. Instances cannot be copied.
class BondMap
{
  public:
	BondMap(const Structure &p);

	BondMap(const BondMap &) = delete;
	BondMap &operator=(const BondMap &) = delete;

	/// \brief Return true when atoms \a a and \a b are bonded
	///
	/// The ids are looked up with at(), so an atom with an unknown id
	/// results in a std::out_of_range exception.
	bool operator()(const Atom &a, const Atom &b) const
	{
		const uint32_t ixa = index.at(a.id());
		const uint32_t ixb = index.at(b.id());

		return isBonded(ixa, ixb);
	}

	/// \brief Return true when the pair \a a and \a b is stored in the 1-4 set
	bool is1_4(const Atom &a, const Atom &b) const
	{
		const uint64_t k = key(index.at(a.id()), index.at(b.id()));

		return bond_1_4.count(k) != 0;
	}

	// links coming from the struct_conn records:
	std::vector<std::string> linked(const Atom &a) const;

	// This list of atomID's is coming from either CCD or the CCP4 dictionaries loaded
	static std::vector<std::string> atomIDsForCompound(const std::string &compoundID);

  private:
	/// Return true when the pair of atom numbers is stored in the bond set
	bool isBonded(uint32_t ai, uint32_t bi) const
	{
		const uint64_t k = key(ai, bi);

		return bond.count(k) != 0;
	}

	/// Pack two atom numbers into one key, the smaller of the two ends up
	/// in the low 32 bits so the order of the arguments does not matter.
	uint64_t key(uint32_t a, uint32_t b) const
	{
		const uint64_t lo = a > b ? b : a;
		const uint64_t hi = a > b ? a : b;

		return lo | (hi << 32);
	}

	/// Unpack a key, the high 32 bits are returned first.
	std::tuple<uint32_t, uint32_t> dekey(uint64_t k) const
	{
		const uint32_t hi = static_cast<uint32_t>(k >> 32);
		const uint32_t lo = static_cast<uint32_t>(k);

		return std::make_tuple(hi, lo);
	}

	uint32_t dim;
	std::unordered_map<std::string, uint32_t> index;
	std::set<uint64_t> bond, bond_1_4;

	std::map<std::string, std::set<std::string>> link;
};
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <tuple>
#include <vector>
#include <cif++/cif.hpp>
#include <cif++/structure/AtomType.hpp>
namespace mmcif
{
// --------------------------------------------------------------------
class Compound;
struct CompoundAtom;
class CompoundFactoryImpl;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
///
/// The enumerator names are the abbreviated codes; the quoted text after each
/// enumerator is the corresponding description.
enum class BondType
{
sing, ///< 'single bond'
doub, ///< 'double bond'
trip, ///< 'triple bond'
quad, ///< 'quadruple bond'
arom, ///< 'aromatic bond'
poly, ///< 'polymeric bond'
delo, ///< 'delocalized double bond'
pi, ///< 'pi bond'
};
/// \brief Convert \a bondType to its string representation.
/// NOTE(review): presumably the result is the abbreviated code (e.g. "sing") — confirm in the implementation.
std::string to_string(BondType bondType);
/// \brief Convert the string \a bondType to the corresponding BondType.
/// NOTE(review): the behaviour for an unrecognised string is decided by the implementation, which is not visible here.
BondType from_string(const std::string& bondType);
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
///
/// All scalar members except \a typeSymbol carry a default value so that a
/// default constructed CompoundAtom never exposes indeterminate data.
struct CompoundAtom
{
	std::string id;            ///< Identifier of the atom within the compound
	AtomType typeSymbol;       ///< Element type of the atom
	int charge = 0;            ///< Charge of the atom
	bool aromatic = false;     ///< Aromatic flag
	bool leavingAtom = false;  ///< Leaving-atom flag
	bool stereoConfig = false; ///< Stereo configuration flag
	float x = 0, y = 0, z = 0; ///< Coordinates, zero unless filled in
};
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
///
/// \a type defaults to BondType::sing, the first enumerator, which is the
/// value an empty-brace initialisation would have produced as well.
struct CompoundBond
{
	std::string atomID[2];           ///< The IDs of the two atoms taking part in the bond
	BondType type = BondType::sing;  ///< The kind of bond
	bool aromatic = false;           ///< Aromatic flag
	bool stereoConfig = false;       ///< Stereo configuration flag
};
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CCD by default.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
class Compound
{
  public:
	// accessors, all returning a copy of the stored value

	/// \brief The ID of this compound
	std::string id() const
	{
		return mID;
	}

	/// \brief The name of this compound
	std::string name() const
	{
		return mName;
	}

	/// \brief The type of this compound
	std::string type() const
	{
		return mType;
	}

	/// \brief The group of this compound
	std::string group() const
	{
		return mGroup;
	}

	/// \brief The chemical formula of this compound
	std::string formula() const
	{
		return mFormula;
	}

	/// \brief The formula weight of this compound
	float formulaWeight() const
	{
		return mFormulaWeight;
	}

	/// \brief The formal charge of this compound
	int formalCharge() const
	{
		return mFormalCharge;
	}

	/// \brief The atoms of this compound
	const std::vector<CompoundAtom> &atoms() const
	{
		return mAtoms;
	}

	/// \brief The bonds of this compound
	const std::vector<CompoundBond> &bonds() const
	{
		return mBonds;
	}

	/// \brief Return a copy of the atom with ID \a atomID
	CompoundAtom getAtomByID(const std::string &atomID) const;

	/// \brief Return whether the atoms \a atomId_1 and \a atomId_2 are bonded
	bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;

	// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
	// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
	// float chiralVolume(const std::string &centreID) const;

	/// \brief Return true when the ID is one of the three IDs used for water
	bool isWater() const
	{
		return mID == "HOH" || mID == "H2O" || mID == "WAT";
	}

  private:
	// Only the factory implementations are allowed to construct compounds
	friend class CompoundFactoryImpl;
	friend class CCDCompoundFactoryImpl;
	friend class CCP4CompoundFactoryImpl;

	Compound(cif::datablock &db);
	Compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);

	std::string mID;
	std::string mName;
	std::string mType;
	std::string mGroup;
	std::string mFormula;
	float mFormulaWeight = 0;
	int mFormalCharge = 0;
	std::vector<CompoundAtom> mAtoms;
	std::vector<CompoundBond> mBonds;
};
// --------------------------------------------------------------------
// Factory class for Compound and Link objects
/// Two lookup tables, defined elsewhere, mapping a string to a single character.
/// NOTE(review): presumably these map residue names to the one-letter codes of amino acids and of nucleotide bases respectively — confirm at the definition.
CIFPP_EXPORT extern const std::map<std::string, char> kAAMap, kBaseMap;
/// \brief Factory handing out Compound objects.
///
/// Instances cannot be created or copied by users (the constructor is private
/// and the copy operations are deleted), access goes through instance().
class CompoundFactory
{
public:
/// \brief Initialise a singleton instance.
///
/// If you have a multithreaded application and want to have different
/// compounds in each thread (e.g. a web service processing user requests
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
/// flag to true.
static void init(bool useThreadLocalInstanceOnly);
/// \brief Return the factory instance.
/// NOTE(review): presumably this returns the thread local instance when init() was called with true — confirm in the implementation.
static CompoundFactory &instance();
/// \brief Reset the factory.
/// NOTE(review): exactly what is released is decided by the implementation, not visible here.
static void clear();
/// \brief Set the default dictionary to the file \a inDictFile
void setDefaultDictionary(const std::filesystem::path &inDictFile);
/// \brief Push the dictionary in \a inDictFile; undo with popDictionary()
void pushDictionary(const std::filesystem::path &inDictFile);
/// \brief Pop a dictionary that was pushed with pushDictionary()
void popDictionary();
/// \brief Return whether \a res_name is a known peptide
bool isKnownPeptide(const std::string &res_name) const;
/// \brief Return whether \a res_name is a known base
bool isKnownBase(const std::string &res_name) const;
/// \brief Create the Compound object for \a id
///
/// This will create the Compound instance for \a id if it doesn't exist already.
/// The result is owned by this factory and should not be deleted by the user.
/// \param id The Compound ID, a three letter code usually
/// \result The compound, or nullptr if it could not be created (missing info)
const Compound *create(std::string id);
~CompoundFactory();
private:
CompoundFactory();
CompoundFactory(const CompoundFactory &) = delete;
CompoundFactory &operator=(const CompoundFactory &) = delete;
// One process wide and one per-thread instance pointer; the flag records which of the two is to be used.
static std::unique_ptr<CompoundFactory> sInstance;
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
static bool sUseThreadLocalInstance;
std::shared_ptr<CompoundFactoryImpl> mImpl;
};
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <numeric>
#include <cif++/cif.hpp>
#include <cif++/structure/AtomType.hpp>
#include <cif++/structure/Compound.hpp>
#include <cif++/point.hpp>
/*
To modify a structure, you will have to use actions.
The currently supported actions are:
// - Move atom to new location
- Remove atom
// - Add new atom that was formerly missing
// - Add alternate Residue
-
*/
namespace mmcif
{
class Atom;
class Residue;
class Monomer;
class Polymer;
class Structure;
class File;
// --------------------------------------------------------------------
/// \brief Handle to the data of a single atom.
///
/// An Atom stores a shared pointer to an AtomImpl, so copying an Atom gives a
/// second handle to the same data. Use clone() to obtain a copy of the data
/// itself. A default constructed Atom is empty: it converts to false and every
/// accessor that needs the data throws a std::runtime_error.
class Atom
{
  private:
	/// \brief The data shared by all handles to the same atom.
	struct AtomImpl : public std::enable_shared_from_this<AtomImpl>
	{
		AtomImpl(cif::datablock &db, const std::string &id, cif::row_handle row);

		// constructor for a symmetry copy of an atom
		AtomImpl(const AtomImpl &impl, const Point &loc, const std::string &sym_op);

		AtomImpl(const AtomImpl &i) = default;

		void prefetch();

		int compare(const AtomImpl &b) const;

		bool getAnisoU(float anisou[6]) const;

		int charge() const;

		void moveTo(const Point &p);

		const Compound *compound() const;

		const std::string get_property(const std::string_view name) const;
		void set_property(const std::string_view name, const std::string &value);

		const cif::datablock &mDb;
		std::string mID;
		AtomType mType;

		std::string mAtomID;
		std::string mCompID;
		std::string mAsymID;
		int mSeqID;
		std::string mAltID;
		std::string mAuthSeqID;

		Point mLocation;
		int mRefcount;
		cif::row_handle mRow;

		// mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs;

		mutable const Compound *mCompound = nullptr;

		bool mSymmetryCopy = false;
		bool mClone = false;

		std::string mSymmetryOperator = "1_555";
	};

  public:
	/// \brief Construct an empty atom
	Atom() {}

	/// \brief Construct a handle sharing the data in \a impl
	Atom(std::shared_ptr<AtomImpl> impl)
		: mImpl(impl)
	{
	}

	/// \brief Construct a second handle to the data of \a rhs
	Atom(const Atom &rhs)
		: mImpl(rhs.mImpl)
	{
	}

	Atom(cif::datablock &db, cif::row_handle &row);

	// a special constructor to create symmetry copies
	Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);

	/// \brief True when this atom refers to data
	explicit operator bool() const { return (bool)mImpl; }

	/// \brief Return a copy of this atom, with data copied instead of referenced
	///
	/// \throws std::runtime_error when this atom is empty; the data is reached
	/// through impl() so that an empty atom is reported instead of dereferenced.
	Atom clone() const
	{
		auto copy = std::make_shared<AtomImpl>(impl());
		copy->mClone = true;
		return Atom(copy);
	}

	Atom &operator=(const Atom &rhs) = default;

	/// \brief Return the property \a name converted to type \a T
	///
	/// Only the explicit specialisations of this template are defined.
	template <typename T>
	T get_property(const std::string_view name) const;

	/// \brief Set the property \a name to \a value
	/// \throws std::logic_error when this atom is empty
	void set_property(const std::string_view name, const std::string &value)
	{
		if (not mImpl)
			throw std::logic_error("Error trying to modify an uninitialized atom");
		mImpl->set_property(name, value);
	}

	/// \brief Set the property \a name to the arithmetic \a value, formatted with std::to_string
	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
	void property(const std::string_view name, const T &value)
	{
		set_property(name, std::to_string(value));
	}

	const std::string &id() const { return impl().mID; }
	AtomType type() const { return impl().mType; }

	Point location() const { return impl().mLocation; }

	/// \brief Move this atom to \a p
	/// \throws std::logic_error when this atom is empty
	void location(Point p)
	{
		if (not mImpl)
			throw std::logic_error("Error trying to modify an uninitialized atom");
		mImpl->moveTo(p);
	}

	/// \brief Translate the position of this atom by \a t
	void translate(Point t);

	/// \brief Rotate the position of this atom by \a q
	void rotate(Quaternion q);

	/// \brief Translate and rotate the position of this atom by \a t and \a q
	void translateAndRotate(Point t, Quaternion q);

	/// \brief Translate, rotate and translate again the coordinates this atom by \a t1 , \a q and \a t2
	void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);

	// for direct access to underlying data, be careful!
	const cif::row_handle getRow() const { return impl().mRow; }
	const cif::row_handle getRowAniso() const;

	bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
	std::string symmetry() const { return impl().mSymmetryOperator; }

	const Compound &compound() const;

	/// \brief True when the compound ID is one of the three IDs used for water
	bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }

	int charge() const;

	float uIso() const;
	bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
	float occupancy() const;

	// specifications
	const std::string &labelAtomID() const { return impl().mAtomID; }
	const std::string &labelCompID() const { return impl().mCompID; }
	const std::string &labelAsymID() const { return impl().mAsymID; }
	std::string labelEntityID() const;
	int labelSeqID() const { return impl().mSeqID; }
	const std::string &labelAltID() const { return impl().mAltID; }

	/// \brief True when this atom has a non-empty alt ID
	bool isAlternate() const { return not impl().mAltID.empty(); }

	std::string authAtomID() const;
	std::string authCompID() const;
	std::string authAsymID() const;
	const std::string &authSeqID() const { return impl().mAuthSeqID; }
	std::string pdbxAuthInsCode() const;
	std::string pdbxAuthAltID() const;

	std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
	std::string pdbID() const;   // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code

	bool operator==(const Atom &rhs) const;
	bool operator!=(const Atom &rhs) const
	{
		return not operator==(rhs);
	}

	// access data in compound for this atom

	// convenience routine
	/// \brief True when the atom ID is N, O, C or CA
	bool isBackBone() const
	{
		// labelAtomID returns a reference into the impl, no need to copy the string
		const auto &atomID = labelAtomID();
		return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
	}

	void swap(Atom &b)
	{
		std::swap(mImpl, b.mImpl);
	}

	/// \brief Three-way compare this atom with \a b
	///
	/// \throws std::runtime_error when either atom is empty; both sides are
	/// reached through impl() so that an empty \a b is reported instead of
	/// dereferenced.
	int compare(const Atom &b) const { return impl().compare(b.impl()); }

	bool operator<(const Atom &rhs) const
	{
		return compare(rhs) < 0;
	}

	friend std::ostream &operator<<(std::ostream &os, const Atom &atom);

	/// \brief Synchronize data with underlying cif data
	void sync()
	{
		if (mImpl)
			mImpl->prefetch();
	}

  private:
	friend class Structure;

	/// \brief Checked access to the data
	/// \throws std::runtime_error when this atom is empty
	const AtomImpl &impl() const
	{
		if (not mImpl)
			throw std::runtime_error("Uninitialized atom, not found?");
		return *mImpl;
	}

	std::shared_ptr<AtomImpl> mImpl;
};
/// \brief Return the property \a name as a string
template <>
inline std::string Atom::get_property<std::string>(const std::string_view name) const
{
	return impl().get_property(name);
}

/// \brief Return the property \a name as an int, an empty property yields 0
template <>
inline int Atom::get_property<int>(const std::string_view name) const
{
	const auto text = impl().get_property(name);
	if (text.empty())
		return 0;
	return std::stoi(text);
}

/// \brief Return the property \a name as a float
///
/// The value is passed to std::stof as is, an empty property is not treated specially.
template <>
inline float Atom::get_property<float>(const std::string_view name) const
{
	const auto text = impl().get_property(name);
	return std::stof(text);
}
/// \brief Exchange the contents of the atoms \a a and \a b, forwards to Atom::swap
inline void swap(mmcif::Atom &a, mmcif::Atom &b)
{
	a.swap(b);
}
/// \brief Return the distance between the locations of the atoms \a a and \a b
inline double Distance(const Atom &a, const Atom &b)
{
	// Forward to the overload of Distance taking two locations
	return Distance(a.location(),
		b.location());
}
/// \brief Return the squared distance between the locations of the atoms \a a and \a b
inline double DistanceSquared(const Atom &a, const Atom &b)
{
	// Forward to the overload of DistanceSquared taking two locations
	return DistanceSquared(a.location(),
		b.location());
}
/// A list of atoms. Each Atom is a handle, so copying an AtomView copies handles rather than atom data.
typedef std::vector<Atom> AtomView;
// --------------------------------------------------------------------
/// \brief The kinds of entity distinguished by Structure and Residue.
/// NOTE(review): presumably these mirror the values of the mmCIF entity type — confirm against the code filling them in.
enum class EntityType
{
Polymer, NonPolymer, Macrolide, Water, Branched
};
// --------------------------------------------------------------------
/// \brief A residue: a group of atoms sharing a compound ID, asym ID and sequence IDs.
///
/// Residues cannot be copied, only moved. A default constructed residue has no
/// structure and reports empty() as true.
class Residue
{
  public:
	// constructor
	Residue(const Structure &structure, const std::string &compoundID,
		const std::string &asymID, int seqID, const std::string &authSeqID)
		: mStructure(&structure)
		, mCompoundID(compoundID)
		, mAsymID(asymID)
		, mSeqID(seqID)
		, mAuthSeqID(authSeqID)
	{
	}

	Residue(const Residue &rhs) = delete;
	Residue &operator=(const Residue &rhs) = delete;

	Residue(Residue &&rhs);
	Residue &operator=(Residue &&rhs);

	virtual ~Residue();

	const Compound &compound() const;

	AtomView &atoms();
	const AtomView &atoms() const;

	void addAtom(Atom &atom);

	/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
	AtomView unique_atoms() const;

	/// \brief The alt ID used for the unique atoms
	std::string unique_alt_id() const;

	Atom atomByID(const std::string &atomID) const;

	const std::string &compoundID() const { return mCompoundID; }
	void setCompoundID(const std::string &id) { mCompoundID = id; }

	const std::string &asymID() const { return mAsymID; }
	int seqID() const { return mSeqID; }
	std::string entityID() const;

	EntityType entityType() const;

	std::string authAsymID() const;
	std::string authSeqID() const;
	std::string authInsCode() const;

	// return a human readable PDB-like auth id (chain+seqnr+iCode)
	std::string authID() const;

	// similar for mmCIF space
	std::string labelID() const;

	// Is this residue a single entity?
	bool isEntity() const;

	/// \brief True when the compound ID is one of the three IDs used for water
	///
	/// The same three IDs are accepted by Compound::isWater and Atom::isWater,
	/// so a residue and its atoms agree on being water.
	bool isWater() const
	{
		return mCompoundID == "HOH" or mCompoundID == "H2O" or mCompoundID == "WAT";
	}

	/// \brief The structure this residue belongs to; must not be called on an empty() residue
	const Structure &structure() const { return *mStructure; }

	/// \brief True when this residue has no structure
	bool empty() const { return mStructure == nullptr; }

	bool hasAlternateAtoms() const;

	/// \brief Return the list of unique alt ID's present in this residue
	std::set<std::string> getAlternateIDs() const;

	/// \brief Return the list of unique atom ID's
	std::set<std::string> getAtomIDs() const;

	/// \brief Return the list of atoms having ID \a atomID
	AtomView getAtomsByID(const std::string &atomID) const;

	// some routines for 3d work
	std::tuple<Point, float> centerAndRadius() const;

	friend std::ostream &operator<<(std::ostream &os, const Residue &res);

	friend Structure;

	/// \brief Two residues are equal when they are the same object or when
	/// structure, seq ID, asym ID, compound ID and auth seq ID all match.
	bool operator==(const mmcif::Residue &rhs) const
	{
		return this == &rhs or (
			mStructure == rhs.mStructure and
			mSeqID == rhs.mSeqID and
			mAsymID == rhs.mAsymID and
			mCompoundID == rhs.mCompoundID and
			mAuthSeqID == rhs.mAuthSeqID);
	}

  protected:
	Residue() {}

	friend class Polymer;

	const Structure *mStructure = nullptr;
	std::string mCompoundID, mAsymID;
	int mSeqID = 0;
	std::string mAuthSeqID;
	AtomView mAtoms;
};
// --------------------------------------------------------------------
/// \brief A Monomer models a single Residue in a protein chain.
///
/// Besides the Residue data it records the Polymer it is part of and its
/// index; these two values define equality of monomers.
class Monomer : public Residue
{
  public:
	// Monomer();

	// Monomers can be moved but not copied
	Monomer(const Monomer &rhs) = delete;
	Monomer &operator=(const Monomer &rhs) = delete;

	Monomer(Monomer &&rhs);
	Monomer &operator=(Monomer &&rhs);

	Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
		const std::string &compoundID);

	bool is_first_in_chain() const;
	bool is_last_in_chain() const;

	// convenience
	bool has_alpha() const;
	bool has_kappa() const;

	// Assuming this is really an amino acid...
	float phi() const;
	float psi() const;
	float alpha() const;
	float kappa() const;
	float tco() const;
	float omega() const;

	// torsion angles
	size_t nrOfChis() const;
	float chi(size_t i) const;

	bool isCis() const;

	/// \brief Returns true if the four atoms C, CA, N and O are present
	bool isComplete() const;

	/// \brief Returns true if any of the backbone atoms has an alternate
	bool hasAlternateBackboneAtoms() const;

	// Shortcuts looking up an atom by its ID
	Atom CAlpha() const
	{
		return atomByID("CA");
	}

	Atom C() const
	{
		return atomByID("C");
	}

	Atom N() const
	{
		return atomByID("N");
	}

	Atom O() const
	{
		return atomByID("O");
	}

	Atom H() const
	{
		return atomByID("H");
	}

	/// \brief True when \a rhs is another monomer and areBonded reports a bond with it
	bool isBondedTo(const Monomer &rhs) const
	{
		if (this == &rhs)
			return false;
		return areBonded(*this, rhs);
	}

	static bool areBonded(const Monomer &a, const Monomer &b, float errorMargin = 0.5f);
	static bool isCis(const Monomer &a, const Monomer &b);
	static float omega(const Monomer &a, const Monomer &b);

	// for LEU and VAL
	float chiralVolume() const;

	/// \brief Monomers are equal when they have the same polymer and the same index
	bool operator==(const Monomer &rhs) const
	{
		return mPolymer == rhs.mPolymer && mIndex == rhs.mIndex;
	}

  private:
	const Polymer *mPolymer;
	size_t mIndex;
};
// --------------------------------------------------------------------
/// \brief A Polymer is a vector of Monomer objects with the entity ID and asym ID they share.
///
/// Polymers cannot be copied.
class Polymer : public std::vector<Monomer>
{
  public:
	Polymer(const Structure &s, const std::string &entityID, const std::string &asymID);

	Polymer(const Polymer &) = delete;
	Polymer &operator=(const Polymer &) = delete;

	// Polymer(Polymer&& rhs) = delete;
	// Polymer& operator=(Polymer&& rhs) = de;

	/// \brief Return the monomer with sequence ID \a seqID
	Monomer &getBySeqID(int seqID);
	const Monomer &getBySeqID(int seqID) const;

	/// \brief The structure this polymer belongs to
	Structure *structure() const
	{
		return mStructure;
	}

	/// \brief A copy of the asym ID
	std::string asymID() const
	{
		return mAsymID;
	}

	/// \brief A copy of the entity ID
	std::string entityID() const
	{
		return mEntityID;
	}

	std::string chainID() const;

	int Distance(const Monomer &a, const Monomer &b) const;

  private:
	Structure *mStructure;
	std::string mEntityID;
	std::string mAsymID;
	// cif::row_handleSet mPolySeq;
};
// --------------------------------------------------------------------
// Sugar and Branch, to describe glycosylation sites
class Branch;
/// \brief A Sugar is a Residue that is part of a Branch.
///
/// The number of a sugar is its auth seq ID interpreted as an integer.
class Sugar : public Residue
{
  public:
	Sugar(const Branch &branch, const std::string &compoundID,
		const std::string &asymID, int authSeqID);

	Sugar(Sugar &&rhs);
	Sugar &operator=(Sugar &&rhs);

	/// \brief The number of this sugar, parsed from the auth seq ID
	int num() const
	{
		return std::stoi(mAuthSeqID);
	}

	std::string name() const;

	/// \brief Return the atom the C1 is linked to
	Atom getLink() const
	{
		return mLink;
	}

	/// \brief Store \a link as the atom this sugar is linked to
	void setLink(Atom link)
	{
		mLink = link;
	}

	/// \brief The auth seq ID of the linked atom as a number, or 0 when there is no link
	size_t getLinkNr() const
	{
		if (not mLink)
			return 0;
		return std::stoi(mLink.authSeqID());
	}

  private:
	const Branch *mBranch;
	Atom mLink;
};
/// \brief A Branch is a vector of Sugar objects sharing an asym ID.
class Branch : public std::vector<Sugar>
{
  public:
	Branch(Structure &structure, const std::string &asymID);

	void linkAtoms();

	std::string name() const;
	float weight() const;

	/// \brief A copy of the asym ID
	std::string asymID() const
	{
		return mAsymID;
	}

	/// \brief The structure this branch belongs to
	Structure &structure()
	{
		return *mStructure;
	}

	const Structure &structure() const
	{
		return *mStructure;
	}

	/// \brief Return the sugar with number \a nr
	Sugar &getSugarByNum(int nr);
	const Sugar &getSugarByNum(int nr) const;

  private:
	friend Sugar;

	std::string name(const Sugar &s) const;

	Structure *mStructure;
	std::string mAsymID;
};
// --------------------------------------------------------------------
// file is a reference to the data stored in e.g. the cif file.
// This object is not copyable.
/// \brief A cif::file that cannot be copied and gives direct access to its first datablock.
class File : public cif::file
{
public:
File() {}
// File(const std::filesystem::path &path)
// {
// load(path);
// }
// File(const char *data, size_t length)
// {
// load(data, length);
// }
File(const File &) = delete;
File &operator=(const File &) = delete;
// void load(const std::filesystem::path &p) override;
// void save(const std::filesystem::path &p) override;
// using cif::file::load;
// using cif::file::save;
/// \brief Return the first datablock of this file, i.e. the result of front().
/// NOTE(review): presumably the file must contain at least one datablock before this is called — confirm against cif::file.
cif::datablock &data() { return front(); }
};
// --------------------------------------------------------------------
/// \brief Bit flags that can be passed when constructing a Structure
enum class StructureOpenOptions
{
	SkipHydrogen = 1 << 0
};

/// \brief Return true when \a a and \a b have at least one bit in common
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
	const int common = static_cast<int>(a) & static_cast<int>(b);
	return common != 0;
}
// --------------------------------------------------------------------
/// \brief The atoms, polymers, branches and non-polymer residues of one model in a datablock.
///
/// A Structure keeps a reference to the datablock it was created from, the
/// datablock therefore has to outlive the Structure.
class Structure
{
  public:
	/// \brief Create a structure from the first datablock in \a p
	Structure(cif::file &p, size_t modelNr = 1, StructureOpenOptions options = {})
		: Structure(p.front(), modelNr, options)
	{
	}

	Structure(cif::datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});

	Structure(Structure &&s) = default;

	// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
	Structure(const Structure &);

	Structure &operator=(const Structure &) = delete;
	// Structure &operator=(Structure &&s) = default;

	~Structure();

	const AtomView &atoms() const { return mAtoms; }
	// AtomView &atoms() { return mAtoms; }

	EntityType getEntityTypeForEntityID(const std::string entityID) const;
	EntityType getEntityTypeForAsymID(const std::string asymID) const;

	AtomView waters() const;

	const std::list<Polymer> &polymers() const { return mPolymers; }
	std::list<Polymer> &polymers() { return mPolymers; }

	Polymer &getPolymerByAsymID(const std::string &asymID);

	const Polymer &getPolymerByAsymID(const std::string &asymID) const
	{
		// Forward to the non-const overload, the result is returned as const again
		return const_cast<Structure *>(this)->getPolymerByAsymID(asymID);
	}

	const std::list<Branch> &branches() const { return mBranches; }
	std::list<Branch> &branches() { return mBranches; }

	Branch &getBranchByAsymID(const std::string &asymID);
	const Branch &getBranchByAsymID(const std::string &asymID) const;

	const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }

	Atom getAtomByID(const std::string &id) const;
	// Atom getAtomByLocation(Point pt, float maxDistance) const;

	Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
		const std::string &compID, int seqID, const std::string &altID = "");

	/// \brief Return the atom closest to point \a p
	Atom getAtomByPosition(Point p) const;

	/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
	Atom getAtomByPositionAndType(Point p, std::string_view type, std::string_view res_type) const;

	/// \brief Get a non-poly residue for an asym with id \a asymID
	Residue &getResidue(const std::string &asymID)
	{
		return getResidue(asymID, 0, "");
	}

	/// \brief Get a non-poly residue for an asym with id \a asymID
	const Residue &getResidue(const std::string &asymID) const
	{
		return getResidue(asymID, 0, "");
	}

	/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
	Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID);

	/// \brief Get the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
	const Residue &getResidue(const std::string &asymID, int seqID, const std::string &authSeqID) const
	{
		return const_cast<Structure *>(this)->getResidue(asymID, seqID, authSeqID);
	}

	/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
	Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);

	/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
	const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
	{
		return const_cast<Structure *>(this)->getResidue(asymID, compID, seqID, authSeqID);
	}

	/// \brief Get the residue for atom \a atom
	Residue &getResidue(const mmcif::Atom &atom)
	{
		return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
	}

	/// \brief Get the residue for atom \a atom
	const Residue &getResidue(const mmcif::Atom &atom) const
	{
		return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID(), atom.authSeqID());
	}

	// Actions

	/// \brief Remove atom \a a, also from the residue it is part of
	void removeAtom(Atom &a)
	{
		removeAtom(a, true);
	}

	void swapAtoms(Atom a1, Atom a2); // swap the labels for these atoms
	void moveAtom(Atom a, Point p);   // move atom to a new location

	void changeResidue(Residue &res, const std::string &newCompound,
		const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);

	/// \brief Remove a residue, can be monomer or nonpoly
	///
	/// \param asym_id     The asym ID
	/// \param seq_id      The sequence ID
	/// \param auth_seq_id The auth sequence ID
	void removeResidue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
	{
		removeResidue(getResidue(asym_id, seq_id, auth_seq_id));
	}

	/// \brief Create a new non-polymer entity, returns new ID
	/// \param mon_id	The mon_id for the new nonpoly, must be an existing and known compound from CCD
	/// \return			The ID of the created entity
	std::string createNonPolyEntity(const std::string &mon_id);

	/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
	/// This method assumes you are copying data from one cif file to another.
	///
	/// \param entity_id	The entity ID of the new nonpoly
	/// \param atoms		The array of atom_site rows containing the data.
	/// \return				The newly created asym ID
	std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);

	/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
	/// This method creates new atom records filled with info from the info.
	///
	/// \param entity_id	The entity ID of the new nonpoly
	/// \param atom_info	The array of sets of cif::item data containing the data for the atoms.
	/// \return				The newly created asym ID
	std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::item>> &atom_info);

	/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info
	Branch &createBranch(std::vector<std::vector<cif::item>> &nag_atom_info);

	/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
	///
	/// \param asym_id      The asym id of the branch to extend
	/// \param atom_info    Array containing the info for the atoms to construct for the new sugar
	/// \param link_sugar   The sugar to link to, note: this is the sugar number (1 based)
	/// \param link_atom    The atom id of the atom linked in the sugar
	Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::item>> &atom_info,
		int link_sugar, const std::string &link_atom);

	/// \brief Remove \a branch
	void removeBranch(Branch &branch);

	/// \brief Remove residue \a res
	///
	/// \param res The residue to remove
	void removeResidue(mmcif::Residue &res);

	/// \brief Translate the coordinates of all atoms in the structure by \a t
	void translate(Point t);

	/// \brief Rotate the coordinates of all atoms in the structure by \a q
	void rotate(Quaternion t);

	/// \brief Translate and rotate the coordinates of all atoms in the structure by \a t and \a q
	void translateAndRotate(Point t, Quaternion q);

	/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
	void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);

	const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }

	void cleanupEmptyCategories();

	/// \brief Direct access to underlying data
	cif::category &category(std::string_view name) const
	{
		return mDb[name];
	}

	cif::datablock &datablock() const
	{
		return mDb;
	}

	void validateAtoms() const;

  private:
	friend Polymer;
	friend Residue;

	std::string insertCompound(const std::string &compoundID, bool isEntity);

	std::string createEntityForBranch(Branch &branch);

	void loadData();

	void loadAtomsForModel(StructureOpenOptions options);

	/// \brief Construct an Atom from \a args and add it with emplace_atom(Atom &&)
	///
	/// The arguments are taken as forwarding references, so they reach the Atom
	/// constructor with their original value category: nothing is copied on the
	/// way and lvalues can bind to the non-const reference parameters of
	/// Atom(cif::datablock &, cif::row_handle &).
	template <typename... Args>
	Atom &emplace_atom(Args &&...args)
	{
		return emplace_atom(Atom{std::forward<Args>(args)...});
	}

	Atom &emplace_atom(Atom &&atom);

	void removeAtom(Atom &a, bool removeFromResidue);
	void removeSugar(Sugar &sugar);

	cif::datablock &mDb;
	size_t mModelNr;
	AtomView mAtoms;
	std::vector<size_t> mAtomIndex;
	std::list<Polymer> mPolymers;
	std::list<Branch> mBranches;
	std::vector<Residue> mNonPolymers;
};
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <cstdint>
#include <array>
#include <cif++/utilities.hpp>
namespace mmcif
{
// --------------------------------------------------------------------
/// \brief Selects which kind of spacegroup name is meant, see GetSpacegroupNumber.
/// The enumerators carry the same names as the three name fields of struct Spacegroup.
enum class SpacegroupName
{
full, xHM, Hall
};
/// \brief One entry of the spacegroup table kSpaceGroups: three names and a number.
/// NOTE(review): presumably xHM is the extended Hermann-Mauguin symbol and Hall the Hall symbol — confirm with the table data.
struct Spacegroup
{
const char* name; // the full name
const char* xHM;
const char* Hall;
int nr; // the spacegroup number
};
/// The table of spacegroups, defined elsewhere.
CIFPP_EXPORT extern const Spacegroup kSpaceGroups[];
/// NOTE(review): presumably the number of entries in kSpaceGroups — confirm at the definition.
CIFPP_EXPORT extern const std::size_t kNrOfSpaceGroups;
// --------------------------------------------------------------------
/// \brief Fifteen small integers packed into the low 36 bits of one 64 bit word.
///
/// The values at index 0 up to and including 8 take two bits each, the values
/// at index 9 up to and including 14 take three bits each. Only these low bits
/// of each value are kept, so a value of -1 at index 0-8 is returned as 3 by data().
///
/// The object holds nothing but the packed word, so it has the size of a
/// uint64_t and can be copied and assigned freely.
/// NOTE(review): presumably the first nine values are a rotation matrix and the last six a translation written as three fractions — confirm with the code generating the table.
struct SymopData
{
	/// \brief Pack the values in \a data, value 0 ends up in the highest bits
	constexpr SymopData(const std::array<int, 15> &data)
		: m_packed((data[ 0] & 0x03ULL) << 34 bitor
				   (data[ 1] & 0x03ULL) << 32 bitor
				   (data[ 2] & 0x03ULL) << 30 bitor
				   (data[ 3] & 0x03ULL) << 28 bitor
				   (data[ 4] & 0x03ULL) << 26 bitor
				   (data[ 5] & 0x03ULL) << 24 bitor
				   (data[ 6] & 0x03ULL) << 22 bitor
				   (data[ 7] & 0x03ULL) << 20 bitor
				   (data[ 8] & 0x03ULL) << 18 bitor
				   (data[ 9] & 0x07ULL) << 15 bitor
				   (data[10] & 0x07ULL) << 12 bitor
				   (data[11] & 0x07ULL) <<  9 bitor
				   (data[12] & 0x07ULL) <<  6 bitor
				   (data[13] & 0x07ULL) <<  3 bitor
				   (data[14] & 0x07ULL) <<  0)
	{
	}

	/// \brief Two objects are equal when their packed words are equal
	bool operator==(const SymopData &rhs) const
	{
		return m_packed == rhs.m_packed;
	}

	/// \brief Unpack the fifteen values again
	///
	/// Each field is masked while still 64 bits wide and only then narrowed to
	/// int, so the narrowing never sees a value that does not fit.
	std::array<int, 15> data() const
	{
		return {
			static_cast<int>((m_packed >> 34) bitand 0x03),
			static_cast<int>((m_packed >> 32) bitand 0x03),
			static_cast<int>((m_packed >> 30) bitand 0x03),
			static_cast<int>((m_packed >> 28) bitand 0x03),
			static_cast<int>((m_packed >> 26) bitand 0x03),
			static_cast<int>((m_packed >> 24) bitand 0x03),
			static_cast<int>((m_packed >> 22) bitand 0x03),
			static_cast<int>((m_packed >> 20) bitand 0x03),
			static_cast<int>((m_packed >> 18) bitand 0x03),
			static_cast<int>((m_packed >> 15) bitand 0x07),
			static_cast<int>((m_packed >> 12) bitand 0x07),
			static_cast<int>((m_packed >>  9) bitand 0x07),
			static_cast<int>((m_packed >>  6) bitand 0x07),
			static_cast<int>((m_packed >>  3) bitand 0x07),
			static_cast<int>((m_packed >>  0) bitand 0x07),
		};
	}

  private:
	friend struct SymopDataBlock;

	// Mask selecting the 36 bits in use. This is a class constant and not a
	// per-object member: a const data member would double the size of each
	// object and delete the copy assignment operator.
	static constexpr uint64_t kPackMask = (~0ULL >> (64 - 36));

	/// \brief Construct from a word of which only the low 36 bits are kept
	SymopData(uint64_t v)
		: m_packed(v bitand kPackMask)
	{
	}

	uint64_t m_packed;
};
/// \brief A spacegroup number, a rotational number and a SymopData in one 64 bit word.
///
/// Layout: the spacegroup takes the 16 highest bits, the rotational number the
/// next 8 bits and the packed SymopData the lowest bits.
struct SymopDataBlock
{
	/// \brief Combine \a spacegroup, \a rotational_number and the packed form of \a rt_data
	constexpr SymopDataBlock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
		: m_v(((spacegroup & 0xffffULL) << 48) |
			  ((rotational_number & 0xffULL) << 40) |
			  SymopData(rt_data).m_packed)
	{
	}

	/// \brief The spacegroup number stored in the 16 highest bits
	uint16_t spacegroup() const
	{
		return static_cast<uint16_t>(m_v >> 48);
	}

	/// \brief The symmetry operation data stored in the lowest bits
	SymopData symop() const
	{
		return SymopData(m_v);
	}

	/// \brief The rotational number stored in bits 40 up to and including 47
	uint8_t rotational_number() const
	{
		return static_cast<uint8_t>((m_v >> 40) & 0xff);
	}

  private:
	uint64_t m_v;
};

static_assert(sizeof(SymopDataBlock) == sizeof(uint64_t), "Size of SymopData is wrong");
/// The table of SymopDataBlock entries, defined elsewhere.
CIFPP_EXPORT extern const SymopDataBlock kSymopNrTable[];
/// NOTE(review): presumably the number of entries in kSymopNrTable — confirm at the definition.
CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
// --------------------------------------------------------------------
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code, using SpacegroupName::full
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type); // alternative for clipper's parsing code
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <vector>
#include <string>
#include <tuple>
#include <cif++/cif.hpp>
namespace cif
{
extern const int
kResidueNrWildcard,
kNoSeqNum;
struct TLSSelection;
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
struct TLSResidue;
// Abstract base for a parsed TLS selection expression.
struct TLSSelection
{
virtual ~TLSSelection() {}
// Implemented by concrete selections; receives the datablock, the list of
// residues to work on and an indent level (defaults to 0).
virtual void CollectResidues(cif::datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0;
// Returns a list of (string, int, int) tuples for this selection.
// NOTE(review): presumably (chain id, first seq nr, last seq nr), with
// pdbNamespace selecting the numbering scheme -- confirm against the implementation.
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::datablock& db, bool pdbNamespace) const;
};
// Low level: get the selections
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
}
...@@ -43,8 +43,6 @@ ...@@ -43,8 +43,6 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#include <cif++/Cif++Export.hpp>
#if _MSC_VER #if _MSC_VER
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.) #pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
#pragma warning(disable : 4068) // unknown pragma #pragma warning(disable : 4068) // unknown pragma
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <new>
#include <numeric>
#include <regex>
#include <set>
#include <shared_mutex>
#include <stack>
#include <tuple>
#include <unordered_map>
#include <gxrio.hpp>
#include <boost/logic/tribool.hpp>
#include <cif++/Cif++.hpp>
#include <cif++/CifParser.hpp>
#include <cif++/utilities.hpp>
#include <cif++/CifValidator.hpp>
namespace fs = std::filesystem;
namespace cif
{
CIFPP_EXPORT int VERBOSE = 0;
static const char *kEmptyResult = "";
// --------------------------------------------------------------------
// most internal data structures are stored as linked lists
// Item values are stored in a simple struct. They should be const anyway
// A single value in a row, stored as a node in a singly linked list.
// The text is stored inline: mText is declared with 4 bytes, but the custom
// operator new below allocates extra room so the whole string fits.
struct ItemValue
{
// next value in the same row, or nullptr
ItemValue *mNext;
// index of the column this value belongs to
uint32_t mColumnIndex;
// start of the zero-terminated text; the allocation extends past these 4 bytes
char mText[4];
ItemValue(const char *v, size_t columnIndex);
~ItemValue();
// true for an empty string, a lone '.' or a lone '?'
bool empty() const { return mText[0] == 0 or ((mText[0] == '.' or mText[0] == '?') and mText[1] == 0); }
// true when the text is exactly "."
bool null() const { return mText[0] == '.' and mText[1] == 0; }
// true when the text is exactly "?"
bool unknown() const { return mText[0] == '?' and mText[1] == 0; }
// placement-style allocation: dataSize is the length of the text to store
void *operator new(size_t size, size_t dataSize);
void operator delete(void *p);
void operator delete(void *p, size_t dataSize);
};
// --------------------------------------------------------------------
// Stores a copy of value for the given column. The object must have been
// allocated with the (size, dataSize) operator new so that mText has room
// for the complete string.
ItemValue::ItemValue(const char *value, size_t columnIndex)
	: mNext(nullptr)
	, mColumnIndex(static_cast<uint32_t>(columnIndex))
{
	// the column index is stored in 32 bits
	assert(columnIndex < std::numeric_limits<uint32_t>::max());

	strcpy(mText, value);
}
// Destroys the rest of the list iteratively: each successor is unlinked
// before it is deleted, so its own destructor has nothing left to recurse into.
ItemValue::~ItemValue()
{
	// the check against 'this' guards against a node that links to itself
	for (ItemValue *successor = mNext; successor != nullptr and successor != this; successor = mNext)
	{
		mNext = successor->mNext;
		successor->mNext = nullptr;
		delete successor;
	}
}
// Allocates room for an ItemValue whose text is dataSize characters long.
// The 4 bytes already declared for mText are subtracted and one byte is added
// for the terminating zero.
// Throws std::bad_alloc when the allocation fails: this allocation function
// is not declared noexcept, so it must not return nullptr.
void *ItemValue::operator new(size_t size, size_t dataSize)
{
	void *p = malloc(size - 4 + dataSize + 1);
	if (p == nullptr)
		throw std::bad_alloc();
	return p;
}
// Counterpart of the malloc based operator new: releases the block with free.
void ItemValue::operator delete(void *p)
{
	free(p);
}
// Matching deallocation function for operator new(size, dataSize);
// the size argument is not needed to release the block.
void ItemValue::operator delete(void *p, size_t /*dataSize*/)
{
	free(p);
}
// --------------------------------------------------------------------
// itemColumn contains info about a column or field in a Category
// Describes one column (field) of a Category.
struct ItemColumn
{
std::string mName; // store lower-case, for optimization
// validator for this item, or nullptr when none is known
const ValidateItem *mValidator;
};
// itemRow contains the actual values for a Row in a Category
struct ItemRow
{
~ItemRow();
void drop(size_t columnIx);
const char *c_str(size_t columnIx) const;
std::string str() const
{
std::stringstream s;
s << '{';
for (auto v = mValues; v != nullptr; v = v->mNext)
{
s << mCategory->getColumnName(v->mColumnIndex)
<< ':'
<< v->mText;
if (v->mNext != nullptr)
s << ", ";
}
s << '}';
return s.str();
}
ItemRow *mNext;
Category *mCategory;
ItemValue *mValues;
uint32_t mLineNr = 0;
};
std::ostream &operator<<(std::ostream &os, const ItemRow &r)
{
os << r.mCategory->name() << '[';
for (auto iv = r.mValues; iv != nullptr; iv = iv->mNext)
{
os << iv->mText;
if (iv->mNext)
os << ',';
}
os << ']';
return os;
}
// --------------------------------------------------------------------
// Deletes all following rows iteratively (each one is unlinked before it is
// deleted, avoiding deep recursion) and then the values of this row.
ItemRow::~ItemRow()
{
	for (ItemRow *successor = mNext; successor != nullptr and successor != this; successor = mNext)
	{
		mNext = successor->mNext;
		successor->mNext = nullptr;
		delete successor;
	}

	delete mValues;
}
void ItemRow::drop(size_t columnIx)
{
if (mValues != nullptr and mValues->mColumnIndex == columnIx)
{
auto v = mValues;
mValues = mValues->mNext;
v->mNext = nullptr;
delete v;
}
else
{
for (auto v = mValues; v->mNext != nullptr; v = v->mNext)
{
if (v->mNext->mColumnIndex == columnIx)
{
auto vn = v->mNext;
v->mNext = vn->mNext;
vn->mNext = nullptr;
delete vn;
break;
}
}
}
#if DEBUG
for (auto iv = mValues; iv != nullptr; iv = iv->mNext)
assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
#endif
}
const char *ItemRow::c_str(size_t columnIx) const
{
const char *result = kEmptyResult;
for (auto v = mValues; v != nullptr; v = v->mNext)
{
if (v->mColumnIndex == columnIx)
{
result = v->mText;
break;
}
}
return result;
}
// --------------------------------------------------------------------
namespace detail
{
// Assigns a new value to the referenced item.
// Throws std::logic_error when the reference is const or the row has no data.
ItemReference &ItemReference::operator=(const std::string &value)
{
	if (mConst)
	{
		throw std::logic_error("Attempt to write to a constant row");
	}

	if (mRow.mData == nullptr)
	{
		throw std::logic_error("Attempt to write to an uninitialized row");
	}

	mRow.assign(mName, value, false);

	return *this;
}
// Returns the stored text for this item. A missing value, a row without
// data and the null value "." all yield kEmptyResult.
const char *ItemReference::c_str() const
{
	if (mRow.mData == nullptr)
		return kEmptyResult;

	for (auto value = mRow.mData->mValues; value != nullptr; value = value->mNext)
	{
		if (value->mColumnIndex != mColumn)
			continue;

		const bool isNull = value->mText[0] == '.' and value->mText[1] == 0;
		return isNull ? kEmptyResult : value->mText;
	}

	return kEmptyResult;
}
// Returns the stored text for this item when it holds a real value.
// For a missing, empty, "." or "?" value the default defined by the item
// validator is returned when there is one, otherwise defaultValue.
const char *ItemReference::c_str(const char *defaultValue) const
{
	if (mRow.mData == nullptr or mRow.mData->mCategory == nullptr)
		return defaultValue;

	const char *text = defaultValue;

	for (auto value = mRow.mData->mValues; value != nullptr; value = value->mNext)
	{
		if (value->mColumnIndex != mColumn)
			continue;

		// only really non-empty values
		const char first = value->mText[0];
		const bool placeholder = (first == '.' or first == '?') and value->mText[1] == 0;
		if (first != 0 and not placeholder)
			text = value->mText;
		break;
	}

	// nothing usable found, perhaps the category has a default defined?
	if (text == defaultValue and mColumn < mRow.mData->mCategory->mColumns.size())
	{
		auto itemValidator = mRow.mData->mCategory->mColumns[mColumn].mValidator;
		if (itemValidator != nullptr and not itemValidator->mDefault.empty())
			text = itemValidator->mDefault.c_str();
	}

	return text;
}
// True when c_str() yields the shared kEmptyResult pointer, i.e. the item
// is missing or holds the null value ".".
bool ItemReference::empty() const
{
	const char *text = c_str();
	return text == kEmptyResult;
}
// Returns true when the item holds the null value ".".
// When the row has no value at all for this column, the validator's
// mDefaultIsNull decides, if a validator is known for the column.
//
// Two defects made that fallback unreachable before: a default constructed
// boost::tribool is false (not indeterminate), and comparing a tribool with
// boost::indeterminate using == never yields true. The state is now
// initialised explicitly and tested with boost::indeterminate(x).
bool ItemReference::is_null() const
{
	boost::tribool result(boost::indeterminate);

	if (mRow.mData != nullptr and mRow.mData->mCategory != nullptr)
	{
		for (auto iv = mRow.mData->mValues; iv != nullptr; iv = iv->mNext)
		{
			if (iv->mColumnIndex == mColumn)
			{
				result = iv->mText[0] == '.' and iv->mText[1] == 0;
				break;
			}
		}

		// not found, perhaps the category has a default defined?
		if (boost::indeterminate(result) and mColumn < mRow.mData->mCategory->mColumns.size())
		{
			auto iv = mRow.mData->mCategory->mColumns[mColumn].mValidator;
			if (iv != nullptr)
				result = iv->mDefaultIsNull;
		}
	}

	// still indeterminate means: not known to be null
	return result ? true : false;
}
// Exchanges the value in this reference's column between the two rows;
// the work is delegated to Row::swap.
void ItemReference::swap(ItemReference &b)
{
	Row::swap(mColumn, mRow.mData, b.mRow.mData);
}
// Streams the text of the item as returned by c_str().
std::ostream &operator<<(std::ostream &os, ItemReference &item)
{
	return os << item.c_str();
}
} // namespace detail
// --------------------------------------------------------------------
// Datablock implementation
// Creates an empty datablock called name, without validator and without
// a following datablock.
Datablock::Datablock(const std::string_view name)
	: mName(name)
	, mValidator(nullptr)
	, mNext(nullptr)
{
}
// Deletes the datablock linked through mNext (which in turn deletes its own).
Datablock::~Datablock()
{
	delete mNext;
}
// Looks up the category called name (case insensitive) and creates it when
// it does not exist yet. Either way the category ends up at the front of the
// list, so recently used categories are found quickly.
// Returns an iterator to the category and a flag telling whether it is new.
auto Datablock::emplace(std::string_view name) -> std::tuple<iterator, bool>
{
	// NOTE(review): this takes a shared lock although the list is modified
	// below -- confirm whether exclusive locking was intended.
	std::shared_lock lock(mLock);

	auto hit = begin();
	while (hit != end() and not iequals(name, hit->name()))
		++hit;

	const bool isNew = hit == end();

	if (isNew)
	{
		mCategories.emplace(begin(), *this, std::string(name), mValidator);

		// a new category may be the parent or child of existing ones
		for (auto &cat : mCategories)
			cat.updateLinks();
	}
	else if (hit != begin())
	{
		// LRU: move the category found to the front
		mCategories.splice(begin(), mCategories, hit, std::next(hit));
	}

	return std::make_tuple(begin(), isNew);
}
// Returns the category called name, creating it when needed.
Category &Datablock::operator[](std::string_view name)
{
	auto created = emplace(name);
	iterator position = std::get<0>(created);
	return *position;
}
// Returns the category called name. When no such category exists a shared,
// lazily created category named "<null>" is returned instead of throwing.
const Category &Datablock::operator[](std::string_view name) const
{
	using namespace std::literals;

	if (auto found = get(name); found != nullptr)
		return *found;

	std::unique_lock lock(mLock);

	if (not mNullCategory)
		mNullCategory.reset(new Category(const_cast<Datablock&>(*this), "<null>", nullptr));

	return *mNullCategory;
}
// Returns a pointer to the category called name (case insensitive), or
// nullptr when this datablock has no such category.
Category *Datablock::get(std::string_view name)
{
	std::shared_lock lock(mLock);

	for (auto it = mCategories.begin(); it != mCategories.end(); ++it)
	{
		if (iequals(it->name(), name))
			return &*it;
	}

	return nullptr;
}
// Const variant: returns a pointer to the category called name (case
// insensitive), or nullptr when this datablock has no such category.
const Category *Datablock::get(std::string_view name) const
{
	std::shared_lock lock(mLock);

	for (auto it = mCategories.begin(); it != mCategories.end(); ++it)
	{
		if (iequals(it->name(), name))
			return &*it;
	}

	return nullptr;
}
// Validates every category and returns true only when all are valid.
// All categories are checked, also after the first failure.
// Throws std::runtime_error when no validator has been set.
bool Datablock::isValid()
{
	std::shared_lock lock(mLock);

	if (mValidator == nullptr)
		throw std::runtime_error("Validator not specified");

	bool allValid = true;
	for (auto &cat : *this)
	{
		if (not cat.isValid())
			allValid = false;
	}

	return allValid;
}
// Asks every category in this datablock to validate its links.
void Datablock::validateLinks() const
{
	std::shared_lock lock(mLock);

	for (auto &cat : *this)
	{
		cat.validateLinks();
	}
}
// Stores the validator and passes it on to every category.
void Datablock::setValidator(const Validator *v)
{
	// NOTE(review): mValidator is written while only a shared lock is held --
	// confirm whether exclusive locking was intended.
	std::shared_lock lock(mLock);

	mValidator = v;

	for (auto &cat : *this)
	{
		cat.setValidator(v);
	}
}
void Datablock::add_software(const std::string_view name, const std::string &classification, const std::string &versionNr, const std::string &versionDate)
{
std::shared_lock lock(mLock);
Category &cat = operator[]("software");
auto ordNr = cat.size() + 1;
// TODO: should we check this ordinal number???
cat.emplace({{"pdbx_ordinal", ordNr},
{"name", name},
{"version", versionNr},
{"date", versionDate},
{"classification", classification}});
}
// Lets every category add its tags to tags, in category order.
void Datablock::getTagOrder(std::vector<std::string> &tags) const
{
	std::shared_lock lock(mLock);

	for (auto &cat : *this)
	{
		cat.getTagOrder(tags);
	}
}
// Writes the datablock to os. The 'entry' category goes first, followed by
// a generated audit_conform record when a validator is available; the other
// categories follow in their stored order.
void Datablock::write(std::ostream &os)
{
	std::shared_lock lock(mLock);

	os << "data_" << mName << std::endl
	   << "# " << std::endl;

	// mmcif support, sort of: locate the first 'entry' category
	auto entry = mCategories.begin();
	while (entry != mCategories.end() and not (entry->name() == "entry"))
		++entry;

	if (entry != mCategories.end())
	{
		entry->write(os);

		if (mValidator != nullptr)
		{
			// the audit_conform record names the dictionary in use
			Category auditConform(*this, "audit_conform", nullptr);
			auditConform.emplace({{"dict_name", mValidator->dictName()},
				{"dict_version", mValidator->dictVersion()}});
			auditConform.write(os);
		}
	}

	for (auto &cat : mCategories)
	{
		if (cat.name() == "entry" or cat.name() == "audit_conform")
			continue;

		cat.write(os);
	}
}
// Writes the datablock to os using the tag order given in order.
//
// Categories are written in the order in which they first appear in order,
// each with its items in the order listed. Categories that are not mentioned
// in order are written afterwards, in their stored order.
//
// Category names are matched case insensitively throughout. Previously the
// items of a category were collected with a case sensitive comparison, so a
// tag written with different casing than the first tag of its category was
// silently left out of the item order.
void Datablock::write(std::ostream &os, const std::vector<std::string> &order)
{
	std::shared_lock lock(mLock);

	os << "data_" << mName << std::endl
	   << "# " << std::endl;

	// the distinct categories, in order of first appearance
	std::vector<std::string> catOrder;
	for (auto &o : order)
	{
		std::string cat, Item;
		std::tie(cat, Item) = split_tag_name(o);

		// search from the back: consecutive tags usually share a category
		if (find_if(catOrder.rbegin(), catOrder.rend(), [&cat](const std::string &s) -> bool
				{ return iequals(cat, s); }) == catOrder.rend())
			catOrder.push_back(cat);
	}

	for (auto &c : catOrder)
	{
		auto cat = get(c);
		if (cat == nullptr)
			continue;

		std::vector<std::string> items;
		for (auto &o : order)
		{
			std::string catName, Item;
			std::tie(catName, Item) = split_tag_name(o);

			if (iequals(catName, c))
				items.push_back(Item);
		}

		cat->write(os, items);
	}

	// for any Category we missed in the catOrder
	for (auto &cat : mCategories)
	{
		if (find_if(catOrder.begin(), catOrder.end(), [&](const std::string &s) -> bool
				{ return iequals(cat.name(), s); }) != catOrder.end())
			continue;

		cat.write(os);
	}
}
// Compares two datablocks: both must contain the same non-empty categories
// (names compared case insensitively) and each pair of categories must
// compare equal.
//
// With cif::VERBOSE > 1 all differences are reported on std::cerr before
// false is returned; otherwise the function returns at the first difference.
//
// The category names are sorted with the same case insensitive ordering the
// merge loops use. They used to be sorted case sensitively, which could make
// the merge misreport categories as missing when names differed in case.
bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
{
	bool result = true;

	std::shared_lock lockA(dbA.mLock);
	std::shared_lock lockB(dbB.mLock);

	// ordering on the lower case form of the names
	auto lessNoCase = [](std::string a, std::string b) -> bool
	{
		toLower(a);
		toLower(b);
		return a.compare(b) < 0;
	};

	std::vector<std::string> catA, catB;

	for (auto &cat : dbA)
	{
		if (not cat.empty())
			catA.push_back(cat.name());
	}
	sort(catA.begin(), catA.end(), lessNoCase);

	for (auto &cat : dbB)
	{
		if (not cat.empty())
			catB.push_back(cat.name());
	}
	sort(catB.begin(), catB.end(), lessNoCase);

	// loop over categories twice, to group output
	// First iteration is to list missing categories.

	std::vector<std::string> missingA, missingB;

	auto catA_i = catA.begin(), catB_i = catB.begin();

	while (catA_i != catA.end() and catB_i != catB.end())
	{
		std::string nA = *catA_i;
		toLower(nA);

		std::string nB = *catB_i;
		toLower(nB);

		int d = nA.compare(nB);
		if (d > 0)
		{
			missingA.push_back(*catB_i);
			++catB_i;
		}
		else if (d < 0)
		{
			missingB.push_back(*catA_i);
			++catA_i;
		}
		else
			++catA_i, ++catB_i;
	}

	// whatever remains in one list is missing in the other
	while (catA_i != catA.end())
		missingB.push_back(*catA_i++);

	while (catB_i != catB.end())
		missingA.push_back(*catB_i++);

	if (not(missingA.empty() and missingB.empty()))
	{
		if (cif::VERBOSE > 1)
		{
			std::cerr << "compare of datablocks failed" << std::endl;
			if (not missingA.empty())
				std::cerr << "Categories missing in A: " << cif::join(missingA, ", ") << std::endl
						  << std::endl;

			if (not missingB.empty())
				std::cerr << "Categories missing in B: " << cif::join(missingB, ", ") << std::endl
						  << std::endl;

			result = false;
		}
		else
			return false;
	}

	// Second loop, now compare category values
	catA_i = catA.begin(), catB_i = catB.begin();

	while (catA_i != catA.end() and catB_i != catB.end())
	{
		std::string nA = *catA_i;
		toLower(nA);

		std::string nB = *catB_i;
		toLower(nB);

		int d = nA.compare(nB);
		if (d > 0)
			++catB_i;
		else if (d < 0)
			++catA_i;
		else
		{
			if (not(*dbA.get(*catA_i) == *dbB.get(*catB_i)))
			{
				if (cif::VERBOSE > 1)
				{
					std::cerr << "Compare of datablocks failed due to unequal values in category " << *catA_i << std::endl;
					result = false;
				}
				else
					return false;
			}

			++catA_i;
			++catB_i;
		}
	}

	return result;
}
// Streams a datablock by calling its write method. write is not a const
// member, hence the const_cast.
std::ostream &operator<<(std::ostream &os, const Datablock &data)
{
	// whoohoo... this sucks!
	auto &writable = const_cast<Datablock &>(data);
	writable.write(os);
	return os;
}
// --------------------------------------------------------------------
//
namespace detail
{
// Resolves the column index for the item and, when the dictionary knows the
// item's type, records whether values are to be compared case insensitively
// (primitive type UChar).
void KeyCompareConditionImpl::prepare(const Category &c)
{
	mItemIx = c.getColumnIndex(mItemTag);

	auto catValidator = c.getCatValidator();
	if (not catValidator)
		return;

	auto itemValidator = catValidator->getValidatorForItem(mItemTag);
	if (itemValidator == nullptr or itemValidator->mType == nullptr)
		return;

	mCaseInsensitive = itemValidator->mType->mPrimitiveType == DDL_PrimitiveType::UChar;
}
// Resolves the column index of the item in category c.
void KeyIsEmptyConditionImpl::prepare(const Category &c)
{
	const auto column = c.getColumnIndex(mItemTag);
	mItemIx = column;
}
// Resolves the column index of the item in category c.
void KeyMatchesConditionImpl::prepare(const Category &c)
{
	const auto column = c.getColumnIndex(mItemTag);
	mItemIx = column;
}
} // namespace detail
// --------------------------------------------------------------------
//
// class to compare two rows based on their keys.
// Three-way comparison of two rows on a list of items. For each item the
// compare function of the item's type validator is used; the result is
// negative, zero or positive.
class RowComparator
{
  public:
	// Compare on the key fields of the category. The category must have a
	// category validator.
	RowComparator(Category *cat)
		: RowComparator(cat, cat->getCatValidator()->mKeys.begin(), cat->getCatValidator()->mKeys.end())
	{
	}

	// Compare on the items named in the range [b, e)
	template <typename KeyIter>
	RowComparator(Category *cat, KeyIter b, KeyIter e);

	int operator()(const ItemRow *a, const ItemRow *b) const;

	int operator()(const Row &a, const Row &b) const
	{
		return (*this)(a.mData, b.mData);
	}

  private:
	using compareFunc = std::function<int(const char *, const char *)>;

	// column index and the function comparing two values of that column
	using keyComp = std::tuple<size_t, compareFunc>;

	std::vector<keyComp> mComp;
};
// Builds the list of (column index, compare function) pairs for the items
// named in [b, e).
//
// Throws std::runtime_error when the category has no category validator
// (previously a null pointer was dereferenced in that case, e.g. when
// ordering the rows of an unvalidated category), or when the dictionary has
// no item or type validator for one of the items.
template <typename KeyIter>
RowComparator::RowComparator(Category *cat, KeyIter b, KeyIter e)
{
	auto cv = cat->getCatValidator();
	if (cv == nullptr)
		throw std::runtime_error("Cannot compare rows, no Category Validator for category " + cat->name());

	for (auto ki = b; ki != e; ++ki)
	{
		std::string k = *ki;

		size_t ix = cat->getColumnIndex(k);

		auto iv = cv->getValidatorForItem(k);
		if (iv == nullptr)
			throw std::runtime_error("Incomplete dictionary, no Item Validator for Key " + k);

		auto tv = iv->mType;
		if (tv == nullptr)
			throw std::runtime_error("Incomplete dictionary, no type Validator for Item " + k);

		using namespace std::placeholders;

		mComp.emplace_back(ix, std::bind(&ValidateType::compare, tv, _1, _2));
	}
}
// Compares the rows item by item and returns the result of the first
// comparison that is not zero, or zero when all items compare equal.
int RowComparator::operator()(const ItemRow *a, const ItemRow *b) const
{
	assert(a);
	assert(b);

	for (const auto &[column, compare] : mComp)
	{
		const int d = compare(a->c_str(column), b->c_str(column));
		if (d != 0)
			return d;
	}

	return 0;
}
// --------------------------------------------------------------------
//
// class to keep an index on the keys of a Category. This is a red/black
// tree implementation.
// Index on the key fields of a Category. The index is a binary search tree
// of entry nodes with a red/black colour flag; each entry points to one
// ItemRow and rows are ordered using a RowComparator for the category.
class CatIndex
{
public:
CatIndex(Category *cat);
~CatIndex();
// returns the indexed row whose keys compare equal to those of k, or nullptr
ItemRow *find(ItemRow *k) const;
// adds a row; throws std::runtime_error when an equal key is already present
void insert(ItemRow *r);
// removes the entry with keys equal to those of r
void erase(ItemRow *r);
// batch create
void reconstruct();
// reorder the ItemRow's and returns new head and tail
std::tuple<ItemRow *, ItemRow *> reorder()
{
std::tuple<ItemRow *, ItemRow *> result = std::make_tuple(nullptr, nullptr);
if (mRoot != nullptr)
{
// the left-most entry is the first row, the recursive reorder
// returns the last entry it linked
entry *head = findMin(mRoot);
entry *tail = reorder(mRoot);
tail->mRow->mNext = nullptr;
result = std::make_tuple(head->mRow, tail->mRow);
}
return result;
}
// number of entries in the tree
size_t size() const;
// bool isValid() const;
private:
// A tree node. New nodes start out red. Deleting a node deletes both
// of its subtrees as well.
struct entry
{
entry(ItemRow *r)
: mRow(r)
, mLeft(nullptr)
, mRight(nullptr)
, mRed(true)
{
}
~entry()
{
delete mLeft;
delete mRight;
}
ItemRow *mRow;
entry *mLeft;
entry *mRight;
bool mRed;
};
entry *insert(entry *h, ItemRow *v);
entry *erase(entry *h, ItemRow *k);
// void validate(entry* h, bool isParentRed, uint32_t blackDepth, uint32_t& minBlack, uint32_t& maxBlack) const;
// Makes the right child of h the root of this subtree. The new root takes
// over the colour of h and h becomes red. Requires h->mRight != nullptr.
entry *rotateLeft(entry *h)
{
entry *x = h->mRight;
h->mRight = x->mLeft;
x->mLeft = h;
x->mRed = h->mRed;
h->mRed = true;
return x;
}
// Mirror image of rotateLeft. Requires h->mLeft != nullptr.
entry *rotateRight(entry *h)
{
entry *x = h->mLeft;
h->mLeft = x->mRight;
x->mRight = h;
x->mRed = h->mRed;
h->mRed = true;
return x;
}
// inverts the colour of h and of each child that exists
void flipColour(entry *h)
{
h->mRed = not h->mRed;
if (h->mLeft != nullptr)
h->mLeft->mRed = not h->mLeft->mRed;
if (h->mRight != nullptr)
h->mRight->mRed = not h->mRight->mRed;
}
// a missing node counts as black
bool isRed(entry *h) const
{
return h != nullptr and h->mRed;
}
// Helper for erase on the left side: flips colours and, when the right
// child has a red left child, rotates that red node over to this side.
entry *moveRedLeft(entry *h)
{
flipColour(h);
if (h->mRight != nullptr and isRed(h->mRight->mLeft))
{
h->mRight = rotateRight(h->mRight);
h = rotateLeft(h);
flipColour(h);
}
return h;
}
// Helper for erase on the right side, mirror image of moveRedLeft.
entry *moveRedRight(entry *h)
{
flipColour(h);
if (h->mLeft != nullptr and isRed(h->mLeft->mLeft))
{
h = rotateRight(h);
flipColour(h);
}
return h;
}
// Repairs the colouring of the subtree at h on the way back up after an
// erase: rotate a red right child to the left, rotate two reds in a row
// on the left to the right and split a node with two red children.
entry *fixUp(entry *h)
{
if (isRed(h->mRight))
h = rotateLeft(h);
if (isRed(h->mLeft) and isRed(h->mLeft->mLeft))
h = rotateRight(h);
if (isRed(h->mLeft) and isRed(h->mRight))
flipColour(h);
return h;
}
// left-most entry of the subtree at h; h must not be nullptr
entry *findMin(entry *h)
{
while (h->mLeft != nullptr)
h = h->mLeft;
return h;
}
// Deletes the left-most entry of the subtree at h and returns the new
// root of that subtree (nullptr when h itself was deleted).
entry *eraseMin(entry *h)
{
if (h->mLeft == nullptr)
{
delete h;
h = nullptr;
}
else
{
if (not isRed(h->mLeft) and not isRed(h->mLeft->mLeft))
h = moveRedLeft(h);
h->mLeft = eraseMin(h->mLeft);
h = fixUp(h);
}
return h;
}
// Fix mNext fields for rows in order of this index
// Returns the last (right-most) entry of the subtree at e.
entry *reorder(entry *e)
{
auto result = e;
if (e->mLeft != nullptr)
{
// the last row of the left subtree is followed by the row of e
auto l = reorder(e->mLeft);
l->mRow->mNext = e->mRow;
}
if (e->mRight != nullptr)
{
// the row of e is followed by the first row of the right subtree
auto mr = findMin(e->mRight);
e->mRow->mNext = mr->mRow;
result = reorder(e->mRight);
}
return result;
}
Category &mCat;
RowComparator mComp;
entry *mRoot;
};
// Creates an empty index for cat, comparing rows on the key fields of cat.
CatIndex::CatIndex(Category *cat)
	: mCat(*cat)
	, mComp(cat)
	, mRoot(nullptr)
{
}
// Deletes the tree; the destructor of entry takes care of the subtrees.
CatIndex::~CatIndex()
{
	delete mRoot;
}
// Searches the tree for a row with keys equal to those of k.
// Returns the row found or nullptr.
ItemRow *CatIndex::find(ItemRow *k) const
{
	for (const entry *node = mRoot; node != nullptr;)
	{
		const int d = mComp(k, node->mRow);

		if (d == 0)
			return node->mRow;

		node = d < 0 ? node->mLeft : node->mRight;
	}

	return nullptr;
}
// Adds row k to the index; afterwards the root is coloured black.
void CatIndex::insert(ItemRow *k)
{
	entry *root = insert(mRoot, k);
	root->mRed = false;
	mRoot = root;
}
// Recursively inserts row v in the subtree at h and returns the new root of
// that subtree. Throws std::runtime_error when a row with equal keys is
// already present.
CatIndex::entry *CatIndex::insert(entry *h, ItemRow *v)
{
// an empty spot: this is where the new (red) entry goes
if (h == nullptr)
return new entry(v);
int d = mComp(v, h->mRow);
if (d < 0)
h->mLeft = insert(h->mLeft, v);
else if (d > 0)
h->mRight = insert(h->mRight, v);
else
throw std::runtime_error("Duplicate Key violation, cat: " + mCat.name() + " values: " + v->str());
// restore the colouring on the way back up:
// a red link to the right only is rotated to the left
if (isRed(h->mRight) and not isRed(h->mLeft))
h = rotateLeft(h);
// two red links in a row on the left are rotated to the right
if (isRed(h->mLeft) and isRed(h->mLeft->mLeft))
h = rotateRight(h);
// two red children: flip the colours of h and its children
if (isRed(h->mLeft) and isRed(h->mRight))
flipColour(h);
return h;
}
// Removes the entry with keys equal to those of k; afterwards the root, if
// any, is coloured black.
// Erasing from an empty index is a no-op; the recursive erase dereferences
// its node argument unconditionally and must not be called with nullptr.
void CatIndex::erase(ItemRow *k)
{
	if (mRoot == nullptr)
		return;

	mRoot = erase(mRoot, k);
	if (mRoot != nullptr)
		mRoot->mRed = false;
}
// Recursively removes the entry with keys equal to those of k from the
// subtree at h and returns the new root of that subtree (nullptr when the
// subtree became empty). h must not be nullptr.
CatIndex::entry *CatIndex::erase(entry *h, ItemRow *k)
{
if (mComp(k, h->mRow) < 0)
{
// the key, if present, is in the left subtree
if (h->mLeft != nullptr)
{
if (not isRed(h->mLeft) and not isRed(h->mLeft->mLeft))
h = moveRedLeft(h);
h->mLeft = erase(h->mLeft, k);
}
}
else
{
if (isRed(h->mLeft))
h = rotateRight(h);
// found, and there is no right subtree: delete this entry
if (mComp(k, h->mRow) == 0 and h->mRight == nullptr)
{
delete h;
return nullptr;
}
if (h->mRight != nullptr)
{
if (not isRed(h->mRight) and not isRed(h->mRight->mLeft))
h = moveRedRight(h);
if (mComp(k, h->mRow) == 0)
{
// found: take over the row of the smallest entry in the right
// subtree and delete that entry instead
h->mRow = findMin(h->mRight)->mRow;
h->mRight = eraseMin(h->mRight);
}
else
h->mRight = erase(h->mRight, k);
}
}
return fixUp(h);
}
// Throws away the current tree and rebuilds the index by inserting every
// row of the category one by one.
void CatIndex::reconstruct()
{
delete mRoot;
mRoot = nullptr;
for (auto r : mCat)
insert(r.mData);
// maybe reconstruction can be done quicker: collect all rows in a vector,
// stable_sort them with mComp (most of the time the data is ordered already,
// and quicksort is notorious for using excessive recursion) and then build
// the tree bottom-up by repeatedly combining neighbouring entries into
// (left, node, right) groups until a single root remains.
// however, I've not had the time to think of a way to set the red/black flag
// correctly in that case.
}
size_t CatIndex::size() const
{
std::stack<entry *> s;
s.push(mRoot);
size_t result = 0;
while (not s.empty())
{
entry *e = s.top();
s.pop();
if (e == nullptr)
continue;
++result;
s.push(e->mLeft);
s.push(e->mRight);
}
return result;
}
// --------------------------------------------------------------------
// Creates an empty set of rows for category cat.
RowSet::RowSet(Category &cat)
	: mCat(&cat)
{
}
// Creates the set of all rows in cat for which cond holds, in category order.
RowSet::RowSet(Category &cat, Condition &&cond)
	: mCat(&cat)
{
	cond.prepare(cat);

	for (auto row : cat)
	{
		if (not cond(cat, row))
			continue;

		mItems.push_back(row.mData);
	}
}
// Copies the category pointer and the list of rows.
RowSet::RowSet(const RowSet &rhs)
	: mCat(rhs.mCat)
	, mItems(rhs.mItems)
{
}
// Takes over the list of rows from rhs; the category pointer is copied.
RowSet::RowSet(RowSet &&rhs)
	: mCat(rhs.mCat)
	, mItems(std::move(rhs.mItems))
{
}
// Nothing to release explicitly; the members clean up after themselves.
RowSet::~RowSet()
{
}
// Copy assignment; assigning a set to itself is a no-op.
RowSet &RowSet::operator=(const RowSet &rhs)
{
	if (this == &rhs)
		return *this;

	mItems = rhs.mItems;
	mCat = rhs.mCat;

	return *this;
}
// Move assignment: the lists of rows are exchanged, so rhs ends up with the
// rows this set had before. The category pointer is copied.
RowSet &RowSet::operator=(RowSet &&rhs)
{
	if (this == &rhs)
		return *this;

	std::swap(mItems, rhs.mItems);
	mCat = rhs.mCat;

	return *this;
}
// Sorts the rows in this set on the items named in items, keeping the
// relative order of rows that compare equal. Returns *this.
//
// RowComparator is a three-way comparison returning an int. It used to be
// handed to stable_sort directly, where any non-zero result (less as well
// as greater) converted to true, which is not a valid ordering. The result
// is now translated into the "less than" predicate the algorithm requires.
RowSet &RowSet::orderBy(std::initializer_list<std::string> items)
{
	RowComparator c(mCat, items.begin(), items.end());

	stable_sort(mItems.begin(), mItems.end(),
		[&c](const auto &a, const auto &b) -> bool
		{ return c(a, b) < 0; });

	return *this;
}
// --------------------------------------------------------------------
// Creates an empty category called name in datablock db.
// When the validator knows this category, the key and mandatory columns are
// added right away and an index on the keys is created.
// Throws ValidationError when name is empty.
Category::Category(Datablock &db, const std::string_view name, const Validator *Validator)
	: mDb(db)
	, mName(name)
	, mValidator(Validator)
	, mHead(nullptr)
	, mTail(nullptr)
	, mIndex(nullptr)
{
	if (mName.empty())
		throw ValidationError("invalid empty name for Category");

	if (mValidator == nullptr)
		return;

	mCatValidator = mValidator->getValidatorForCategory(mName);
	if (mCatValidator == nullptr)
		return;

	// make sure all required columns are added
	for (auto &key : mCatValidator->mKeys)
		addColumn(key);

	for (auto &field : mCatValidator->mMandatoryFields)
		addColumn(field);

	mIndex = new CatIndex(this);
}
// Deletes all rows (the first row deletes the ones following it) and the index.
Category::~Category()
{
	delete mHead;
	delete mIndex;
}
// Replaces the validator. Any existing index is discarded; when the new
// validator knows this category a fresh index is built from the current
// rows. The links to parent and child categories are updated as well.
void Category::setValidator(const Validator *v)
{
	mValidator = v;

	// deleting a null pointer is harmless, no need to test first
	delete mIndex;
	mIndex = nullptr;

	if (mValidator == nullptr)
		mCatValidator = nullptr;
	else
	{
		mCatValidator = mValidator->getValidatorForCategory(mName);

		if (mCatValidator != nullptr)
		{
			mIndex = new CatIndex(this);
			mIndex->reconstruct();
			//#if DEBUG
			//			assert(mIndex->size() == size());
			//			mIndex->validate();
			//#endif
		}
	}

	updateLinks();
}
// Rebuilds the lists of links to child and parent categories from the
// validator. Links to categories that do not exist in the datablock are
// left out.
void Category::updateLinks()
{
	mChildLinks.clear();
	mParentLinks.clear();

	if (mValidator == nullptr)
		return;

	for (auto link : mValidator->getLinksForParent(mName))
	{
		auto childCat = mDb.get(link->mChildCategory);
		if (childCat != nullptr)
			mChildLinks.push_back({childCat, link});
	}

	for (auto link : mValidator->getLinksForChild(mName))
	{
		auto parentCat = mDb.get(link->mParentCategory);
		if (parentCat != nullptr)
			mParentLinks.push_back({parentCat, link});
	}
}
// True when a column called name exists; getColumnIndex returns the number
// of columns for an unknown name.
bool Category::hasColumn(std::string_view name) const
{
	const size_t ix = getColumnIndex(name);
	return ix < mColumns.size();
}
// Returns the index of the column called name (case insensitive), or the
// number of columns when there is no such column. In verbose mode a message
// is written to std::cerr when the name is not known to the dictionary.
size_t Category::getColumnIndex(std::string_view name) const
{
	size_t ix = 0;
	while (ix < mColumns.size() and not iequals(name, mColumns[ix].mName))
		++ix;

	const bool missing = ix == mColumns.size();

	// validate the name, if it is known at all (since it was not found)
	if (VERBOSE > 0 and missing and mCatValidator != nullptr)
	{
		if (mCatValidator->getValidatorForItem(name) == nullptr)
			std::cerr << "Invalid name used '" << name << "' is not a known column in " << mName << std::endl;
	}

	return ix;
}
const std::string &Category::getColumnName(size_t columnIx) const
{
return mColumns.at(columnIx).mName;
}
// Returns the names of all columns, in column order.
std::vector<std::string> Category::getColumnNames() const
{
	std::vector<std::string> names;

	for (size_t ix = 0; ix < mColumns.size(); ++ix)
		names.push_back(mColumns[ix].mName);

	return names;
}
// Returns the index of the column called name, adding the column when it
// does not exist yet. When the category is validated and the dictionary
// does not know the item, this is reported to the validator.
size_t Category::addColumn(std::string_view name)
{
	using namespace std::literals;

	const size_t ix = getColumnIndex(name);

	// an existing column, done
	if (ix != mColumns.size())
		return ix;

	const ValidateItem *itemValidator = nullptr;

	if (mCatValidator != nullptr)
	{
		itemValidator = mCatValidator->getValidatorForItem(name);
		if (itemValidator == nullptr)
			mValidator->reportError("tag " + std::string(name) + " not allowed in Category " + mName, false);
	}

	mColumns.push_back(ItemColumn{std::string(name), itemValidator});

	return ix;
}
// Puts the rows in the order of the index. Does nothing without an index.
void Category::reorderByIndex()
{
	if (mIndex == nullptr)
		return;

	std::tie(mHead, mTail) = mIndex->reorder();
}
void Category::sort(std::function<int(const Row &, const Row &)> comparator)
{
if (mHead == nullptr)
return;
std::vector<ItemRow *> rows;
for (auto itemRow = mHead; itemRow != nullptr; itemRow = itemRow->mNext)
rows.push_back(itemRow);
std::stable_sort(rows.begin(), rows.end(),
[&comparator](ItemRow *ia, ItemRow *ib)
{
Row ra(ia);
Row rb(ib);
return comparator(ra, rb) < 0;
});
mHead = rows.front();
mTail = rows.back();
auto r = mHead;
for (size_t i = 1; i < rows.size(); ++i)
r = r->mNext = rows[i];
r->mNext = nullptr;
assert(r == mTail);
assert(size() == rows.size());
}
// Produces an ID that is not yet in use in the key column: the single key
// field of the category when the dictionary defines exactly one, "id"
// otherwise. The generator is called with increasing numbers until it
// yields a value that does not exist yet.
std::string Category::getUniqueID(std::function<std::string(int)> generator)
{
	using namespace cif::literals;

	std::string key = "id";
	if (mCatValidator != nullptr and mCatValidator->mKeys.size() == 1)
		key = mCatValidator->mKeys.front();

	// calling size() often is a waste of resources
	if (mLastUniqueNr == 0)
		mLastUniqueNr = size();

	for (;;)
	{
		std::string candidate = generator(static_cast<int>(mLastUniqueNr++));

		if (not exists(Key(key) == candidate))
			return candidate;
	}
}
size_t Category::size() const
{
size_t result = 0;
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
++result;
return result;
}
// A category is empty when it has no rows or when its first row has no values.
bool Category::empty() const
{
	if (mHead == nullptr)
		return true;

	return mHead->mValues == nullptr;
}
void Category::drop(const std::string &field)
{
using namespace std::placeholders;
auto ci = find_if(mColumns.begin(), mColumns.end(),
[field](ItemColumn &c) -> bool
{ return iequals(c.mName, field); });
if (ci != mColumns.end())
{
size_t columnIx = ci - mColumns.begin();
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
pi->drop(columnIx);
mColumns.erase(ci);
}
}
// Const variant, implemented in terms of the non-const operator[].
const Row Category::operator[](Condition &&cond) const
{
	auto self = const_cast<Category*>(this);
	return self->operator[](std::forward<Condition>(cond));
}
// Returns the first row for which cond holds, or a default constructed Row
// when there is no such row.
Row Category::operator[](Condition &&cond)
{
	Row found;

	cond.prepare(*this);

	for (auto row : *this)
	{
		if (not cond(*this, row))
			continue;

		found = row;
		break;
	}

	return found;
}
// True when at least one row satisfies cond.
bool Category::exists(Condition &&cond) const
{
	cond.prepare(*this);

	for (auto row : *this)
	{
		if (cond(*this, row))
			return true;
	}

	return false;
}
// Returns all rows of this category as a RowSet ordered by the given items.
RowSet Category::orderBy(std::initializer_list<std::string> items)
{
	RowSet all(*this);
	all.insert(all.begin(), begin(), end());

	return all.orderBy(items);
}
// Deletes all rows. When the category has an index it is replaced by a
// fresh, empty one.
void Category::clear()
{
	delete mHead;
	mHead = nullptr;
	mTail = nullptr;

	if (mIndex == nullptr)
		return;

	delete mIndex;
	mIndex = new CatIndex(this);
}
template <class Iter>
std::tuple<Row, bool> Category::emplace(Iter b, Iter e)
{
// First, make sure all mandatory fields are supplied
Row result;
bool isNew = true;
if (mCatValidator != nullptr and b != e)
{
for (auto &col : mColumns)
{
auto iv = mCatValidator->getValidatorForItem(col.mName);
if (iv == nullptr)
continue;
bool seen = false;
for (auto v = b; v != e; ++v)
{
if (iequals(v->name(), col.mName))
{
seen = true;
break;
}
}
if (not seen and iv->mMandatory)
throw std::runtime_error("missing mandatory field " + col.mName + " for Category " + mName);
}
if (mIndex != nullptr)
{
std::unique_ptr<ItemRow> nr(new ItemRow{nullptr, this, nullptr});
Row r(nr.get());
auto keys = keyFields();
for (auto v = b; v != e; ++v)
{
if (keys.count(v->name()))
r.assign(v->name(), v->value(), true);
}
auto test = mIndex->find(nr.get());
if (test != nullptr)
{
if (VERBOSE > 1)
std::cerr << "Not inserting new record in " << mName << " (duplicate Key)" << std::endl;
result = test;
isNew = false;
}
}
}
if (isNew)
{
auto nr = new ItemRow{nullptr, this, nullptr};
Row r(nr);
for (auto v = b; v != e; ++v)
r.assign(*v, true);
// if (isOrphan(r))
// throw std::runtime_error("Cannot insert row in category " + mName + " since it would be an orphan");
if (mHead == nullptr)
{
assert(mTail == nullptr);
mHead = mTail = nr;
}
else
{
assert(mTail != nullptr);
assert(mHead != nullptr);
mTail->mNext = nr;
mTail = nr;
}
result = r;
if (mIndex != nullptr)
mIndex->insert(nr);
}
return {result, isNew};
}
/// Insert a copy of the items of row \a r by forwarding its item range to
/// the iterator based emplace.
std::tuple<Row, bool> Category::emplace(Row r)
{
    auto first = r.begin();
    auto last = r.end();

    return emplace(first, last);
}
/// Erase every row for which \a cond holds.
/// \return  The number of rows erased by this loop.
size_t Category::erase(Condition &&cond)
{
    cond.prepare(*this);

    size_t removed = 0;

    for (auto ri = begin(); ri != end();)
    {
        if (not cond(*this, *ri))
        {
            ++ri;
            continue;
        }

        // erase returns the iterator following the removed row
        ri = erase(ri);
        ++removed;
    }

    return removed;
}
/// Erase every row for which \a cond holds, calling \a verbose with each
/// row just before it is erased.
/// \return  The number of rows erased by this loop.
size_t Category::erase(Condition &&cond, std::function<void(const Row &)> &&verbose)
{
    cond.prepare(*this);

    size_t removed = 0;

    for (auto ri = begin(); ri != end();)
    {
        if (not cond(*this, *ri))
        {
            ++ri;
            continue;
        }

        verbose(*ri);

        // erase returns the iterator following the removed row
        ri = erase(ri);
        ++removed;
    }

    return removed;
}
/// Erase all rows that match \a cond and for which isOrphan() returns true.
/// The rows are collected first and erased afterwards, so the list is not
/// modified while it is being iterated.
void Category::eraseOrphans(Condition &&cond)
{
    cond.prepare(*this);

    std::vector<ItemRow *> orphans;

    for (auto r : *this)
    {
        if (not cond(*this, r) or not isOrphan(r))
            continue;

        if (VERBOSE > 1)
            std::cerr << "Removing orphaned record: " << std::endl
                      << r << std::endl
                      << std::endl;

        orphans.push_back(r.mData);
    }

    for (auto orphan : orphans)
        erase(iterator(orphan));
}
/// Erase the given row by delegating to the iterator based erase.
void Category::erase(Row r)
{
    iterator pos(r.mData);
    erase(pos);
}
/// Erase the row at \a pos and return an iterator to the row following it.
///
/// The row is removed from the index (if any), unlinked from the row list
/// and deleted. When a validator is present, rows in child categories that
/// link to the erased row and that have become orphans are erased as well.
///
/// \throws std::runtime_error when the category has no rows.
auto Category::erase(iterator pos) -> iterator
{
    auto r = *pos;
    iterator result = ++pos;

    if (mHead == nullptr)
        throw std::runtime_error("erase");

    if (mIndex != nullptr)
        mIndex->erase(r.mData);

    // unlink the row from the list
    if (r == mHead)
    {
        mHead = mHead->mNext;
        r.mData->mNext = nullptr;
    }
    else
    {
        for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
        {
            if (pi->mNext == r.mData)
            {
                pi->mNext = r.mData->mNext;
                r.mData->mNext = nullptr;
                break;
            }
        }
    }

    // Reset mTail, if needed. This is done right after unlinking, so that
    // mTail never refers to the removed row while child categories are being
    // cleaned up and so that no pointer is inspected after it was deleted.
    if (r == mTail)
    {
        mTail = mHead;
        if (mTail != nullptr)
            while (mTail->mNext != nullptr)
                mTail = mTail->mNext;
    }

    // links are created based on the _pdbx_item_linked_group_list entries
    // in mmcif_pdbx.dic dictionary.
    //
    // For each link group in _pdbx_item_linked_group_list
    // a std::set of keys from one category is mapped to another.
    // If all values in a child are the same as the specified parent ones
    // the child is removed as well, recursively of course.

    if (mValidator != nullptr)
    {
        for (auto &&[childCat, link] : mChildLinks)
        {
            Condition cond;

            for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
            {
                const char *value = r[link->mParentKeys[ix]].c_str();
                cond = std::move(cond) && (Key(link->mChildKeys[ix]) == value);
            }

            childCat->eraseOrphans(std::move(cond));
        }
    }

    // the values of the row are no longer needed, release it
    delete r.mData;

    return result;
}
/// Insert a copy of \a row into this category and return the inserted row
/// (or the already existing row when emplace found a duplicate key).
///
/// When the category has exactly one key field, the value of that field in
/// the copy is replaced by a newly generated unique ID: getUniqueID("") for
/// keys whose primitive type is Numb, getUniqueID(mName + "_id_") otherwise.
/// The latter is also used when no item or type validator is known for the key.
Row Category::copyRow(const Row &row)
{
    // copy the values
    std::vector<Item> items;
    std::copy(row.begin(), row.end(), std::back_inserter(items));

    if (mCatValidator and mCatValidator->mKeys.size() == 1)
    {
        auto key = mCatValidator->mKeys.front();
        auto kv = mCatValidator->getValidatorForItem(key);

        // guard against a missing item or type validator for the key field
        const bool numericKey =
            kv != nullptr and kv->mType != nullptr and
            kv->mType->mPrimitiveType == DDL_PrimitiveType::Numb;

        for (auto &item : items)
        {
            if (item.name() != key)
                continue;

            if (numericKey)
                item.value(getUniqueID(""));
            else
                item.value(getUniqueID(mName + "_id_"));
            break;
        }
    }

    auto &&[result, inserted] = emplace(items.begin(), items.end());
    // assert(inserted);

    return result;
}
void Category::getTagOrder(std::vector<std::string> &tags) const
{
for (auto &c : mColumns)
tags.push_back("_" + mName + "." + c.mName);
}
/// Iterator to the first row (constructed from mHead).
Category::iterator Category::begin()
{
    iterator first(mHead);
    return first;
}

/// Past-the-end iterator: a default constructed iterator.
Category::iterator Category::end()
{
    iterator last;
    return last;
}

/// Const iterator to the first row; same as the const begin().
Category::const_iterator Category::cbegin() const
{
    return begin();
}

/// Const past-the-end iterator; same as the const end().
Category::const_iterator Category::cend() const
{
    return end();
}

/// Const iterator to the first row (constructed from mHead).
Category::const_iterator Category::begin() const
{
    const_iterator first(mHead);
    return first;
}

/// Const past-the-end iterator: a default constructed const_iterator.
Category::const_iterator Category::end() const
{
    const_iterator last;
    return last;
}
/// Check whether row \a r has a matching row in \a parentCat according to \a link.
///
/// A condition on the parent keys is built from the child key values of \a r:
///  - an empty child value only adds an "is empty" test, and only when the
///    child field is mandatory and the value is null;
///  - a non-empty value must match exactly when the parent key is mandatory
///    in the parent category, otherwise the parent key may also be empty.
/// When no condition could be built the result is true.
bool Category::hasParent(Row r, const Category &parentCat, const ValidateLink &link) const
{
    assert(mValidator != nullptr);
    assert(mCatValidator != nullptr);

    Condition cond;

    for (size_t ix = 0; ix < link.mChildKeys.size(); ++ix)
    {
        auto &childKey = link.mChildKeys[ix];
        auto field = r[childKey];

        if (field.empty())
        {
            if (mCatValidator->mMandatoryFields.count(childKey) and field.is_null())
                cond = std::move(cond) and (Key(link.mParentKeys[ix]) == Empty());
            continue;
        }

        if (parentCat.mCatValidator->mMandatoryFields.count(link.mParentKeys[ix]))
        {
            const char *value = field.c_str();
            cond = std::move(cond) and (Key(link.mParentKeys[ix]) == value);
        }
        else
        {
            const char *value = field.c_str();
            cond = std::move(cond) and (Key(link.mParentKeys[ix]) == value or Key(link.mParentKeys[ix]) == Empty());
        }
    }

    // nothing to test means there is nothing that can be missing
    if (cond.empty())
        return true;

    return parentCat.exists(std::move(cond));
}
/// Tell whether row \a r has no parent row in any of the parent categories
/// listed in mParentLinks. Without a category validator the row is never
/// considered an orphan.
bool Category::isOrphan(Row r)
{
    // be safe
    if (mCatValidator == nullptr)
        return false;

    for (auto &&[parentCat, link] : mParentLinks)
    {
        // all child key values must be found in the corresponding parent keys
        Condition cond;
        for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
        {
            const char *value = r[link->mChildKeys[ix]].c_str();
            cond = std::move(cond) && (Key(link->mParentKeys[ix]) == value);
        }

        if (not parentCat->exists(std::move(cond)))
            continue;

        if (VERBOSE > 2)
            std::cerr << "Not removing because row has a parent in category " << link->mParentCategory << std::endl;

        return false;
    }

    return true;
}
/// Tell whether any child category in mChildLinks contains a row whose
/// child keys equal the parent key values of \a r.
bool Category::hasChildren(Row r) const
{
    assert(mValidator != nullptr);
    assert(mCatValidator != nullptr);

    for (auto &&[childCat, link] : mChildLinks)
    {
        Condition cond;
        for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
        {
            const char *value = r[link->mParentKeys[ix]].c_str();
            cond = std::move(cond) && (Key(link->mChildKeys[ix]) == value);
        }

        if (not childCat->find(std::move(cond)).empty())
            return true;
    }

    return false;
}
/// Tell whether any parent category in mParentLinks contains a row whose
/// parent keys equal the child key values of \a r.
bool Category::hasParents(Row r) const
{
    assert(mValidator != nullptr);
    assert(mCatValidator != nullptr);

    for (auto &&[parentCat, link] : mParentLinks)
    {
        Condition cond;
        for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
        {
            const char *value = r[link->mChildKeys[ix]].c_str();
            cond = std::move(cond) && (Key(link->mParentKeys[ix]) == value);
        }

        if (not parentCat->find(std::move(cond)).empty())
            return true;
    }

    return false;
}
/// Convenience overload: look up the child category by name in mDb and
/// return the children of \a r in that category.
RowSet Category::getChildren(Row r, const char *childCat)
{
    auto &target = mDb[childCat];
    return getChildren(r, target);
}
/// Return the rows in \a childCat that are linked as child to row \a r.
/// Every link from this category to \a childCat known to the validator is
/// evaluated; duplicates are removed from the combined result.
RowSet Category::getChildren(Row r, Category &childCat)
{
    assert(mValidator != nullptr);
    assert(mCatValidator != nullptr);

    RowSet children(childCat);

    for (auto &link : mValidator->getLinksForParent(mName))
    {
        // only links that lead to the requested category are of interest
        if (link->mChildCategory != childCat.mName)
            continue;

        Condition cond;
        for (size_t keyIx = 0; keyIx < link->mParentKeys.size(); ++keyIx)
        {
            const char *value = r[link->mParentKeys[keyIx]].c_str();
            cond = std::move(cond) && (Key(link->mChildKeys[keyIx]) == value);
        }

        auto found = childCat.find(std::move(cond));
        children.insert(children.end(), found.begin(), found.end());
    }

    // remove duplicates
    children.make_unique();

    return children;
}
/// Convenience overload: look up the parent category by name in mDb and
/// return the parents of \a r in that category.
RowSet Category::getParents(Row r, const char *parentCat)
{
    auto &target = mDb[parentCat];
    return getParents(r, target);
}
/// Return the rows in \a parentCat that are linked as parent to row \a r.
/// Every link from \a parentCat to this category known to the validator is
/// evaluated; duplicates are removed from the combined result.
RowSet Category::getParents(Row r, Category &parentCat)
{
    assert(mValidator != nullptr);
    assert(mCatValidator != nullptr);

    RowSet parents(parentCat);

    for (auto &link : mValidator->getLinksForChild(mName))
    {
        // only links that lead to the requested category are of interest
        if (link->mParentCategory != parentCat.mName)
            continue;

        Condition cond;
        for (size_t keyIx = 0; keyIx < link->mChildKeys.size(); ++keyIx)
        {
            const char *value = r[link->mChildKeys[keyIx]].c_str();
            cond = std::move(cond) && (Key(link->mParentKeys[keyIx]) == value);
        }

        auto found = parentCat.find(std::move(cond));
        parents.insert(parents.end(), found.begin(), found.end());
    }

    // remove duplicates
    parents.make_unique();

    return parents;
}
/// Convenience overload: look up the category by name in mDb and return
/// the rows in it that are linked to \a r.
RowSet Category::getLinked(Row r, const char *cat)
{
    auto &target = mDb[cat];
    return getLinked(r, target);
}
/// Return the rows in \a cat linked to \a r: the children of \a r in that
/// category or, when there are none, the parents of \a r in that category.
RowSet Category::getLinked(Row r, Category &cat)
{
    RowSet linked = getChildren(r, cat);

    const bool noChildren = linked.empty();
    if (noChildren)
        linked = getParents(r, cat);

    return linked;
}
// Validate this category against the dictionary.
//
// Throws std::runtime_error when no validator was set. An empty category is
// always considered valid. Problems are reported through
// mValidator->reportError(..., false) and make the function return false:
//  - the category is not known in the dictionary (mCatValidator is null),
//  - a column has no item validator,
//  - a mandatory field has no column.
// As a side effect the item validator found for each column is stored in
// that column (col.mValidator).
bool Category::isValid()
{
bool result = true;
if (mValidator == nullptr)
throw std::runtime_error("no Validator specified");
if (empty())
{
if (VERBOSE > 2)
std::cerr << "Skipping validation of empty Category " << mName << std::endl;
return true;
}
if (mCatValidator == nullptr)
{
mValidator->reportError("undefined Category " + mName, false);
return false;
}
// local copy of the mandatory field names; every column that is present is struck off
auto mandatory = mCatValidator->mMandatoryFields;
for (auto &col : mColumns)
{
auto iv = mCatValidator->getValidatorForItem(col.mName);
if (iv == nullptr)
{
mValidator->reportError("Field " + col.mName + " is not valid in Category " + mName, false);
result = false;
}
// remember the validator (possibly null) for use in the value check below
col.mValidator = iv;
mandatory.erase(col.mName);
}
// whatever is left in 'mandatory' has no column at all
if (not mandatory.empty())
{
mValidator->reportError("In Category " + mName + " the following mandatory fields are missing: " + cif::join(mandatory, ", "), false);
result = false;
}
//#if not defined(NDEBUG)
// // check index?
// if (mIndex)
// {
// mIndex->validate();
// for (auto r: *this)
// {
// if (mIndex->find(r.mData) != r.mData)
// mValidator->reportError("Key not found in index for Category " + mName);
// }
// }
//#endif
// validate all values
// NOTE(review): 'mandatory' is reset here but not read again in this function
mandatory = mCatValidator->mMandatoryFields;
for (auto ri = mHead; ri != nullptr; ri = ri->mNext)
{
for (size_t cix = 0; cix < mColumns.size(); ++cix)
{
bool seen = false;
auto iv = mColumns[cix].mValidator;
if (iv == nullptr)
{
mValidator->reportError("invalid field " + mColumns[cix].mName + " for Category " + mName, false);
result = false;
continue;
}
// run the item validator on every value stored for this column in this row
for (auto vi = ri->mValues; vi != nullptr; vi = vi->mNext)
{
if (vi->mColumnIndex == cix)
{
seen = true;
try
{
(*iv)(vi->mText);
}
catch (const std::exception &e)
{
// NOTE(review): the error is reported but 'result' is left unchanged here,
// so a failing value alone does not make isValid() return false — confirm this is intended
mValidator->reportError("Error validating " + mColumns[cix].mName + ": " + e.what(), false);
continue;
}
}
}
// the missing-mandatory-value check below is only done for the first row
if (seen or ri != mHead)
continue;
if (iv != nullptr and iv->mMandatory)
{
mValidator->reportError("missing mandatory field " + mColumns[cix].mName + " for Category " + mName, false);
result = false;
}
}
}
return result;
}
/// Report, on std::cerr, the rows in this category that lack a matching
/// parent row for each of the links in mParentLinks.
///
/// With VERBOSE > 1 the child key values of every such row are printed,
/// with VERBOSE == 1 only the number of rows is printed per link.
void Category::validateLinks() const
{
    for (auto &&[parentCat, link] : mParentLinks)
    {
        size_t missing = 0;

        for (auto r : *this)
        {
            if (hasParent(r, *parentCat, *link))
                continue;

            if (cif::VERBOSE > 1)
            {
                // print the header once, before the first incomplete row
                if (missing == 0)
                {
                    std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
                              << "  These are the items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl
                              << std::endl;
                }

                for (auto k : link->mChildKeys)
                    std::cerr << k << ": " << r[k].as<std::string>() << std::endl;

                std::cerr << std::endl;
            }

            ++missing;
        }

        if (missing != 0 and VERBOSE == 1)
        {
            std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
                      << "  There are " << missing << " items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl;
        }
    }
}
/// Return the validator of this category.
/// \throws std::runtime_error when no validator has been set.
const Validator &Category::getValidator() const
{
    if (mValidator != nullptr)
        return *mValidator;

    throw std::runtime_error("no Validator defined yet");
}
/// Return the tags of all item validators defined for this category.
/// \throws std::runtime_error when no validator has been set. A missing
///         category validator is reported as a fatal error to the validator.
iset Category::fields() const
{
    if (mValidator == nullptr)
        throw std::runtime_error("No Validator specified");

    if (mCatValidator == nullptr)
        mValidator->reportError("undefined Category", true);

    iset tags;
    for (auto &itemValidator : mCatValidator->mItemValidators)
        tags.insert(itemValidator.mTag);

    return tags;
}
/// Return the mandatory fields as listed in the category validator.
/// \throws std::runtime_error when no validator has been set. A missing
///         category validator is reported as a fatal error to the validator.
iset Category::mandatoryFields() const
{
    if (mValidator == nullptr)
        throw std::runtime_error("No Validator specified");

    const bool knownCategory = mCatValidator != nullptr;
    if (not knownCategory)
        mValidator->reportError("undefined Category", true);

    return mCatValidator->mMandatoryFields;
}
/// Return the key fields of this category as a set of names.
/// \throws std::runtime_error when no validator has been set. A missing
///         category validator is reported as a fatal error to the validator.
iset Category::keyFields() const
{
    if (mValidator == nullptr)
        throw std::runtime_error("No Validator specified");

    if (mCatValidator == nullptr)
        mValidator->reportError("undefined Category", true);

    auto &keys = mCatValidator->mKeys;
    iset result(keys.begin(), keys.end());

    return result;
}
/// Return the column indices (as given by getColumnIndex) of the key fields.
/// \throws std::runtime_error when no validator has been set. A missing
///         category validator is reported as a fatal error to the validator.
std::set<size_t> Category::keyFieldsByIndex() const
{
    if (mValidator == nullptr)
        throw std::runtime_error("No Validator specified");

    if (mCatValidator == nullptr)
        mValidator->reportError("undefined Category", true);

    std::set<size_t> indices;
    for (auto &keyName : mCatValidator->mKeys)
    {
        auto columnIx = getColumnIndex(keyName);
        indices.insert(columnIx);
    }

    return indices;
}
// Compare two categories row by row, in their current row order.
//
// The set of tags and the comparison function per tag are taken from the
// validator of 'a'. Throws std::runtime_error when 'a' has no category
// validator, or when an item or type validator is missing for one of the
// fields. With cif::VERBOSE > 1 differences are written to std::cerr and
// comparison continues; otherwise the function returns false at the first
// difference found.
bool operator==(const Category &a, const Category &b)
{
using namespace std::placeholders;
bool result = true;
// set<std::string> tagsA(a.fields()), tagsB(b.fields());
//
// if (tagsA != tagsB)
// std::cout << "Unequal number of fields" << std::endl;
auto &validator = a.getValidator();
auto catValidator = validator.getValidatorForCategory(a.name());
if (catValidator == nullptr)
throw std::runtime_error("missing cat validator");
// a tag name together with the compare function of its type validator
typedef std::function<int(const char *, const char *)> compType;
std::vector<std::tuple<std::string, compType>> tags;
auto keys = catValidator->mKeys;
// indices into 'tags' of the fields used by rowEqual below
std::vector<size_t> keyIx;
for (auto &tag : a.fields())
{
auto iv = catValidator->getValidatorForItem(tag);
if (iv == nullptr)
throw std::runtime_error("missing item validator");
auto tv = iv->mType;
if (tv == nullptr)
throw std::runtime_error("missing type validator");
tags.push_back(std::make_tuple(tag, std::bind(&cif::ValidateType::compare, tv, std::placeholders::_1, std::placeholders::_2)));
// NOTE(review): the predicate is true for keys that do NOT compare equal (with iequals) to tag,
// so the tag is only added to keyIx when no key differs from it — confirm this is the intended selection
auto pred = [tag](const std::string &s) -> bool
{ return cif::iequals(tag, s) == 0; };
if (find_if(keys.begin(), keys.end(), pred) == keys.end())
keyIx.push_back(tags.size() - 1);
}
// a.reorderByIndex();
// b.reorderByIndex();
// compares two rows on the fields selected in keyIx only
auto rowEqual = [&](const cif::Row &ra, const cif::Row &rb)
{
int d = 0;
for (auto kix : keyIx)
{
std::string tag;
compType compare;
std::tie(tag, compare) = tags[kix];
d = compare(ra[tag].c_str(), rb[tag].c_str());
if (d != 0)
{
if (cif::VERBOSE > 1)
std::cerr << "Values in _" << a.name() << '.' << tag << " are not equal: '" << ra[tag].c_str() << "' != '" << rb[tag].c_str() << '\'' << std::endl;
break;
}
}
return d == 0;
};
// walk both categories in parallel
auto ai = a.begin(), bi = b.begin();
while (ai != a.end() or bi != b.end())
{
// one of the two ran out of rows before the other
if (ai == a.end() or bi == b.end())
{
if (cif::VERBOSE > 1)
{
std::cerr << "Unequal number of rows in " << a.name() << std::endl;
result = false;
break;
}
else
return false;
}
cif::Row ra = *ai, rb = *bi;
if (not rowEqual(ra, rb))
{
if (cif::VERBOSE > 1)
result = false;
else
return false;
}
// NOTE(review): these three vectors are not used in this function
std::vector<std::string> missingA, missingB, different;
for (auto &tt : tags)
{
std::string tag;
compType compare;
std::tie(tag, compare) = tt;
// make it an option to compare unapplicable to empty or something
// values "." and "?" are treated as empty strings for the comparison
const char *ta = ra[tag].c_str();
if (strcmp(ta, ".") == 0 or strcmp(ta, "?") == 0)
ta = "";
const char *tb = rb[tag].c_str();
if (strcmp(tb, ".") == 0 or strcmp(tb, "?") == 0)
tb = "";
if (compare(ta, tb) != 0)
{
if (cif::VERBOSE > 1)
{
std::cerr << "Values in _" << a.name() << '.' << tag << " are not equal: '" << ta << "' != '" << tb << '\'' << std::endl;
result = false;
}
else
return false;
}
}
++ai;
++bi;
}
return result;
}
namespace detail
{
    /// Write a single value to \a os and return the new offset in the current line.
    ///
    /// \param os      The stream to write to.
    /// \param value   The text to write.
    /// \param offset  The number of characters already written on the current line.
    /// \param width   The column width to pad to; 0 forces a text field.
    /// \return        The offset after writing, 0 when the output ended with a newline.
    ///
    /// Values containing a newline, values longer than 132 characters and
    /// values written with width 0 are written as a text field delimited by
    /// lines starting with ';'. Values accepted by isUnquotedString are
    /// written as is. All other values are written between single or double
    /// quotes when possible, and as a text field otherwise.
    size_t writeValue(std::ostream &os, std::string value, size_t offset, size_t width)
    {
        if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
        {
            // a ';' at the start of a line would end the text field, escape it
            cif::replace_all(value, "\n;", "\n\\;");

            if (offset > 0)
                os << std::endl;
            os << ';' << value;
            if (not cif::ends_with(value, "\n"))
                os << std::endl;
            os << ';' << std::endl;
            offset = 0;
        }
        else if (isUnquotedString(value.c_str()))
        {
            os << value;

            if (value.length() < width)
            {
                os << std::string(width - value.length(), ' ');
                offset += width;
            }
            else
            {
                os << ' ';
                offset += value.length() + 1;
            }
        }
        else
        {
            bool done = false;
            for (char q : {'\'', '"'})
            {
                // see if we can use the quote character: skip occurrences that
                // are followed by a non blank character other than the quote itself
                auto p = value.find(q);
                while (p != std::string::npos and isNonBlank(value[p + 1]) and value[p + 1] != q)
                    p = value.find(q, p + 1);

                if (p != std::string::npos)
                    continue;

                os << q << value << q;

                if (value.length() + 2 < width)
                {
                    os << std::string(width - value.length() - 2, ' ');
                    offset += width;
                }
                else
                {
                    os << ' ';
                    // two quote characters, the value and the trailing space
                    offset += value.length() + 3;
                }

                done = true;
                break;
            }

            if (not done)
            {
                // neither quote character can be used, fall back to a text field
                if (offset > 0)
                    os << std::endl;
                os << ';' << value << std::endl
                   << ';' << std::endl;
                offset = 0;
            }
        }

        return offset;
    }
} // namespace detail
/// Write this category in CIF format to \a os.
///
/// \param os                   The stream to write to.
/// \param order                The column indices, in the order the columns should be written.
/// \param includeEmptyColumns  Not used by this implementation.
///
/// Nothing is written for an empty category. A category with more than one
/// row is written as a loop_ with aligned columns, a category with a single
/// row is written as a list of tag/value pairs. Missing values are written
/// as "?". The output ends with a line containing "# ".
void Category::write(std::ostream &os, const std::vector<size_t> &order, bool includeEmptyColumns)
{
    if (empty())
        return;

    // If the first Row has a next, we need a loop_
    bool needLoop = (mHead->mNext != nullptr);

    if (needLoop)
    {
        os << "loop_" << std::endl;

        for (auto cix : order)
        {
            auto &col = mColumns[cix];
            os << '_' << mName << '.' << col.mName << ' ' << std::endl;
        }

        // The widths are looked up by column index, so there must be an entry
        // for every column, regardless of how many columns are listed in order.
        std::vector<size_t> columnWidths(mColumns.size(), 2);

        for (auto row = mHead; row != nullptr; row = row->mNext)
        {
            for (auto v = row->mValues; v != nullptr; v = v->mNext)
            {
                // values containing a newline are written as text field and do not count
                if (strchr(v->mText, '\n') == nullptr)
                {
                    size_t l = strlen(v->mText);

                    if (not isUnquotedString(v->mText))
                        l += 2;

                    if (l > 132)
                        continue;

                    if (columnWidths[v->mColumnIndex] < l + 1)
                        columnWidths[v->mColumnIndex] = l + 1;
                }
            }
        }

        for (auto row = mHead; row != nullptr; row = row->mNext) // loop over rows
        {
            size_t offset = 0;

            for (size_t cix : order)
            {
                size_t w = columnWidths[cix];

                std::string s;
                for (auto iv = row->mValues; iv != nullptr; iv = iv->mNext)
                {
                    if (iv->mColumnIndex == cix)
                    {
                        s = iv->mText;
                        break;
                    }
                }

                if (s.empty())
                    s = "?";

                size_t l = s.length();
                if (not isUnquotedString(s.c_str()))
                    l += 2;
                if (l < w)
                    l = w;

                // start a new line when this value would not fit on the current one
                if (offset + l > 132 and offset > 0)
                {
                    os << std::endl;
                    offset = 0;
                }

                offset = detail::writeValue(os, s, offset, w);

                if (offset > 132)
                {
                    os << std::endl;
                    offset = 0;
                }
            }

            if (offset > 0)
                os << std::endl;
        }
    }
    else
    {
        // first find the indent level
        size_t l = 0;

        for (auto &col : mColumns)
        {
            std::string tag = '_' + mName + '.' + col.mName;

            if (l < tag.length())
                l = tag.length();
        }

        l += 3;

        for (size_t cix : order)
        {
            auto &col = mColumns[cix];

            // pad the tag so that all values start in the same column
            os << '_' << mName << '.' << col.mName << std::string(l - col.mName.length() - mName.length() - 2, ' ');

            std::string s;
            for (auto iv = mHead->mValues; iv != nullptr; iv = iv->mNext)
            {
                if (iv->mColumnIndex == cix)
                {
                    s = iv->mText;
                    break;
                }
            }

            if (s.empty())
                s = "?";

            size_t offset = l;
            if (s.length() + l >= kMaxLineLength)
            {
                os << std::endl;
                offset = 0;
            }

            if (detail::writeValue(os, s, offset, 1) != 0)
                os << std::endl;
        }
    }

    os << "# " << std::endl;
}
/// Write this category to \a os with the columns in their stored order.
void Category::write(std::ostream &os)
{
    std::vector<size_t> order;
    order.reserve(mColumns.size());

    for (size_t ix = 0; ix < mColumns.size(); ++ix)
        order.push_back(ix);

    write(os, order, false);
}
/// Write this category to \a os, starting with the columns named in
/// \a columns (in that order) followed by all remaining columns.
/// Columns named in \a columns are passed to addColumn first.
void Category::write(std::ostream &os, const std::vector<std::string> &columns)
{
    // make sure all columns are present
    for (auto &name : columns)
        addColumn(name);

    std::vector<size_t> order;
    order.reserve(mColumns.size());

    // the requested columns go first ...
    for (auto &name : columns)
        order.push_back(getColumnIndex(name));

    // ... followed by whatever was not listed
    for (size_t ix = 0; ix < mColumns.size(); ++ix)
    {
        const bool listed = std::find(order.begin(), order.end(), ix) != order.end();
        if (not listed)
            order.push_back(ix);
    }

    write(os, order, true);
}
// --------------------------------------------------------------------
/// Set column \a tag to \a value in all \a rows and propagate the change to
/// linked rows in child categories.
///
/// \param rows   The rows to update; all must currently hold the same value for \a tag.
/// \param tag    The name of the column to update.
/// \param value  The new value.
/// \throws std::runtime_error when \a tag is not a column of this category
///         or when the rows do not all have the same old value. Whatever
///         the item validator throws for \a value is passed on as well.
///
/// Child rows that still have another valid parent after the update are not
/// renamed but copied (when the child category has a single key), unless a
/// child row for the new value already exists.
void Category::update_value(RowSet &&rows, const std::string &tag, const std::string &value)
{
    if (rows.empty())
        return;

    auto colIx = getColumnIndex(tag);
    if (colIx >= mColumns.size())
        throw std::runtime_error("Invalid column " + tag + " for " + mName);

    auto &col = mColumns[colIx];

    // check the value
    if (col.mValidator)
        (*col.mValidator)(value);

    // first some sanity checks, what was the old value and is it the same for all rows?
    std::string oldValue = rows.front()[tag].c_str();
    for (auto &row : rows)
    {
        if (oldValue != row[tag].c_str())
            throw std::runtime_error("Inconsistent old values in update_value");
    }

    if (oldValue == value) // no need to do anything
        return;

    // update rows, but do not cascade
    for (auto &row : rows)
        row.assign(colIx, value, true);

    // see if we need to update any child categories that depend on this value
    for (auto parent : rows)
    {
        for (auto &&[childCat, linked] : mChildLinks)
        {
            if (std::find(linked->mParentKeys.begin(), linked->mParentKeys.end(), tag) == linked->mParentKeys.end())
                continue;

            // select the children that were linked using the old value
            Condition cond;
            std::string childTag;

            for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
            {
                std::string pk = linked->mParentKeys[ix];
                std::string ck = linked->mChildKeys[ix];

                // TODO add code to *NOT* test mandatory fields for Empty

                if (pk == tag)
                {
                    childTag = ck;
                    cond = std::move(cond) && Key(ck) == oldValue;
                }
                else
                    cond = std::move(cond) && Key(ck) == parent[pk].c_str();
            }

            auto children = RowSet{*childCat, std::move(cond)};
            if (children.empty())
                continue;

            // now be careful. If we search back from child to parent and still find a valid parent row
            // we cannot simply rename the child but will have to create a new child. Unless that new
            // child already exists of course.

            RowSet process(*childCat);

            for (auto child : children)
            {
                Condition cond_c;

                for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
                {
                    std::string pk = linked->mParentKeys[ix];
                    std::string ck = linked->mChildKeys[ix];

                    // TODO add code to *NOT* test mandatory fields for Empty

                    cond_c = std::move(cond_c) && Key(pk) == child[ck].c_str();
                }

                auto parents = find(std::move(cond_c));
                if (parents.empty())
                {
                    process.push_back(child);
                    continue;
                }

                // oops, we need to split this child, unless a row already exists for the new value
                Condition check;

                for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
                {
                    std::string pk = linked->mParentKeys[ix];
                    std::string ck = linked->mChildKeys[ix];

                    // TODO add code to *NOT* test mandatory fields for Empty

                    if (pk == tag)
                        check = std::move(check) && Key(ck) == value;
                    else
                        check = std::move(check) && Key(ck) == parent[pk].c_str();
                }

                if (childCat->exists(std::move(check))) // phew..., narrow escape
                    continue;

                // create the actual copy, if we can...
                if (childCat->mCatValidator != nullptr and childCat->mCatValidator->mKeys.size() == 1)
                {
                    auto copy = childCat->copyRow(child);
                    if (copy != child)
                    {
                        process.push_back(child);
                        continue;
                    }
                }

                // cannot update this...
                if (cif::VERBOSE > 0)
                    std::cerr << "Cannot update child " << childCat->mName << "." << childTag << " with value " << value << std::endl;
            }

            // finally, update the children
            if (not process.empty())
                childCat->update_value(std::move(process), childTag, value);
        }
    }
}
// --------------------------------------------------------------------
/// Copy constructor: the new Row refers to the same data as \a rhs.
Row::Row(const Row &rhs)
    : mData(rhs.mData), mCascade(rhs.mCascade)
{
}

/// Move constructor: takes over the data pointer of \a rhs and clears it there.
Row::Row(Row &&rhs)
    : mData(rhs.mData), mCascade(rhs.mCascade)
{
    rhs.mData = nullptr;
}

/// Destructor: nothing is released here.
Row::~Row()
{
}

/// Advance this Row to the next row in the list; does nothing when the Row
/// refers to no data.
void Row::next() const
{
    if (mData == nullptr)
        return;

    mData = mData->mNext;
}

/// Move assignment: takes over the data pointer of \a rhs and clears it there.
Row &Row::operator=(Row &&rhs)
{
    mCascade = rhs.mCascade;

    mData = rhs.mData;
    rhs.mData = nullptr;

    return *this;
}

/// Copy assignment: this Row will refer to the same data as \a rhs.
Row &Row::operator=(const Row &rhs)
{
    mCascade = rhs.mCascade;
    mData = rhs.mData;

    return *this;
}
/// Assign several items at once and, when cascading is enabled for this
/// Row, update the rows in child categories that were linked through the
/// old values.
void Row::assign(const std::vector<Item> &values)
{
    auto cat = mData->mCategory;

    // per changed tag: the column index, the old value and the new value
    std::map<std::string, std::tuple<size_t, std::string, std::string>> changed;

    for (auto &value : values)
    {
        auto columnIx = cat->addColumn(value.name());
        auto &col = cat->mColumns[columnIx];

        // columns without a validator are registered under their index
        std::string tag = col.mValidator ? col.mValidator->mTag : std::to_string(columnIx);

        changed[tag] = std::make_tuple(columnIx, operator[](columnIx).c_str(), value.value());

        assign(columnIx, value.value(), true);
    }

    if (not mCascade)
        return;

    // see if we need to update any child categories that depend on these values
    for (auto &&[childCat, linked] : cat->mChildLinks)
    {
        Condition cond;
        std::vector<Item> newValues;

        for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
        {
            std::string pk = linked->mParentKeys[ix];
            std::string ck = linked->mChildKeys[ix];

            auto change = changed.find(pk);
            if (change != changed.end())
            {
                // children are found using the old value and receive the new one
                cond = std::move(cond) && (Key(ck) == std::get<1>(change->second));
                newValues.emplace_back(ck, std::get<2>(change->second));
            }
            else
            {
                const char *value = (*this)[pk].c_str();
                cond = std::move(cond) && (Key(ck) == value);
            }
        }

        auto rows = childCat->find(std::move(cond));
        for (auto &cr : rows)
            cr.assign(newValues);
    }
}
/// Assign the value of \a value to the column with the name of \a value.
void Row::assign(const Item &value, bool skipUpdateLinked)
{
    const auto &columnName = value.name();
    const auto &text = value.value();

    assign(columnName, text, skipUpdateLinked);
}
/// Assign \a value to the column called \a name, the column is added to
/// the category through addColumn. When the assignment throws, a message is
/// written to std::cerr (if cif::VERBOSE >= 0) and the exception is rethrown.
void Row::assign(std::string_view name, const std::string &value, bool skipUpdateLinked, bool validate)
{
    try
    {
        auto column = mData->mCategory->addColumn(name);
        assign(column, value, skipUpdateLinked, validate);
    }
    catch (const std::exception &)
    {
        if (cif::VERBOSE >= 0)
            std::cerr << "Could not assign value '" << value << "' to column _" << mData->mCategory->name() << '.' << name << std::endl;
        throw;
    }
}
// Assign 'value' to the column with index 'column' in this row.
//
// column            index of the column in the category of this row
// value             the new text; an empty string only removes the stored value
// skipUpdateLinked  when true, the index of the category is not updated and
//                   linked rows in child categories are left untouched
// validate          when true and the column has a validator, the value is
//                   checked first (the validator is expected to throw on error)
//
// Throws std::logic_error when this Row does not refer to any data.
// Nothing is done when the stored value already equals 'value'.
void Row::assign(size_t column, const std::string &value, bool skipUpdateLinked, bool validate)
{
if (mData == nullptr)
throw std::logic_error("invalid Row, no data assigning value '" + value + "' to column with index " + std::to_string(column));
auto cat = mData->mCategory;
auto &col = cat->mColumns[column];
// look up the value currently stored for this column, if any
const char *oldValue = nullptr;
for (auto iv = mData->mValues; iv != nullptr; iv = iv->mNext)
{
assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
if (iv->mColumnIndex == column)
{
oldValue = iv->mText;
break;
}
}
if (oldValue != nullptr and value == oldValue) // no need to update
return;
// keep a copy, the ItemValue that oldValue points into is deleted below
std::string oldStrValue = oldValue ? oldValue : "";
// check the value
if (col.mValidator and validate)
(*col.mValidator)(value);
// If the field is part of the Key for this Category, remove it from the index
// before updating
bool reinsert = false;
if (not skipUpdateLinked and // an update of an Item's value
cat->mIndex != nullptr and cat->keyFieldsByIndex().count(column))
{
// only rows that are found in the index are removed and inserted again
reinsert = cat->mIndex->find(mData);
if (reinsert)
cat->mIndex->erase(mData);
}
// first remove old value with cix
if (mData->mValues == nullptr)
; // nothing to do
else if (mData->mValues->mColumnIndex == column)
{
// the value to remove is the first in the list
auto iv = mData->mValues;
mData->mValues = iv->mNext;
iv->mNext = nullptr;
delete iv;
}
else
{
// search the rest of the list, unlink and delete the first match
for (auto iv = mData->mValues; iv->mNext != nullptr; iv = iv->mNext)
{
if (iv->mNext->mColumnIndex == column)
{
auto nv = iv->mNext;
iv->mNext = nv->mNext;
nv->mNext = nullptr;
delete nv;
break;
}
}
}
// store the new value at the end of the value list; empty values are not stored
if (not value.empty())
{
auto nv = new (value.length()) ItemValue(value.c_str(), column);
if (mData->mValues == nullptr)
mData->mValues = nv;
else
{
auto iv = mData->mValues;
while (iv->mNext != nullptr)
iv = iv->mNext;
iv->mNext = nv;
}
}
if (reinsert)
cat->mIndex->insert(mData);
// see if we need to update any child categories that depend on this value
auto iv = col.mValidator;
if (not skipUpdateLinked and iv != nullptr and mCascade)
{
for (auto &&[childCat, linked] : cat->mChildLinks)
{
// skip links that do not use this column as parent key
if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
continue;
// condition selecting the child rows linked through the old value
Condition cond;
std::string childTag;
for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
{
std::string pk = linked->mParentKeys[ix];
std::string ck = linked->mChildKeys[ix];
// TODO add code to *NOT* test mandatory fields for Empty
if (pk == iv->mTag)
{
childTag = ck;
cond = std::move(cond) && Key(ck) == oldStrValue;
}
else
{
const char *pk_value = (*this)[pk].c_str();
if (*pk_value == 0)
cond = std::move(cond) && Key(ck) == Empty();
else
cond = std::move(cond) && ((Key(ck) == pk_value) or Key(ck) == Empty());
}
}
auto rows = childCat->find(std::move(cond));
if (rows.empty())
continue;
// if (cif::VERBOSE > 2)
// {
// std::cerr << "Parent: " << linked->mParentCategory << " Child: " << linked->mChildCategory << std::endl
// << cond << std::endl;
// }
// Now, suppose there are already rows in child that conform to the new value,
// we then skip this rename
Condition cond_n;
for (size_t ix = 0; ix < linked->mParentKeys.size(); ++ix)
{
std::string pk = linked->mParentKeys[ix];
std::string ck = linked->mChildKeys[ix];
// TODO add code to *NOT* test mandatory fields for Empty
if (pk == iv->mTag)
cond_n = std::move(cond_n) && Key(ck) == value;
else
{
const char *pk_value = (*this)[pk].c_str();
if (*pk_value == 0)
cond_n = std::move(cond_n) && Key(ck) == Empty();
else
cond_n = std::move(cond_n) && ((Key(ck) == pk_value) or Key(ck) == Empty());
}
}
auto rows_n = childCat->find(std::move(cond_n));
if (not rows_n.empty())
{
if (cif::VERBOSE > 0)
std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
continue;
}
// rename the children; skipUpdateLinked is false so the change is passed on to their children too
for (auto &cr : rows)
cr.assign(childTag, value, false);
}
}
}
void Row::swap(size_t cix, ItemRow *a, ItemRow *b)
{
if (a == nullptr or b == nullptr)
throw std::logic_error("invalid Rows in swap");
assert(a->mCategory == b->mCategory);
if (a->mCategory != b->mCategory)
throw std::logic_error("Categories not same in swap");
auto cat = a->mCategory;
// If the field is part of the Key for this Category, remove it from the index
// before updating
bool reinsert = false;
if (cat->mIndex != nullptr and cat->keyFieldsByIndex().count(cix))
{
reinsert = true;
cat->mIndex->erase(a);
cat->mIndex->erase(b);
}
ItemValue *ap = nullptr; // parent of ai
ItemValue *ai = nullptr;
ItemValue *bp = nullptr; // parent of bi
ItemValue *bi = nullptr;
if (a->mValues == nullptr)
;
else if (a->mValues->mColumnIndex == cix)
ai = a->mValues;
else
{
ap = a->mValues;
while (ap->mNext != nullptr)
{
if (ap->mNext->mColumnIndex == cix)
{
ai = ap->mNext;
ap->mNext = ai->mNext;
ai->mNext = nullptr;
break;
}
ap = ap->mNext;
}
}
if (b->mValues == nullptr)
;
else if (b->mValues->mColumnIndex == cix)
bi = b->mValues;
else
{
bp = b->mValues;
while (bp->mNext != nullptr)
{
if (bp->mNext->mColumnIndex == cix)
{
bi = bp->mNext;
bp->mNext = bi->mNext;
bi->mNext = nullptr;
break;
}
bp = bp->mNext;
}
}
if (ai != nullptr)
{
if (bp == nullptr)
b->mValues = ai;
else
{
ai->mNext = bp->mNext;
bp->mNext = ai;
}
}
if (bi != nullptr)
{
if (ap == nullptr)
a->mValues = bi;
else
{
bi->mNext = ap->mNext;
ap->mNext = bi;
}
}
if (reinsert)
{
cat->mIndex->insert(a);
cat->mIndex->insert(b);
}
if ((ai != nullptr or bi != nullptr))
{
auto parentColName = cat->getColumnName(cix);
// see if we need to update any child categories that depend on these values
auto parentCatValidator = cat->getCatValidator();
for (auto &&[childCat, link] : cat->mChildLinks)
{
if (find(link->mParentKeys.begin(), link->mParentKeys.end(), parentColName) == link->mParentKeys.end())
continue;
auto childCatValidator = childCat->getCatValidator();
if (childCatValidator == nullptr)
continue;
std::string linkChildColName;
Condition cond[2];
for (size_t ab = 0; ab < 2; ++ab)
{
auto i = ab == 0 ? ai : bi;
auto r = ab == 0 ? a : b;
for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
{
assert(ix < link->mParentKeys.size());
auto pcix = cat->getColumnIndex(link->mParentKeys[ix]);
auto childColName = link->mChildKeys[ix];
bool mandatory =
find(childCatValidator->mMandatoryFields.begin(), childCatValidator->mMandatoryFields.end(), childColName) != childCatValidator->mMandatoryFields.end() or
find(parentCatValidator->mMandatoryFields.begin(), parentCatValidator->mMandatoryFields.end(), link->mParentKeys[ix]) != parentCatValidator->mMandatoryFields.end();
std::string childValue;
if (pcix == cix)
{
linkChildColName = childColName;
if (not(i == nullptr or strcmp(i->mText, ".") == 0 or strcmp(i->mText, "?") == 0))
childValue = i->mText;
}
else
{
std::string ps = r->c_str(pcix);
if (not(ps.empty() or ps == "." or ps == "?"))
childValue = ps;
}
if (not childValue.empty())
{
if (mandatory or pcix == cix)
cond[ab] = std::move(cond[ab]) and Key(childColName) == childValue;
else
cond[ab] = std::move(cond[ab]) and (Key(childColName) == childValue or Key(childColName) == Empty());
}
else
cond[ab] = std::move(cond[ab]) and Key(childColName) == Empty();
}
}
std::vector<conditional_iterator_proxy<Category>> rs;
// first find the respective rows, then flip values, otherwise you won't find them anymore!
for (size_t ab = 0; ab < 2; ++ab)
{
if (cond[ab].empty())
continue;
// if (VERBOSE > 1)
// std::cerr << "Fixing link from " << cat->mName << " to " << childCat->mName << " with " << std::endl
// << cond[ab] << std::endl;
rs.push_back(childCat->find(std::move(cond[ab])));
}
for (size_t ab = 0; ab < 2; ++ab)
{
auto i = ab == 0 ? bi : ai;
for (auto r : rs[ab])
{
// now due to the way links are defined, we might have found a row
// that contains an empty value for all child columns...
// Now, that's not a real hit, is it?
size_t n = 0;
for (auto c : link->mChildKeys)
if (r[c].empty())
++n;
if (n == link->mChildKeys.size())
{
if (VERBOSE > 1)
std::cerr << "All empty columns, skipping" << std::endl;
}
else
{
if (VERBOSE > 0)
std::cerr << "In " << childCat->mName << " changing " << linkChildColName << ": " << r[linkChildColName].as<std::string>() << " => " << (i ? i->mText : "") << std::endl;
r[linkChildColName] = i ? i->mText : "";
}
}
}
}
}
}
// Look up the column index that belongs to itemTag in the category of this row.
// A Row without backing data has no category to ask and yields 0.
size_t Row::ColumnForItemTag(std::string_view itemTag) const
{
	if (mData == nullptr)
		return 0;

	return mData->mCategory->getColumnIndex(itemTag);
}
// A Row counts as empty when it has no backing data, or when that data
// does not hold a single value.
bool Row::empty() const
{
	if (mData == nullptr)
		return true;

	return mData->mValues == nullptr;
}
// Iterator positioned at the first value of this row; for a Row without
// backing data the iterator starts at a null value pointer.
auto Row::begin() const -> const_iterator
{
	ItemValue *first = nullptr;
	if (mData)
		first = mData->mValues;

	return const_iterator(mData, first);
}
// Past-the-end iterator for this row: same row data, null value pointer.
auto Row::end() const -> const_iterator
{
return const_iterator(mData, nullptr);
}
// Line number stored with this row, or 0 when the Row has no backing data.
uint32_t Row::lineNr() const
{
	if (not mData)
		return 0;

	return mData->mLineNr;
}
// Store l as the line number of this row; a Row without backing data is left untouched.
void Row::lineNr(uint32_t l)
{
	if (not mData)
		return;

	mData->mLineNr = l;
}
// Create an iterator over the values of row `data`, positioned at `ptr`.
// When `ptr` points at a value the current Item is loaded right away.
Row::const_iterator::const_iterator(ItemRow *data, ItemValue *ptr)
	: mData(data)
	, mPtr(ptr)
{
	if (mPtr == nullptr)
		return;

	fetch();
}
// Advance to the next value in the row and load it as the current Item.
// Incrementing an iterator whose value pointer is already null is a no-op.
Row::const_iterator &Row::const_iterator::operator++()
{
	if (mPtr != nullptr)
	{
		mPtr = mPtr->mNext;

		// only load when we did not run off the end of the value list
		if (mPtr != nullptr)
			fetch();
	}

	return *this;
}
// Rebuild mCurrent from the value mPtr points at: the column name is looked
// up in the row's category, the text is taken from the value itself.
// Callers in this file only invoke this while mPtr is non-null.
void Row::const_iterator::fetch()
{
	auto category = mData->mCategory;

	mCurrent = Item(category->getColumnName(mPtr->mColumnIndex), mPtr->mText);
}
// Write all items of `row` to `os`, one per line, in the form
//   _<category name>.<column name> <text>
// A Row without backing data writes nothing. Returns `os`.
std::ostream &operator<<(std::ostream &os, const Row &row)
{
	// Row::empty() treats a null mData as a valid (empty) state, so it must
	// not be dereferenced here; such a row simply produces no output.
	if (row.mData == nullptr)
		return os;

	auto category = row.mData->mCategory;
	std::string catName = category->name();

	for (auto item = row.mData->mValues; item != nullptr; item = item->mNext)
	{
		std::string tagName = category->getColumnName(item->mColumnIndex);
		os << '_' << catName << '.' << tagName << ' ' << item->mText << std::endl;
	}

	return os;
}
// --------------------------------------------------------------------
File::File()
: mHead(nullptr)
, mValidator(nullptr)
{
}
// Construct a File and fill it by parsing the contents of `is`.
// NOTE(review): the `validate` argument is not used anywhere in this constructor.
File::File(std::istream &is, bool validate)
: File()
{
load(is);
}
// Construct a File and fill it with the contents of the file at `path`.
// Any std::exception raised while loading is reported on std::cerr (when
// cif::VERBOSE is not negative) and then rethrown unchanged.
// NOTE(review): the `validate` argument is not used anywhere in this constructor.
File::File(const std::filesystem::path &path, bool validate)
	: File()
{
	try
	{
		this->load(path);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error while loading file " << path << std::endl;

		throw;
	}
}
// Construct a File and fill it by parsing the `length` bytes starting at `data`.
File::File(const char *data, std::size_t length)
	: File() // start from the empty state: load() reads mValidator and append() reads mHead
{
	load(data, length);
}
// Move constructor: take over the datablock list and the validator of `rhs`,
// leaving `rhs` without datablocks and without validator.
File::File(File &&rhs)
	: mHead(rhs.mHead)
	, mValidator(rhs.mValidator)
{
	rhs.mHead = nullptr;
	rhs.mValidator = nullptr;
}
// Destroy the File by deleting the head of the datablock list.
// NOTE(review): only mHead is deleted here; presumably the Datablock
// destructor takes care of the blocks linked through mNext — confirm.
File::~File()
{
delete mHead;
}
// Add datablock `e` at the end of the list of datablocks in this File.
// The block first receives the validator of this File. Throws a
// ValidationError when a block with the same name (compared with iequals)
// is already present.
void File::append(Datablock *e)
{
	e->setValidator(mValidator);

	// Walk the chain through the links themselves, so the empty list and
	// the tail of a non-empty list are handled by the same assignment.
	Datablock **link = &mHead;
	while (*link != nullptr)
	{
		Datablock *existing = *link;

		if (iequals(existing->getName(), e->getName()))
			throw ValidationError("Datablock " + e->getName() + " already defined in File");

		link = &existing->mNext;
	}

	*link = e;
}
// Load the file at `p` into this File.
// Throws std::runtime_error when the input stream is in a failed state right
// after opening. Any std::exception raised while parsing is reported on
// std::cerr (when cif::VERBOSE is not negative) and rethrown unchanged.
void File::load(const std::filesystem::path &p)
{
	gxrio::ifstream in(p);

	// Do not hand a stream that could not be opened to the parser.
	// NOTE(review): assumes gxrio::ifstream signals a failed open through
	// fail(), the way std::ifstream does — confirm.
	if (in.fail())
		throw std::runtime_error("Could not open file " + p.string());

	try
	{
		load(in);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading file " << p << std::endl;
		throw;
	}
}
// Write all datablocks of this File to the file at `p`, using a gxrio output stream.
// NOTE(review): the stream state is not checked after opening or after writing.
void File::save(const std::filesystem::path &p)
{
gxrio::ofstream outFile(p);
save(outFile);
}
void File::load(std::istream &is)
{
auto saved = mValidator;
setValidator(nullptr);
Parser p(is, *this);
p.parseFile();
if (saved != nullptr)
{
setValidator(saved);
(void)isValid();
}
}
void File::load(std::istream &is, const std::string &datablock)
{
auto saved = mValidator;
setValidator(nullptr);
Parser p(is, *this);
p.parseSingleDatablock(datablock);
if (saved != nullptr)
{
setValidator(saved);
(void)isValid();
}
}
// Parse the `length` bytes starting at `data` into this File, by wrapping
// the memory in a stream buffer and loading from a gxrio::istream on top of it.
void File::load(const char *data, std::size_t length)
{
	// Stream buffer whose get area is exactly [data, data + length)
	struct membuf : public std::streambuf
	{
		membuf(char *first, size_t count)
		{
			char *const last = first + count;
			this->setg(first, first, last);
		}
	};

	// setg() takes non-const pointers, hence the const_cast
	membuf buffer(const_cast<char *>(data), length);

	gxrio::istream is(&buffer);
	load(is);
}
void File::save(std::ostream &os)
{
Datablock *e = mHead;
while (e != nullptr)
{
e->write(os);
e = e->mNext;
}
}
void File::write(std::ostream &os, const std::vector<std::string> &order)
{
Datablock *e = mHead;
while (e != nullptr)
{
e->write(os, order);
e = e->mNext;
}
}
// Find the first datablock whose name matches `name` according to iequals.
// Returns nullptr when no such datablock exists.
Datablock *File::get(std::string_view name) const
{
	for (Datablock *block = mHead; block != nullptr; block = block->mNext)
	{
		if (iequals(block->mName, name))
			return block;
	}

	return nullptr;
}
// Return the first datablock whose name matches `name` according to iequals.
// Throws std::runtime_error when no such datablock exists.
Datablock &File::operator[](std::string_view name)
{
	Datablock *block = mHead;
	for (; block != nullptr; block = block->mNext)
	{
		if (iequals(block->mName, name))
			break;
	}

	if (block == nullptr)
		throw std::runtime_error("Datablock " + std::string(name) + " does not exist");

	return *block;
}
// Return the first datablock of this File.
// The File must contain at least one datablock; this is only checked by assert.
Datablock &File::front()
{
assert(mHead);
return *mHead;
}
// Return the last datablock of this File, found by walking the list to its tail.
// The File must contain at least one datablock; this is only checked by assert.
Datablock &File::back()
{
	assert(mHead);

	auto *tail = mHead;
	for (; tail->mNext != nullptr; tail = tail->mNext)
		;

	return *tail;
}
// Validate every datablock in this File and return true only when all of
// them are valid. When no validator is set yet, the default dictionary is
// loaded first. All datablocks are checked, also after the first failure.
bool File::isValid()
{
	if (mValidator == nullptr)
	{
		if (VERBOSE > 0)
			std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;

		loadDictionary();
	}

	bool allValid = true;

	auto block = mHead;
	while (block != nullptr)
	{
		if (not block->isValid())
			allValid = false;

		block = block->mNext;
	}

	return allValid;
}
// Call validateLinks() on every datablock of this File, in list order.
void File::validateLinks() const
{
	auto block = mHead;
	while (block != nullptr)
	{
		block->validateLinks();
		block = block->mNext;
	}
}
// Return the validator of this File.
// Throws std::runtime_error when no validator has been set.
const Validator &File::getValidator() const
{
	if (mValidator != nullptr)
		return *mValidator;

	throw std::runtime_error("no Validator defined yet");
}
// Load the default dictionary, which this code names "mmcif_ddl".
void File::loadDictionary()
{
loadDictionary("mmcif_ddl");
}
// Look up the validator registered under `dict` in the ValidatorFactory
// singleton and install it as the validator of this File.
void File::loadDictionary(const char *dict)
{
	auto &validator = ValidatorFactory::instance()[dict];
	setValidator(&validator);
}
// Install `v` (which may be nullptr) as the validator of this File and
// pass it on to every datablock currently in the File.
void File::setValidator(const Validator *v)
{
	mValidator = v;

	auto block = mHead;
	while (block != nullptr)
	{
		block->setValidator(mValidator);
		block = block->mNext;
	}
}
// Let every datablock of this File, in list order, contribute to `tags`
// through Datablock::getTagOrder.
void File::getTagOrder(std::vector<std::string> &tags) const
{
	auto block = mHead;
	while (block != nullptr)
	{
		block->getTagOrder(tags);
		block = block->mNext;
	}
}
// Advance to the next datablock in the list.
// mCurrent is dereferenced without a check, so the iterator must not be at end().
auto File::iterator::operator++() -> iterator &
{
mCurrent = mCurrent->mNext;
return *this;
}
// Iterator positioned at the first datablock (mHead) of this File.
auto File::begin() const -> iterator
{
return iterator(mHead);
}
// Past-the-end iterator: wraps a null datablock pointer.
auto File::end() const -> iterator
{
return iterator(nullptr);
}
} // namespace cif
...@@ -26,9 +26,9 @@ ...@@ -26,9 +26,9 @@
#include <numeric> #include <numeric>
#include <cif++/cif/category.hpp> #include <cif++/category.hpp>
#include <cif++/cif/datablock.hpp> #include <cif++/datablock.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
// TODO: Find out what the rules are exactly for linked items, the current implementation // TODO: Find out what the rules are exactly for linked items, the current implementation
// is inconsistent. It all depends whether a link is satisfied if a field taking part in the // is inconsistent. It all depends whether a link is satisfied if a field taking part in the
...@@ -539,7 +539,7 @@ category::category(const category &rhs) ...@@ -539,7 +539,7 @@ category::category(const category &rhs)
for (auto r = rhs.m_head; r != nullptr; r = r->m_next) for (auto r = rhs.m_head; r != nullptr; r = r->m_next)
insert_impl(end(), clone_row(*r)); insert_impl(end(), clone_row(*r));
if (m_validator != nullptr) if (m_cat_validator != nullptr)
m_index = new category_index(this); m_index = new category_index(this);
} }
...@@ -585,7 +585,7 @@ category &category::operator=(const category &rhs) ...@@ -585,7 +585,7 @@ category &category::operator=(const category &rhs)
m_parent_links = rhs.m_parent_links; m_parent_links = rhs.m_parent_links;
m_child_links = rhs.m_child_links; m_child_links = rhs.m_child_links;
if (m_validator != nullptr) if (m_cat_validator != nullptr)
m_index = new category_index(this); m_index = new category_index(this);
} }
...@@ -620,7 +620,6 @@ category &category::operator=(category &&rhs) ...@@ -620,7 +620,6 @@ category &category::operator=(category &&rhs)
category::~category() category::~category()
{ {
clear(); clear();
delete m_index;
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -1076,9 +1075,7 @@ void category::clear() ...@@ -1076,9 +1075,7 @@ void category::clear()
m_head = m_tail = nullptr; m_head = m_tail = nullptr;
delete m_index; delete m_index;
m_index = nullptr;
if (m_validator != nullptr)
m_index = new category_index(this);
} }
void category::erase_orphans(condition &&cond) void category::erase_orphans(condition &&cond)
...@@ -1145,7 +1142,7 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie ...@@ -1145,7 +1142,7 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie
(*col.m_validator)(value); (*col.m_validator)(value);
// first some sanity checks, what was the old value and is it the same for all rows? // first some sanity checks, what was the old value and is it the same for all rows?
std::string_view oldValue = rows.front()[tag].text(); std::string oldValue{ rows.front()[tag].text() };
for (auto row : rows) for (auto row : rows)
{ {
if (oldValue != row[tag].text()) if (oldValue != row[tag].text())
...@@ -1258,6 +1255,10 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie ...@@ -1258,6 +1255,10 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie
void category::update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate) void category::update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate)
{ {
// make sure we have an index, if possible
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(this);
auto &col = m_columns[column]; auto &col = m_columns[column];
const char *oldValue = nullptr; const char *oldValue = nullptr;
...@@ -1285,7 +1286,6 @@ void category::update_value(row *row, size_t column, std::string_view value, boo ...@@ -1285,7 +1286,6 @@ void category::update_value(row *row, size_t column, std::string_view value, boo
// before updating // before updating
bool reinsert = false; bool reinsert = false;
if (updateLinked and // an update of an Item's value if (updateLinked and // an update of an Item's value
m_index != nullptr and key_field_indices().count(column)) m_index != nullptr and key_field_indices().count(column))
{ {
...@@ -1498,6 +1498,9 @@ row_handle category::create_copy(row_handle r) ...@@ -1498,6 +1498,9 @@ row_handle category::create_copy(row_handle r)
// proxy methods for every insertion // proxy methods for every insertion
category::iterator category::insert_impl(const_iterator pos, row *n) category::iterator category::insert_impl(const_iterator pos, row *n)
{ {
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(this);
assert(n != nullptr); assert(n != nullptr);
assert(n->m_next == nullptr); assert(n->m_next == nullptr);
......
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/cif/category.hpp> #include <cif++/category.hpp>
#include <cif++/cif/condition.hpp> #include <cif++/condition.hpp>
namespace cif namespace cif
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/cif/datablock.hpp> #include <cif++/datablock.hpp>
namespace cif namespace cif
{ {
......
...@@ -24,10 +24,10 @@ ...@@ -24,10 +24,10 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/cif/condition.hpp> #include <cif++/condition.hpp>
#include <cif++/cif/dictionary_parser.hpp> #include <cif++/dictionary_parser.hpp>
#include <cif++/cif/file.hpp> #include <cif++/file.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
namespace cif namespace cif
{ {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <gxrio.hpp> #include <gxrio.hpp>
#include <cif++/cif/file.hpp> #include <cif++/file.hpp>
namespace cif namespace cif
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/cif/row.hpp> #include <cif++/row.hpp>
namespace cif namespace cif
{ {
......
...@@ -32,9 +32,9 @@ ...@@ -32,9 +32,9 @@
#include <cif++/utilities.hpp> #include <cif++/utilities.hpp>
#include <cif++/cif/forward_decl.hpp> #include <cif++/forward_decl.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
#include <cif++/cif/file.hpp> #include <cif++/file.hpp>
namespace cif namespace cif
{ {
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/Cif++.hpp>
#include <map>
#include <set>
#include <cif++/AtomType.hpp>
#include <cif++/Compound.hpp>
#include <cif++/PDB2CifRemark3.hpp>
#include <cif++/utilities.hpp>
using cif::Datablock;
using cif::Category;
using cif::Row;
using cif::Key;
using cif::iequals;
// --------------------------------------------------------------------
// One entry of a REMARK 3 line template table (see the k..._Template arrays
// in this file). Entries are written with aggregate initialisation, so
// trailing members that are left out take their default value.
struct TemplateLine
{
	// Regular expression text for this template line.
	const char *rx;

	// Offset stored with the line; the tables use 1, 0 and a negative value.
	// NOTE(review): presumably added to the current line index to find the
	// next line to try — confirm in Remark3Parser.
	int nextStateOffset;

	// Category name belonging to the items below; null when left out.
	const char *category;

	// Item names; the tables list one name per capture group in rx.
	std::initializer_list<const char *> items;

	// Restraint type label, only given for the refine_ls_restr entries.
	const char *lsRestrType = nullptr;

	// NOTE(review): presumably requests a new row instead of reusing an
	// existing one — confirm in Remark3Parser. Explicitly defaulted so a
	// default-initialised TemplateLine never holds an indeterminate value.
	bool createNew = false;
};
// --------------------------------------------------------------------
// Line templates handed to BUSTER_TNT_Remark3Parser (defined right after this table).
// Each entry is a TemplateLine:
//   { rx, nextStateOffset [, category, { items... } [, lsRestrType, createNew ]] }
// Entries without a category only carry a regular expression and an offset.
// The leading /* n */ comments number the entries. Every entry has offset 1,
// except the last one (98) whose offset is 84 - 98.
// NOTE(review): that presumably sends the parser back to entry 84 (TLS GROUP)
// so that several TLS groups can follow each other — confirm in Remark3Parser.
const TemplateLine kBusterTNT_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+?))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+?))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 5 */ { R"(NUMBER OF REFLECTIONS :\s+(.+?))", 1, "refine", { "ls_number_reflns_obs" } },
/* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 7 */ { R"(CROSS-VALIDATION METHOD :\s+(.+?))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 8 */ { R"(FREE R VALUE TEST SET SELECTION :\s+(.+?))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 9 */ { R"(R VALUE \(WORKING ?\+ ?TEST SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_obs" } },
/* 10 */ { R"(R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine", { "ls_R_factor_R_work" } },
/* 11 */ { R"(FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free" } },
/* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 14 */ { R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+?))", 1, "refine", { "ls_R_factor_R_free_error" } },
/* 15 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
/* 16 */ { R"(TOTAL NUMBER OF BINS USED :\s+(.+?))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
/* 17 */ { R"(BIN RESOLUTION RANGE HIGH \(A(?:NGSTROMS)?\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_high" } },
/* 18 */ { R"(BIN RESOLUTION RANGE LOW \(A(?:NGSTROMS)?\) :\s+(.+?))", 1, "refine_ls_shell", { "d_res_low" } },
/* 19 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
/* 20 */ { R"(REFLECTIONS IN BIN \(WORKING ?\+ ?TEST(?: SET)?\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_all" } },
/* 21 */ { R"(BIN R VALUE \(WORKING ?\+ ?TEST(?: SET)?\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_all" } },
/* 22 */ { R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
/* 23 */ { R"(BIN R VALUE \(WORKING SET\) :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_work" } },
/* 24 */ { R"(BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free" } },
/* 25 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+?))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
/* 26 */ { R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+?))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
/* 27 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+?))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
/* 28 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 29 */ { R"(PROTEIN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 30 */ { R"(NUCLEIC ACID ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 31 */ { R"(HETEROGEN ATOMS :\s+(.+?))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 32 */ { R"(SOLVENT ATOMS :\s+(.+?))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 33 */ { R"(B VALUES\.)", 1 },
/* 34 */ { R"(B VALUE TYPE :\s+(.+?))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 35 */ { R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+?))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 36 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+?))", 1, "refine", { "B_iso_mean" } },
/* 37 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 38 */ { R"(B11 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][1]" } },
/* 39 */ { R"(B22 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][2]" } },
/* 40 */ { R"(B33 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[3][3]" } },
/* 41 */ { R"(B12 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][2]" } },
/* 42 */ { R"(B13 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[1][3]" } },
/* 43 */ { R"(B23 \(A\*\*2\) :\s+(.+?))", 1, "refine", { "aniso_B[2][3]" } },
/* 44 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
/* 45 */ { R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+?))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
/* 46 */ { R"(DPI \(BLOW EQ-10\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_Blow_DPI" } },
/* 47 */ { R"(DPI \(BLOW EQ-9\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Blow_DPI" } },
/* 48 */ { R"(DPI \(CRUICKSHANK\) BASED ON R VALUE \(A\) :\s+(.+?))", 1, "refine", { "overall_SU_R_Cruickshank_DPI" } },
/* 49 */ { R"(DPI \(CRUICKSHANK\) BASED ON FREE R VALUE \(A\) :\s+(.+?))", 1, "refine", { "pdbx_overall_SU_R_free_Cruickshank_DPI" } },
/* 50 */ { R"(REFERENCES: BLOW.+)", 1 },
/* 51 */ { R"(CORRELATION COEFFICIENTS\.)", 1 },
/* 52 */ { R"(CORRELATION COEFFICIENT FO-FC :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
/* 53 */ { R"(CORRELATION COEFFICIENT FO-FC FREE :\s+(.+?))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
/* 54 */ { R"(NUMBER OF GEOMETRIC FUNCTION TERMS DEFINED : 15)", 1 },
/* 55 */ { R"(TERM COUNT WEIGHT FUNCTION\.)", 1 },
/* 56 */ { R"(BOND LENGTHS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_bond_d", true },
/* 57 */ { R"(BOND ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_angle_deg", true },
/* 58 */ { R"(TORSION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_dihedral_angle_d", true },
/* 59 */ { R"(TRIGONAL CARBON PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_trig_c_planes", true },
/* 60 */ { R"(GENERAL PLANES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_gen_planes", true },
/* 61 */ { R"(ISOTROPIC THERMAL FACTORS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_it", true },
/* 62 */ { R"(BAD NON-BONDED CONTACTS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_nbd", true },
/* 63 */ { R"(IMPROPER TORSIONS :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_improper_torsion", true },
/* 64 */ { R"(PSEUDOROTATION ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_pseud_angle", true },
/* 65 */ { R"(CHIRAL IMPROPER TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_chiral_improper_torsion", true },
/* 66 */ { R"(SUM OF OCCUPANCIES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_sum_occupancies", true },
/* 67 */ { R"(UTILITY DISTANCES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_distance", true },
/* 68 */ { R"(UTILITY ANGLES :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_angle", true },
/* 69 */ { R"(UTILITY TORSION :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_utility_torsion", true },
/* 70 */ { R"(IDEAL-DIST CONTACT TERM :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "number", "weight", "pdbx_restraint_function" }, "t_ideal_dist_contact", true },
/* 71 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
/* 72 */ { R"(BOND LENGTHS \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_bond_d", false },
/* 73 */ { R"(BOND ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_angle_deg", false },
/* 74 */ { R"(TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_dihedral_angle_d", false },
/* 75 */ { R"(PSEUDO ROTATION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_pseud_angle", false },
/* 76 */ { R"(TRIGONAL CARBON PLANES \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_trig_c_planes", false },
/* 77 */ { R"(GENERAL PLANES \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_gen_planes", false },
/* 78 */ { R"(ISOTROPIC THERMAL FACTORS \(A\*\*2\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_it", false },
/* 79 */ { R"(NON-BONDED CONTACTS \(A\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_nbd", false },
/* 80 */ { R"(PEPTIDE OMEGA TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_omega_torsion", false },
/* 81 */ { R"(OTHER TORSION ANGLES \(DEGREES\) :\s+(.+?);\s+(.+?);\s+(.+?))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_other_torsion", false },
/* 82 */ { R"(TLS DETAILS\.?)", 1 },
/* 83 */ { R"(NUMBER OF TLS GROUPS :.+)", 1 },
/* 84 */ { R"(TLS GROUP :\s*(\d+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
/* 85 */ { R"((?:SELECTION|SET) *:\s+(.+?))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
/* 86 */ { R"(ORIGIN FOR THE GROUP \(A\):\s+(.+?)\s+(.+?)\s+(.+?))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
/* 87 */ { R"(T TENSOR)", 1 },
/* 88 */ { R"(T11:\s+(.+?) T22:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
/* 89 */ { R"(T33:\s+(.+?) T12:\s+(.+?))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
/* 90 */ { R"(T13:\s+(.+?) T23:\s+(.+?))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
/* 91 */ { R"(L TENSOR)", 1 },
/* 92 */ { R"(L11:\s+(.+?) L22:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
/* 93 */ { R"(L33:\s+(.+?) L12:\s+(.+?))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
/* 94 */ { R"(L13:\s+(.+?) L23:\s+(.+?))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
/* 95 */ { R"(S TENSOR)", 1 },
/* 96 */ { R"(S11:\s+(.+?) S12:\s+(.+?) S13:\s+(.+?))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
/* 97 */ { R"(S21:\s+(.+?) S22:\s+(.+?) S23:\s+(.+?))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
/* 98 */ { R"(S31:\s+(.+?) S32:\s+(.+?) S33:\s+(.+?))", 84 - 98, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
};
// Remark3Parser specialisation that is driven by the kBusterTNT_Template table.
// All work is done by the base class; this class only supplies the table, the
// number of entries in it and a regular expression.
// NOTE(review): the regular expression presumably matches the program name
// (BUSTER or BUSTER-TNT) with an optional version — confirm in Remark3Parser.
class BUSTER_TNT_Remark3Parser : public Remark3Parser
{
  public:
	BUSTER_TNT_Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::Datablock &db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kBusterTNT_Template,
			  sizeof(kBusterTNT_Template) / sizeof(kBusterTNT_Template[0]),
			  std::regex(R"((BUSTER(?:-TNT)?)(?: (\d+(?:\..+)?))?)"))
	{
	}
};
// Line templates handed to CNS_Remark3Parser (defined right after this table).
// Each entry is a TemplateLine:
//   { rx, nextStateOffset [, category, { items... } [, lsRestrType, createNew ]] }
// The leading /* n */ comments number the entries; every entry has offset 1.
// Entries 75 and 77-80 only carry a regular expression and an offset: their
// category and item names are commented out.
const TemplateLine kCNS_Template[] = {
/* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
/* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 4 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 5 */ { R"(DATA CUTOFF HIGH \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
/* 6 */ { R"(DATA CUTOFF LOW \(ABS\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
/* 7 */ { R"(COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 8 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 9 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 10 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 11 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 12 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 13 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 14 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 15 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 16 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 17 */ { R"(ESTIMATED ERROR OF FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
/* 18 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
/* 19 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
/* 20 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
/* 21 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
/* 22 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
/* 23 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
/* 24 */ { R"(ESTIMATED ERROR OF FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_error_no_cutoff" } },
/* 25 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
/* 26 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
/* 27 */ { R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
/* 28 */ { R"(BIN RESOLUTION RANGE HIGH \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
/* 29 */ { R"(BIN RESOLUTION RANGE LOW \(A\)\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
/* 30 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
/* 31 */ { R"(REFLECTIONS IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
/* 32 */ { R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
/* 33 */ { R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
/* 34 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
/* 35 */ { R"(BIN FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
/* 36 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
/* 37 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 38 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 39 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 40 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 41 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 42 */ { R"(B VALUES\.)", 1 },
/* 43 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 44 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 45 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
/* 46 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 47 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 48 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 49 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 50 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 51 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 52 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 53 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
/* 54 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
/* 55 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
/* 56 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
/* 57 */ { R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
/* 58 */ { R"(ESD FROM C-V LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
/* 59 */ { R"(ESD FROM C-V SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
/* 60 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
/* 61 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_bond_d", false },
/* 62 */ { R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_angle_deg", false },
/* 63 */ { R"(DIHEDRAL ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_dihedral_angle_d", false },
/* 64 */ { R"(IMPROPER ANGLES \(DEGREES\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "c_improper_angle_d", false },
/* 65 */ { R"(ISOTROPIC THERMAL MODEL\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
/* 66 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
/* 67 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcbond_it", false },
/* 68 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_mcangle_it", false },
/* 69 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scbond_it", false },
/* 70 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "c_scangle_it", false },
/* 71 */ { R"(BULK SOLVENT MODELING\.)", 1 },
/* 72 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
/* 73 */ { R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
/* 74 */ { R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
/* 75 */ { R"(NCS MODEL\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },
/* 76 */ { R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
/* 77 */ { R"(GROUP (\d+) POSITIONAL \(A\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_position", "weight_position" } */ },
/* 78 */ { R"(GROUP (\d+) B-FACTOR \(A\*\*2\)\s*:\s*(.+))", 1, /* "refine_ls_restr_ncs", { "dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
/* 79 */ { R"(PARAMETER FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
/* 80 */ { R"(TOPOLOGY FILE (\d+) :\s+(.+))", 1, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
};
// REMARK 3 parser bound to the kCNS_Template line table.
// The program pattern handed to the base class accepts "CNS" or "CNX",
// optionally followed by a single space and a numeric version such as "1.2".
// NOTE(review): how Remark3Parser uses that pattern (program/version
// detection, presumably) is defined outside this chunk -- confirm there.
class CNS_Remark3Parser : public Remark3Parser
{
  public:
	CNS_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kCNS_Template,
			  // number of entries in the template table
			  sizeof(kCNS_Template) / sizeof(kCNS_Template[0]),
			  std::regex(R"((CN[SX])(?: (\d+(?:\.\d+)?))?)"))
	{
	}
};
// Line templates used by PHENIX_Remark3Parser for REMARK 3 text.
// Every entry starts with a regular expression for one line followed by an
// integer step value; most entries then name a category string and a list of
// item strings, and some add a restraint-type string and a bool flag.
// NOTE(review): the integer looks like a relative offset to the next template
// entry (1 = following entry, 0 = stay on this entry, `48 - 62` = go back from
// entry 62 to entry 48) and the item list presumably receives the regex
// capture groups in order -- confirm against the TemplateLine definition and
// Remark3Parser, both outside this chunk.
const TemplateLine kPHENIX_Template[] = {
/* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
/* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 4 */ { R"(MIN\(FOBS/SIGMA_FOBS\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 5 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 6 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 7 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 8 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 9 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 10 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 11 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 12 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 13 */ { R"(FIT TO DATA USED IN REFINEMENT \(IN BINS\)\.)", 1 },
/* 14 */ { R"(BIN RESOLUTION RANGE COMPL\. NWORK NFREE RWORK RFREE)", 1 },
// Entry 15 is the only one with step 0; its seven capture groups line up with
// the seven refine_ls_shell item names listed below.
/* 15 */ { R"(\d+ (\d+(?:\.\d+)?) - (\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+) (\d+) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?))", 0,
"refine_ls_shell", { "d_res_low", "d_res_high", "percent_reflns_obs", "number_reflns_R_work", "number_reflns_R_free", "R_factor_R_work", "R_factor_R_free" },
nullptr, true },
/* 16 */ { R"(BULK SOLVENT MODELLING\.)", 1 },
/* 17 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
/* 18 */ { R"(SOLVENT RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
/* 19 */ { R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
/* 20 */ { R"(K_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
/* 21 */ { R"(B_SOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
/* 22 */ { R"(ERROR ESTIMATES\.)", 1 },
/* 23 */ { R"(COORDINATE ERROR \(MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
/* 24 */ { R"(PHASE ERROR \(DEGREES, MAXIMUM-LIKELIHOOD BASED\)\s*:\s*(.+))", 1, "refine", { "pdbx_overall_phase_error" } },
/* 25 */ { R"(B VALUES\.)", 1 },
/* 26 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 27 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 28 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
/* 29 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 30 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 31 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 32 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 33 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 34 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 35 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 36 */ { R"(TWINNING INFORMATION\.)", 1 },
/* 37 */ { R"(FRACTION:\s*(.+))", 1, "pdbx_reflns_twin", { "fraction" } },
/* 38 */ { R"(OPERATOR:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
/* 39 */ { R"(DEVIATIONS FROM IDEAL VALUES\.)", 1 },
/* 40 */ { R"(RMSD COUNT)", 1 },
// NOTE(review): in entries 41-45 the group `(\d+(?:\.\d+))` has no trailing
// `?`, so the fractional part is mandatory and a value written without a
// decimal point will not match (entry 15 uses `(\d+(?:\.\d+)?)`). Confirm
// this is intended.
/* 41 */ { R"(BOND\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_bond_d", false },
/* 42 */ { R"(ANGLE\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_angle_d", false },
/* 43 */ { R"(CHIRALITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_chiral_restr", false },
/* 44 */ { R"(PLANARITY\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_plane_restr", false },
/* 45 */ { R"(DIHEDRAL\s*:\s*(\d+(?:\.\d+))\s+(\d+))", 1, "refine_ls_restr", { "dev_ideal", "number" }, "f_dihedral_angle_d", false },
/* 46 */ { R"(TLS DETAILS)", 1 },
/* 47 */ { R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
// Entries 48-62 describe one TLS group; entry 62 carries the step `48 - 62`.
/* 48 */ { R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
/* 49 */ { R"(SELECTION:\s*(.+))", 1, "pdbx_refine_tls_group", { "selection_details" }, nullptr, true },
/* 50 */ { R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*(\S+)\s+(\S+)\s+(\S+))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
/* 51 */ { R"(T TENSOR)", 1 },
/* 52 */ { R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
/* 53 */ { R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
/* 54 */ { R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
/* 55 */ { R"(L TENSOR)", 1 },
/* 56 */ { R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
/* 57 */ { R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
/* 58 */ { R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
/* 59 */ { R"(S TENSOR)", 1 },
/* 60 */ { R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
/* 61 */ { R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
/* 62 */ { R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 48 - 62, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
// Entries 63-68 have no category or items; entry 68 carries the step `63 - 68`.
/* 63 */ { R"(ANOMALOUS SCATTERER GROUPS DETAILS\.)", 1 },
/* 64 */ { R"(NUMBER OF ANOMALOUS SCATTERER GROUPS\s*:\s*\d+)", 1 },
/* 65 */ { R"(ANOMALOUS SCATTERER GROUP\s*:\s*\d+)", 1 },
/* 66 */ { R"(SELECTION: .+)", 1 },
/* 67 */ { R"(fp\s*:\s*.+)", 1 },
/* 68 */ { R"(fdp\s*:\s*.+)", 63 - 68 },
/* 69 */ { R"(NCS DETAILS)", 1 },
/* 70 */ { R"(NUMBER OF NCS GROUPS\s*:\s*(.+))", 1 },
};
// REMARK 3 parser bound to the kPHENIX_Template line table.
// The program pattern handed to the base class matches "PHENIX" followed by
// " (PHENIX.REFINE: <version>", with the closing parenthesis optional.
// fixup() is declared here and defined out of line.
class PHENIX_Remark3Parser : public Remark3Parser
{
  public:
	PHENIX_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kPHENIX_Template,
			  // number of entries in the template table
			  sizeof(kPHENIX_Template) / sizeof(kPHENIX_Template[0]),
			  std::regex(R"((PHENIX)(?: \(PHENIX\.REFINE:) (\d+(?:\.[^)]+)?)\)?)"))
	{
	}

	virtual void fixup();
};
// Rescales "percent_reflns_obs" in every row of the "refine_ls_shell"
// category: the stored value is read as a float, multiplied by 100 and
// written back as an integer.
//
// The product is rounded to the nearest integer rather than truncated.
// A plain static_cast<int> drops the fractional part, and because most
// two-decimal fractions are not exactly representable as float the product
// can land just below the intended integer (for instance 0.59f * 100 yields
// 58.999996, which truncates to 58 instead of 59).
//
// Any exception raised while reading or writing a row is swallowed so that
// the loop continues with the next row (best-effort behaviour).
void PHENIX_Remark3Parser::fixup()
{
	for (auto r: mDb["refine_ls_shell"])
	{
		try
		{
			float val = r["percent_reflns_obs"].as<float>();

			// Adding 0.5 before the truncating cast rounds to nearest for the
			// non-negative values expected here.
			int perc = static_cast<int>(val * 100 + 0.5f);

			r["percent_reflns_obs"] = perc;
		}
		catch (...) {}
	}
}
// Line templates used by NUCLSQ_Remark3Parser for REMARK 3 text.
// Every entry starts with a regular expression for one line followed by an
// integer step value (always 1 in this table); most entries then name a
// category string and a list of item strings, and the restraint entries add
// a restraint-type string and a bool flag.
// NOTE(review): the item list presumably receives the regex capture groups in
// order -- confirm against the TemplateLine definition, outside this chunk.
// The index comments below number the entries consecutively from 0.
const TemplateLine kNUCLSQ_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 14 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
/* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_all" } },
/* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
/* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
/* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
/* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
/* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
/* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 26 */ { R"(B VALUES\.)", 1 },
/* 27 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 28 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 29 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
/* 30 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 31 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 32 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 33 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 34 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 35 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 36 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 37 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
/* 38 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
/* 39 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
/* 40 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
/* 41 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
/* 42 */ { R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
/* 43 */ { R"(SUGAR-BASE BOND DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_d", false },
/* 44 */ { R"(SUGAR-BASE BOND ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_angle_d", false },
/* 45 */ { R"(PHOSPHATE BONDS DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_d", false },
/* 46 */ { R"(PHOSPHATE BOND ANGLE, H-BOND \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_angle_d", false },
/* 47 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_plane_restr", false },
/* 48 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_chiral_restr", false },
/* 49 */ { R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
/* 50 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_singtor_nbd", false },
/* 51 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_multtor_nbd", false },
/* 52 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
/* 53 */ { R"(SUGAR-BASE BONDS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_bond_it", false },
/* 54 */ { R"(SUGAR-BASE ANGLES \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_sugar_angle_it", false },
/* 55 */ { R"(PHOSPHATE BONDS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_bond_it", false },
/* 56 */ { R"(PHOSPHATE BOND ANGLE, H-BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "n_phos_angle_it", false },
};
// REMARK 3 parser bound to the kNUCLSQ_Template line table.
// The program pattern handed to the base class matches "NUCLSQ", optionally
// followed by a single space and a numeric version.
class NUCLSQ_Remark3Parser : public Remark3Parser
{
  public:
	NUCLSQ_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kNUCLSQ_Template,
			  // number of entries in the template table
			  sizeof(kNUCLSQ_Template) / sizeof(kNUCLSQ_Template[0]),
			  std::regex(R"((NUCLSQ)(?: (\d+(?:\.\d+)?))?)"))
	{
	}

	// For every row in "refine_hist", stores the sum of the protein, nucleic
	// acid, ligand and solvent atom counts in "number_atoms_total".
	// Any exception raised while handling a row is swallowed and the loop
	// moves on to the next row.
	virtual void fixup()
	{
		for (auto row: mDb["refine_hist"])
		{
			try
			{
				int nProtein, nNucleicAcid, nLigand, nSolvent;

				cif::tie(nProtein, nNucleicAcid, nLigand, nSolvent) =
					row.get("pdbx_number_atoms_protein", "pdbx_number_atoms_nucleic_acid", "pdbx_number_atoms_ligand", "number_atoms_solvent");

				row["number_atoms_total"] = nProtein + nNucleicAcid + nLigand + nSolvent;
			}
			catch (...) {}
		}
	}
};
// Line templates used by PROLSQ_Remark3Parser for REMARK 3 text.
// Every entry starts with a regular expression for one line followed by an
// integer step value (always 1 in this table); most entries then name a
// category string and a list of item strings, and the restraint entries add
// a restraint-type string ("p_...") and a bool flag.
// NOTE(review): the item list presumably receives the regex capture groups in
// order -- confirm against the TemplateLine definition, outside this chunk.
const TemplateLine kPROLSQ_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 14 */ { R"(FIT/AGREEMENT OF MODEL WITH ALL DATA\.)", 1 },
/* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_all" } },
/* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
/* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
/* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
/* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
/* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
/* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 26 */ { R"(B VALUES\.)", 1 },
/* 27 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 28 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 29 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
/* 30 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 31 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 32 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 33 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 34 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 35 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 36 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 37 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
/* 38 */ { R"(ESD FROM LUZZATI PLOT \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
/* 39 */ { R"(ESD FROM SIGMAA \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
/* 40 */ { R"(LOW RESOLUTION CUTOFF \(A\)\s*:\s*(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
/* 41 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
/* 42 */ { R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
// In the restraint entries below the first capture group is paired with
// "dev_ideal" and the second (after the ';') with "dev_ideal_target".
/* 43 */ { R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
/* 44 */ { R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
/* 45 */ { R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
/* 46 */ { R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
/* 47 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
/* 48 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
/* 49 */ { R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
/* 50 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
/* 51 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
/* 52 */ { R"(H-BOND \(X\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
/* 53 */ { R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
/* 54 */ { R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS\.)", 1 },
/* 55 */ { R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
/* 56 */ { R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
/* 57 */ { R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
/* 58 */ { R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
/* 59 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
/* 60 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
/* 61 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
/* 62 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
/* 63 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
};
// REMARK 3 parser bound to the kPROLSQ_Template line table.
// The program pattern handed to the base class matches "PROLSQ", optionally
// followed by a single space and a numeric version.
class PROLSQ_Remark3Parser : public Remark3Parser
{
  public:
	PROLSQ_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kPROLSQ_Template,
			  // number of entries in the template table
			  sizeof(kPROLSQ_Template) / sizeof(kPROLSQ_Template[0]),
			  std::regex(R"((PROLSQ)(?: (\d+(?:\.\d+)?))?)"))
	{
	}

	// For every row in "refine_hist", stores the sum of the protein, nucleic
	// acid, ligand and solvent atom counts in "number_atoms_total".
	// Any exception raised while handling a row is swallowed and the loop
	// moves on to the next row.
	virtual void fixup()
	{
		for (auto row: mDb["refine_hist"])
		{
			try
			{
				int nProtein, nNucleicAcid, nLigand, nSolvent;

				cif::tie(nProtein, nNucleicAcid, nLigand, nSolvent) =
					row.get("pdbx_number_atoms_protein", "pdbx_number_atoms_nucleic_acid", "pdbx_number_atoms_ligand", "number_atoms_solvent");

				row["number_atoms_total"] = nProtein + nNucleicAcid + nLigand + nSolvent;
			}
			catch (...) {}
		}
	}
};
// Line templates used by REFMAC_Remark3Parser for REMARK 3 text.
// Every entry starts with a regular expression for one line followed by an
// integer step value (always 1 in this table); most entries then name a
// category string and a list of item strings, and the restraint entries add
// a restraint-type string ("p_...") and a bool flag.
// NOTE(review): the item list presumably receives the regex capture groups in
// order -- confirm against the TemplateLine definition, outside this chunk.
//
// Literal full stops in the section headings are escaped (`\.`) so that they
// match only a '.', exactly as the corresponding lines in kPROLSQ_Template
// do; an unescaped '.' is a regex wildcard that accepts any character.
// The index comments number the entries consecutively from 0.
const TemplateLine kREFMAC_Template[] = {
	/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
	/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
	/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
	/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
	/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
	/* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
	/* 6 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
	/* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
	/* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
	/* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
	/* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
	/* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
	/* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
	/* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
	/* 14 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
	/* 15 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
	/* 16 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
	/* 17 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
	/* 18 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
	// The "ALL ATOMS" line is matched but its value is not stored anywhere.
	/* 19 */ { R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", "pdbx_number_atoms_protein" */ },
	/* 20 */ { R"(B VALUES\..*)", 1 },
	/* 21 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
	/* 22 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
	/* 23 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
	/* 24 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
	/* 25 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
	/* 26 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
	/* 27 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
	/* 28 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
	/* 29 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
	/* 30 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
	/* 31 */ { R"(ESTIMATED OVERALL COORDINATE ERROR\.)", 1 },
	/* 32 */ { R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
	/* 33 */ { R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
	/* 34 */ { R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
	/* 35 */ { R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
	/* 36 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\.)", 1 },
	/* 37 */ { R"(DISTANCE RESTRAINTS\. RMS SIGMA)", 1 },
	/* 38 */ { R"(BOND LENGTH \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_bond_d", false },
	/* 39 */ { R"(ANGLE DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_angle_d", false },
	/* 40 */ { R"(INTRAPLANAR 1-4 DISTANCE \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_d", false },
	/* 41 */ { R"(H-BOND OR METAL COORDINATION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_hb_or_metal_coord", false },
	/* 42 */ { R"(PLANE RESTRAINT \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_plane_restr", false },
	/* 43 */ { R"(CHIRAL-CENTER RESTRAINT \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_chiral_restr", false },
	/* 44 */ { R"(NON-BONDED CONTACT RESTRAINTS\.)", 1 },
	/* 45 */ { R"(SINGLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_singtor_nbd", false },
	/* 46 */ { R"(MULTIPLE TORSION \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_multtor_nbd", false },
	// All three dots of "X...Y" are escaped, matching the X-H...Y entry below.
	/* 47 */ { R"(H-BOND \(X\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xyhbond_nbd", false },
	/* 48 */ { R"(H-BOND \(X-H\.\.\.Y\) \(A\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_xhyhbond_nbd", false },
	/* 49 */ { R"(CONFORMATIONAL TORSION ANGLE RESTRAINTS\.)", 1 },
	/* 50 */ { R"(SPECIFIED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_special_tor", false },
	/* 51 */ { R"(PLANAR \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_planar_tor", false },
	/* 52 */ { R"(STAGGERED \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_staggered_tor", false },
	/* 53 */ { R"(TRANSVERSE \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_transverse_tor", false },
	/* 54 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
	/* 55 */ { R"(MAIN-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcbond_it", false },
	/* 56 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_mcangle_it", false },
	/* 57 */ { R"(SIDE-CHAIN BOND \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scbond_it", false },
	/* 58 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "p_scangle_it", false },
};
// REMARK 3 parser bound to the kREFMAC_Template line table.
// The program pattern handed to the base class is ".+", i.e. it accepts any
// non-empty text, and the reported program name and version are fixed
// instead: program() always yields "REFMAC" and version() an empty string.
class REFMAC_Remark3Parser : public Remark3Parser
{
  public:
	REFMAC_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kREFMAC_Template,
			  // number of entries in the template table
			  sizeof(kREFMAC_Template) / sizeof(kREFMAC_Template[0]),
			  std::regex(".+"))
	{
	}

	// Fixed program name.
	virtual std::string program()
	{
		return "REFMAC";
	}

	// No version is reported for this parser.
	virtual std::string version()
	{
		return "";
	}
};
const TemplateLine kREFMAC5_Template[] = {
/* 0 */ { R"(REFINEMENT TARGET\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
/* 1 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 2 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 3 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 4 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 5 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 6 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 7 */ { R"(FIT TO DATA USED IN REFINEMENT.)", 1 },
/* 8 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 9 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 10 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 11 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 12 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 14 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 15 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN.)", 1 },
/* 16 */ { R"(TOTAL NUMBER OF BINS USED\s*:\s*(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
/* 17 */ { R"(BIN RESOLUTION RANGE HIGH(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_high" } },
/* 18 */ { R"(BIN RESOLUTION RANGE LOW(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine_ls_shell", { "d_res_low" } },
/* 19 */ { R"(REFLECTION IN BIN \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
/* 20 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\)\s*:\s*(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
/* 21 */ { R"(BIN R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
/* 22 */ { R"(BIN FREE R VALUE SET COUNT\s*:\s*(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
/* 23 */ { R"(BIN FREE R VALUE\s*:\s*(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
/* 24 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT.)", 1 },
/* 25 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 26 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 27 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 28 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 29 */ { R"(ALL ATOMS\s*:\s*(.+))", 1, /* "refine_hist", { "pdbx_number_atoms_protein" } */ },
/* 30 */ { R"(B VALUES\..*)", 1 },
/* 31 */ { R"(B VALUE TYPE\s*:\s*(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 32 */ { R"(FROM WILSON PLOT \(A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 33 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\)\s*:\s*(.+))", 1, "refine", { "B_iso_mean" } },
/* 34 */ { R"(OVERALL ANISOTROPIC B VALUE.)", 1 },
/* 35 */ { R"(B11 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 36 */ { R"(B22 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 37 */ { R"(B33 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 38 */ { R"(B12 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 39 */ { R"(B13 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 40 */ { R"(B23 \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 41 */ { R"(ESTIMATED OVERALL COORDINATE ERROR.)", 1 },
/* 42 */ { R"(ESU BASED ON R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R" } },
/* 43 */ { R"(ESU BASED ON FREE R VALUE(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "pdbx_overall_ESU_R_Free" } },
/* 44 */ { R"(ESU BASED ON MAXIMUM LIKELIHOOD(?:\s*\(A\))?\s*:\s*(.+))", 1, "refine", { "overall_SU_ML" } },
/* 45 */ { R"(ESU FOR B VALUES BASED ON MAXIMUM LIKELIHOOD \(A\*\*2\)\s*:\s*(.+))", 1, "refine", { "overall_SU_B" } },
/* 46 */ { R"(CORRELATION COEFFICIENTS.)", 1 },
/* 47 */ { R"(CORRELATION COEFFICIENT FO-FC\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc" } },
/* 48 */ { R"(CORRELATION COEFFICIENT FO-FC FREE\s*:\s*(.+))", 1, "refine", { "correlation_coeff_Fo_to_Fc_free" } },
/* 49 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT)", 1 },
/* 50 */ { R"(BOND LENGTHS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_refined_d", false },
/* 51 */ { R"(BOND LENGTHS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_bond_other_d", false },
/* 52 */ { R"(BOND ANGLES REFINED ATOMS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_refined_deg", false },
/* 53 */ { R"(BOND ANGLES OTHERS \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_angle_other_deg", false },
/* 54 */ { R"(TORSION ANGLES, PERIOD 1 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_1_deg", false },
/* 55 */ { R"(TORSION ANGLES, PERIOD 2 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_2_deg", false },
/* 56 */ { R"(TORSION ANGLES, PERIOD 3 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_3_deg", false },
/* 57 */ { R"(TORSION ANGLES, PERIOD 4 \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_dihedral_angle_4_deg", false },
/* 58 */ { R"(CHIRAL-CENTER RESTRAINTS \(A\*\*3\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_chiral_restr", false },
/* 59 */ { R"(GENERAL PLANES REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_refined", false },
/* 60 */ { R"(GENERAL PLANES OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_gen_planes_other", false },
/* 61 */ { R"(NON-BONDED CONTACTS REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_refined", false },
/* 62 */ { R"(NON-BONDED CONTACTS OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbd_other", false },
/* 63 */ { R"(NON-BONDED TORSION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_refined", false },
/* 64 */ { R"(NON-BONDED TORSION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_nbtor_other", false },
/* 65 */ { R"(H-BOND \(X...Y\) REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_refined", false },
/* 66 */ { R"(H-BOND \(X...Y\) OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_xyhbond_nbd_other", false },
/* 67 */ { R"(POTENTIAL METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_refined", false },
/* 68 */ { R"(POTENTIAL METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_metal_ion_other", false },
/* 69 */ { R"(SYMMETRY VDW REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_refined", false },
/* 70 */ { R"(SYMMETRY VDW OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_vdw_other", false },
/* 71 */ { R"(SYMMETRY H-BOND REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_refined", false },
/* 72 */ { R"(SYMMETRY H-BOND OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_hbond_other", false },
/* 73 */ { R"(SYMMETRY METAL-ION REFINED ATOMS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_refined", false },
/* 74 */ { R"(SYMMETRY METAL-ION OTHERS(?:\s*\(A\))?\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_symmetry_metal_ion_other", false },
/* 75 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
/* 76 */ { R"(MAIN-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_it", false },
/* 77 */ { R"(MAIN-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcbond_other", false },
/* 78 */ { R"(MAIN-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_it", false },
/* 79 */ { R"(MAIN-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_mcangle_other", false },
/* 80 */ { R"(SIDE-CHAIN BOND REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_it", false },
/* 81 */ { R"(SIDE-CHAIN BOND OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scbond_other", false },
/* 82 */ { R"(SIDE-CHAIN ANGLE REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_it", false },
/* 83 */ { R"(SIDE-CHAIN ANGLE OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_scangle_other", false },
/* 84 */ { R"(LONG RANGE B REFINED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_refined", false },
/* 85 */ { R"(LONG RANGE B OTHER ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_long_range_B_other", false },
/* 86 */ { R"(ANISOTROPIC THERMAL FACTOR RESTRAINTS. COUNT RMS WEIGHT)", 1 },
/* 87 */ { R"(RIGID-BOND RESTRAINTS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_rigid_bond_restr", false },
/* 88 */ { R"(SPHERICITY; FREE ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_free", false },
/* 89 */ { R"(SPHERICITY; BONDED ATOMS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "number", "dev_ideal", "dev_ideal_target" }, "r_sphericity_bonded", false },
// Simply ignore NCS, you can ask Robbie why
/* 90 */ { R"(NCS RESTRAINTS STATISTICS)", 1 },
/* 91 */ { R"(NUMBER OF DIFFERENT NCS GROUPS\s*:\s*(.+))", 1 },
/* 92 */ { R"(NCS GROUP NUMBER\s*:\s*(\d+))", 1, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
/* 93 */ { R"(CHAIN NAMES\s*:\s*(.+))", 1, /*"struct_ncs_dom", { "details" }*/ },
/* 94 */ { R"(NUMBER OF COMPONENTS NCS GROUP\s*:\s*(\d+))", 1 },
/* 95 */ { R"(COMPONENT C SSSEQI TO C SSSEQI CODE)", 1 },
//// This sucks.... The following line is fixed format
/* 97 */ { R"((\d+)\s+(.)\s+(\d+)(.)\s+(.)\s+(\d+)(.)\s+(.+))", 0 },//, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "beg_auth_icode", "end_auth_asym_id", "end_auth_seq_id", "end_auth_icode", "pdbx_refine_code" }, {}, 1 },
/* 98 */ { R"((\d+)\s+(.)\s+(\d+)\s+(.)\s+(\d+)\s+(.+))", 0 },//, "struct_ncs_dom_lim", { "pdbx_component_id", "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id", "pdbx_refine_code" }, {}, 1 },
/* 96 */ { R"(GROUP CHAIN COUNT RMS WEIGHT)", 1 }, /*, "refine_ls_restr_ncs", { "pdbx_type", "dom_id", "pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position", }*/
/* 99 */ { R"(TIGHT POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight positional"}, 1 },
/* 100 */ { R"(MEDIUM POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium positional"}, 1 },
/* 101 */ { R"(LOOSE POSITIONAL\s+\d+\s+(.)\s+\(A\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose positional"}, 1 },
/* 102 */ { R"(TIGHT THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "tight thermal", }, 1 },
/* 103 */ { R"(MEDIUM THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "medium thermal", }, 1 },
/* 104 */ { R"(LOOSE THERMAL\s+\d+\s+(.)\s+\(A\*\*2\):\s+(\d+)\s*;\s*(\d+(?:\.\d*)?)\s*;\s*(\d+(?:\.\d*)?))", 0 },// , "refine_ls_restr_ncs", {"pdbx_auth_asym_id", "pdbx_number", "rms_dev_position", "weight_position"}, { "pdbx_type", "loose thermal", }, 10 },
/* 105 */ { R"(NCS GROUP NUMBER\s*:\s*(\d+))", 93 - 105, /*"struct_ncs_dom", { "pdbx_ens_id" }*/ },
/* 106 */ { R"(TWIN DETAILS)", 1 },
/* 107 */ { R"(NUMBER OF TWIN DOMAINS\s*:\s*(\d*))", 1 },
/* 108 */ { R"(TWIN DOMAIN\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "domain_id" }, nullptr, true },
/* 109 */ { R"(TWIN OPERATOR\s*:\s*(.+))", 1, "pdbx_reflns_twin", { "operator" } },
/* 110 */ { R"(TWIN FRACTION\s*:\s*(.+))", 108 - 110, "pdbx_reflns_twin", { "fraction" } },
/* 111 */ { R"(TLS DETAILS)", 1 },
/* 112 */ { R"(NUMBER OF TLS GROUPS\s*:\s*(.+))", 1 },
/* 113 */ { R"(TLS GROUP\s*:\s*(.+))", 1, "pdbx_refine_tls", { "id" }, nullptr, true },
/* 114 */ { R"(NUMBER OF COMPONENTS GROUP\s*:\s*(.+))", 1 },
/* 115 */ { R"(COMPONENTS C SSSEQI TO C SSSEQI)", 1 },
/* 116 */ { R"(RESIDUE RANGE\s*:\s+(\S+)\s+(\d*\S)\s+(\S+)\s+(\d*\S))", 0, "pdbx_refine_tls_group", { "beg_auth_asym_id", "beg_auth_seq_id", "end_auth_asym_id", "end_auth_seq_id" }, nullptr, true },
/* 117 */ { R"(ORIGIN FOR THE GROUP(?:\s*\(A\))?\s*:\s*([-+]?\d+(?:\.\d+)?)\s*([-+]?\d+(?:\.\d+)?)\s*([-+]?\d+(?:\.\d+)?))", 1, "pdbx_refine_tls", { "origin_x", "origin_y", "origin_z" } },
/* 118 */ { R"(T TENSOR)", 1 },
/* 119 */ { R"(T11\s*:\s*(.+) T22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][1]", "T[2][2]" } },
/* 120 */ { R"(T33\s*:\s*(.+) T12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[3][3]", "T[1][2]" } },
/* 121 */ { R"(T13\s*:\s*(.+) T23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "T[1][3]", "T[2][3]" } },
/* 122 */ { R"(L TENSOR)", 1 },
/* 123 */ { R"(L11\s*:\s*(.+) L22\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][1]", "L[2][2]" } },
/* 124 */ { R"(L33\s*:\s*(.+) L12\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[3][3]", "L[1][2]" } },
/* 125 */ { R"(L13\s*:\s*(.+) L23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "L[1][3]", "L[2][3]" } },
/* 126 */ { R"(S TENSOR)", 1 },
/* 127 */ { R"(S11\s*:\s*(.+) S12\s*:\s*(.+) S13\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[1][1]", "S[1][2]", "S[1][3]" } },
/* 128 */ { R"(S21\s*:\s*(.+) S22\s*:\s*(.+) S23\s*:\s*(.+))", 1, "pdbx_refine_tls", { "S[2][1]", "S[2][2]", "S[2][3]" } },
/* 129 */ { R"(S31\s*:\s*(.+) S32\s*:\s*(.+) S33\s*:\s*(.+))", 113 - 129, "pdbx_refine_tls", { "S[3][1]", "S[3][2]", "S[3][3]" } },
/* 130 */ { R"(BULK SOLVENT MODELLING.)", 1 },
/* 131 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
/* 132 */ { R"(PARAMETERS FOR MASK CALCULATION)", 1 },
/* 133 */ { R"(VDW PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_vdw_probe_radii" } },
/* 134 */ { R"(ION PROBE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_ion_probe_radii" } },
/* 135 */ { R"(SHRINKAGE RADIUS\s*:\s*(.+))", 1, "refine", { "pdbx_solvent_shrinkage_radii" } },
};
// Remark3Parser bound to the REFMAC5 template table (kREFMAC5_Template).
// The regex handed to the base class is the program/version pattern that
// Remark3Parser::program() and Remark3Parser::version() apply to the parser
// name: capture group 1 is the program, optional group 2 the version.
class REFMAC5_Remark3Parser : public Remark3Parser
{
  public:
	REFMAC5_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kREFMAC5_Template,
			  sizeof(kREFMAC5_Template) / sizeof(kREFMAC5_Template[0]), // number of template entries
			  std::regex(R"((REFMAC)(?: (\d+(?:\..+)?))?)"))
	{
	}
};
// Template describing the REMARK 3 layout handled by SHELXL_Remark3Parser.
//
// Remark3Parser::parse() walks this table as a state machine: the array
// position (shown in the leading /* N */ marker) is the state, and on a
// successful match the next state becomes `position + offset`, where offset
// is the second initializer of the entry.
//
// Initializers per entry appear to map, in order, onto the TemplateLine
// members read by parse(): rx, nextStateOffset, category, items,
// lsRestrType, createNew (TODO confirm against the TemplateLine declaration).
//  - Entries without a category only advance the state; nothing is stored.
//  - Entries with a non-null lsRestrType and createNew == false are routed
//    to updateRefineLsRestr(); entries with a category but no lsRestrType go
//    to storeCapture().
// The order of the entries is significant; do not sort or regroup them.
const TemplateLine kSHELXL_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 5 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 6 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 7 */ { R"(FIT TO DATA USED IN REFINEMENT \(NO CUTOFF\)\.)", 1 },
/* 8 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
/* 9 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
/* 10 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
/* 11 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
/* 12 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
/* 13 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
/* 14 */ { R"(FIT/AGREEMENT OF MODEL FOR DATA WITH F>4SIG\(F\)\.)", 1 },
/* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_4sig_cutoff" } },
/* 16 */ { R"(R VALUE \(WORKING SET, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_4sig_cutoff" } },
/* 17 */ { R"(FREE R VALUE \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_4sig_cutoff" } },
/* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_4sig_cutoff" } },
/* 19 */ { R"(FREE R VALUE TEST SET COUNT \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_4sig_cutoff" } },
/* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(F>4SIG\(F\)\)\s*:\s*(.+))", 1, "pdbx_refine", { "number_reflns_obs_4sig_cutoff" } },
/* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 26 */ { R"(MODEL REFINEMENT\.)", 1 },
/* 27 */ { R"(OCCUPANCY SUM OF NON-HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_non_hydrogen" } },
/* 28 */ { R"(OCCUPANCY SUM OF HYDROGEN ATOMS\s*:\s*(.+))", 1, "refine_analyze", { "occupancy_sum_hydrogen" } },
/* 29 */ { R"(NUMBER OF DISCRETELY DISORDERED RESIDUES\s*:\s*(.+))", 1, "refine_analyze", { "number_disordered_residues" } },
/* 30 */ { R"(NUMBER OF LEAST-SQUARES PARAMETERS\s*:\s*(.+))", 1, "refine", { "ls_number_parameters" } },
/* 31 */ { R"(NUMBER OF RESTRAINTS\s*:\s*(.+))", 1, "refine", { "ls_number_restraints" } },
/* 32 */ { R"(RMS DEVIATIONS FROM RESTRAINT TARGET VALUES\.)", 1 },
/* 33 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_bond_d", false },
/* 34 */ { R"(ANGLE DISTANCES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_angle_d", false },
/* 35 */ { R"(SIMILAR DISTANCES \(NO TARGET VALUES\) \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_dist", false },
/* 36 */ { R"(DISTANCES FROM RESTRAINT PLANES \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_from_restr_planes", false },
/* 37 */ { R"(ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_zero_chiral_vol", false },
/* 38 */ { R"(NON-ZERO CHIRAL VOLUMES \(A\*\*3\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_non_zero_chiral_vol", false },
/* 39 */ { R"(ANTI-BUMPING DISTANCE RESTRAINTS \(A\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_anti_bump_dis_restr", false },
/* 40 */ { R"(RIGID-BOND ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_rigid_bond_adp_cmpnt", false },
/* 41 */ { R"(SIMILAR ADP COMPONENTS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_similar_adp_cmpnt", false },
/* 42 */ { R"(APPROXIMATELY ISOTROPIC ADPS \(A\*\*2\)\s*:\s*(.+))", 1, "refine_ls_restr", { "dev_ideal" }, "s_approx_iso_adps", false },
/* 43 */ { R"(BULK SOLVENT MODELING\.)", 1 },
/* 44 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
/* 45 */ { R"(STEREOCHEMISTRY TARGET VALUES\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
/* 46 */ { R"(SPECIAL CASE\s*:\s*(.+))", 1, "refine", { "pdbx_stereochem_target_val_spec_case" } },
};
// Remark3Parser bound to the SHELXL template table (kSHELXL_Template).
// The regex handed to the base class is the program/version pattern that
// Remark3Parser::program() and Remark3Parser::version() apply to the parser
// name: capture group 1 is the program, group 2 the version. Unlike the
// sibling parsers the "-<version>" suffix is not optional in this pattern.
class SHELXL_Remark3Parser : public Remark3Parser
{
  public:
	SHELXL_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kSHELXL_Template,
			  sizeof(kSHELXL_Template) / sizeof(kSHELXL_Template[0]), // number of template entries
			  std::regex(R"((SHELXL)(?:-(\d+(?:\..+)?)))"))
	{
	}
};
// Template describing the REMARK 3 layout handled by TNT_Remark3Parser.
//
// Remark3Parser::parse() walks this table as a state machine: the array
// position (shown in the leading /* N */ marker) is the state, and on a
// successful match the next state becomes `position + offset`, where offset
// is the second initializer of the entry.
//
// Initializers per entry appear to map, in order, onto the TemplateLine
// members read by parse(): rx, nextStateOffset, category, items,
// lsRestrType, createNew (TODO confirm against the TemplateLine declaration).
//  - Entries without a category only advance the state; nothing is stored.
//  - Entries with a non-null lsRestrType and createNew == false are routed
//    to updateRefineLsRestr(); entries with a category but no lsRestrType go
//    to storeCapture().
// Note that the restraint rows here list their items as
// { "dev_ideal", "weight", "number" }, matching the "RMS WEIGHT COUNT"
// header of entry 27.
// The order of the entries is significant; do not sort or regroup them.
const TemplateLine kTNT_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\)\s*:\s*(.+))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\)\s*:\s*(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(COMPLETENESS FOR RANGE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 5 */ { R"(NUMBER OF REFLECTIONS\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 6 */ { R"(USING DATA ABOVE SIGMA CUTOFF\.)", 1 },
/* 7 */ { R"(CROSS-VALIDATION METHOD\s*:\s*(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 8 */ { R"(FREE R VALUE TEST SET SELECTION\s*:\s*(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 9 */ { R"(R VALUE \(WORKING \+ TEST SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_obs" } },
/* 10 */ { R"(R VALUE \(WORKING SET\)\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 11 */ { R"(FREE R VALUE\s*:\s*(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 12 */ { R"(FREE R VALUE TEST SET SIZE \(%\)\s*:\s*(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET COUNT\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 14 */ { R"(USING ALL DATA, NO SIGMA CUTOFF\.)", 1 },
/* 15 */ { R"(R VALUE \(WORKING \+ TEST SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_all_no_cutoff" } },
/* 16 */ { R"(R VALUE \(WORKING SET, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "R_factor_obs_no_cutoff" } },
/* 17 */ { R"(FREE R VALUE \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_factor_no_cutoff" } },
/* 18 */ { R"(FREE R VALUE TEST SET SIZE \(%, NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_size_perc_no_cutoff" } },
/* 19 */ { R"(FREE R VALUE TEST SET COUNT \(NO CUTOFF\)\s*:\s*(.+))", 1, "pdbx_refine", { "free_R_val_test_set_ct_no_cutoff" } },
/* 20 */ { R"(TOTAL NUMBER OF REFLECTIONS \(NO CUTOFF\)\s*:\s*(.+))", 1, "refine", { "ls_number_reflns_all" } },
/* 21 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 22 */ { R"(PROTEIN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 23 */ { R"(NUCLEIC ACID ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 24 */ { R"(HETEROGEN ATOMS\s*:\s*(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 25 */ { R"(SOLVENT ATOMS\s*:\s*(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 26 */ { R"(WILSON B VALUE \(FROM FCALC, A\*\*2\)\s*:\s*(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 27 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\. RMS WEIGHT COUNT)", 1 },
/* 28 */ { R"(BOND LENGTHS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_bond_d", false },
/* 29 */ { R"(BOND ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_angle_deg", false },
/* 30 */ { R"(TORSION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_dihedral_angle_d", false },
/* 31 */ { R"(PSEUDOROTATION ANGLES \(DEGREES\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_pseud_angle", false },
/* 32 */ { R"(TRIGONAL CARBON PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_trig_c_planes", false },
/* 33 */ { R"(GENERAL PLANES \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_gen_planes", false },
/* 34 */ { R"(ISOTROPIC THERMAL FACTORS \(A\*\*2\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_it", false },
/* 35 */ { R"(NON-BONDED CONTACTS \(A\)\s*:\s*(.+)\s*;\s*(.+)\s*;\s*(.+))", 1, "refine_ls_restr", { "dev_ideal", "weight", "number" }, "t_nbd", false },
/* 36 */ { R"(INCORRECT CHIRAL-CENTERS \(COUNT\)\s*:\s*(.+)\s*)", 1, "refine_ls_restr", { "number" }, "t_incorr_chiral_ct", false },
/* 37 */ { R"(BULK SOLVENT MODELING\.)", 1 },
/* 38 */ { R"(METHOD USED\s*:\s*(.+))", 1, "refine", { "solvent_model_details" } },
/* 39 */ { R"(KSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_ksol" } },
/* 40 */ { R"(BSOL\s*:\s*(.+))", 1, "refine", { "solvent_model_param_bsol" } },
/* 41 */ { R"(RESTRAINT LIBRARIES\.)", 1 },
/* 42 */ { R"(STEREOCHEMISTRY\s*:\s*(.+))", 1, "refine", { "pdbx_stereochemistry_target_values" } },
/* 43 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\s*:\s*(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
};
// Remark3Parser bound to the TNT template table (kTNT_Template).
// The regex handed to the base class is the program/version pattern that
// Remark3Parser::program() and Remark3Parser::version() apply to the parser
// name: capture group 1 is the program, optional group 2 the version.
class TNT_Remark3Parser : public Remark3Parser
{
  public:
	TNT_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kTNT_Template,
			  sizeof(kTNT_Template) / sizeof(kTNT_Template[0]), // number of template entries
			  std::regex(R"((TNT)(?: V. (\d+.+)?)?)"))
	{
	}
};
// Template describing the REMARK 3 layout handled by XPLOR_Remark3Parser.
//
// Remark3Parser::parse() walks this table as a state machine: the array
// position (shown in the leading /* N */ marker) is the state, and on a
// successful match the next state becomes `position + offset`, where offset
// is the second initializer of the entry.
//  - offset 1 moves on to the following entry;
//  - offset 0 (entries 65 and 66) keeps the state on the same entry, so the
//    same kind of line may be matched repeatedly;
//  - a negative offset such as `63 - 64` (entry 64) jumps back to entry 63.
//
// Initializers per entry appear to map, in order, onto the TemplateLine
// members read by parse(): rx, nextStateOffset, category, items,
// lsRestrType, createNew (TODO confirm against the TemplateLine declaration).
//  - Entries without a category only advance the state; nothing is stored.
//    This includes entries 61 and 63-66, whose category mapping is
//    commented out.
//  - Entries with a non-null lsRestrType and createNew == false are routed
//    to updateRefineLsRestr(); entries with a category but no lsRestrType go
//    to storeCapture().
// NOTE(review): entries 51-54 name two items but their regex has a single
// capture group — confirm how updateRefineLsRestr() treats the missing capture.
// The order of the entries is significant; do not sort or regroup them.
const TemplateLine kXPLOR_Template[] = {
/* 0 */ { R"(DATA USED IN REFINEMENT\.)", 1 },
/* 1 */ { R"(RESOLUTION RANGE HIGH \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_high" } },
/* 2 */ { R"(RESOLUTION RANGE LOW \(ANGSTROMS\) :\s+(.+))", 1, "refine", { "ls_d_res_low" } },
/* 3 */ { R"(DATA CUTOFF \(SIGMA\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_ls_sigma_F" } },
/* 4 */ { R"(DATA CUTOFF HIGH \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_high_absF" } },
/* 5 */ { R"(DATA CUTOFF LOW \(ABS\(F\)\) :\s+(.+))", 1, "refine", { "pdbx_data_cutoff_low_absF" } },
/* 6 */ { R"(COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_obs" } },
/* 7 */ { R"(NUMBER OF REFLECTIONS :\s+(.+))", 1, "refine", { "ls_number_reflns_obs" } },
/* 8 */ { R"(FIT TO DATA USED IN REFINEMENT\.)", 1 },
/* 9 */ { R"(CROSS-VALIDATION METHOD :\s+(.+))", 1, "refine", { "pdbx_ls_cross_valid_method" } },
/* 10 */ { R"(FREE R VALUE TEST SET SELECTION :\s+(.+))", 1, "refine", { "pdbx_R_Free_selection_details" } },
/* 11 */ { R"(R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine", { "ls_R_factor_R_work" } },
/* 12 */ { R"(FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free" } },
/* 13 */ { R"(FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine", { "ls_percent_reflns_R_free" } },
/* 14 */ { R"(FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine", { "ls_number_reflns_R_free" } },
/* 15 */ { R"(ESTIMATED ERROR OF FREE R VALUE :\s+(.+))", 1, "refine", { "ls_R_factor_R_free_error" } },
/* 16 */ { R"(FIT IN THE HIGHEST RESOLUTION BIN\.)", 1 },
/* 17 */ { R"(TOTAL NUMBER OF BINS USED :\s+(.+))", 1, "refine_ls_shell", { "pdbx_total_number_of_bins_used" } },
/* 18 */ { R"(BIN RESOLUTION RANGE HIGH \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_high" } },
/* 19 */ { R"(BIN RESOLUTION RANGE LOW \(A\) :\s+(.+))", 1, "refine_ls_shell", { "d_res_low" } },
/* 20 */ { R"(BIN COMPLETENESS \(WORKING\+TEST\) \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_obs" } },
/* 21 */ { R"(REFLECTIONS IN BIN \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_work" } },
/* 22 */ { R"(BIN R VALUE \(WORKING SET\) :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_work" } },
/* 23 */ { R"(BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free" } },
/* 24 */ { R"(BIN FREE R VALUE TEST SET SIZE \(%\) :\s+(.+))", 1, "refine_ls_shell", { "percent_reflns_R_free" } },
/* 25 */ { R"(BIN FREE R VALUE TEST SET COUNT :\s+(.+))", 1, "refine_ls_shell", { "number_reflns_R_free" } },
/* 26 */ { R"(ESTIMATED ERROR OF BIN FREE R VALUE :\s+(.+))", 1, "refine_ls_shell", { "R_factor_R_free_error" } },
/* 27 */ { R"(NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT\.)", 1 },
/* 28 */ { R"(PROTEIN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_protein" } },
/* 29 */ { R"(NUCLEIC ACID ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_nucleic_acid" } },
/* 30 */ { R"(HETEROGEN ATOMS :\s+(.+))", 1, "refine_hist", { "pdbx_number_atoms_ligand" } },
/* 31 */ { R"(SOLVENT ATOMS :\s+(.+))", 1, "refine_hist", { "number_atoms_solvent" } },
/* 32 */ { R"(B VALUES\.)", 1 },
/* 33 */ { R"(B VALUE TYPE :\s+(.+))", 1, "refine", { "pdbx_TLS_residual_ADP_flag" } },
/* 34 */ { R"(FROM WILSON PLOT \(A\*\*2\) :\s+(.+))", 1, "reflns", { "B_iso_Wilson_estimate" } },
/* 35 */ { R"(MEAN B VALUE \(OVERALL, A\*\*2\) :\s+(.+))", 1, "refine", { "B_iso_mean" } },
/* 36 */ { R"(OVERALL ANISOTROPIC B VALUE\.)", 1 },
/* 37 */ { R"(B11 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][1]" } },
/* 38 */ { R"(B22 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][2]" } },
/* 39 */ { R"(B33 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[3][3]" } },
/* 40 */ { R"(B12 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][2]" } },
/* 41 */ { R"(B13 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[1][3]" } },
/* 42 */ { R"(B23 \(A\*\*2\) :\s+(.+))", 1, "refine", { "aniso_B[2][3]" } },
/* 43 */ { R"(ESTIMATED COORDINATE ERROR\.)", 1 },
/* 44 */ { R"(ESD FROM LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_obs" } },
/* 45 */ { R"(ESD FROM SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_obs" } },
/* 46 */ { R"(LOW RESOLUTION CUTOFF \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_d_res_low_obs" } },
/* 47 */ { R"(CROSS-VALIDATED ESTIMATED COORDINATE ERROR\.)", 1 },
/* 48 */ { R"(ESD FROM C-V LUZZATI PLOT \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_coordinate_error_free" } },
/* 49 */ { R"(ESD FROM C-V SIGMAA \(A\) :\s+(.+))", 1, "refine_analyze", { "Luzzati_sigma_a_free" } },
/* 50 */ { R"(RMS DEVIATIONS FROM IDEAL VALUES\..*)", 1 },
/* 51 */ { R"(BOND LENGTHS \(A\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_bond_d", false },
/* 52 */ { R"(BOND ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_angle_deg", false },
/* 53 */ { R"(DIHEDRAL ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_dihedral_angle_d", false },
/* 54 */ { R"(IMPROPER ANGLES \(DEGREES\) :\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_improper_angle_d", false },
/* 55 */ { R"(ISOTROPIC THERMAL MODEL :\s+(.+))", 1, "refine", { "pdbx_isotropic_thermal_model" } },
/* 56 */ { R"(ISOTROPIC THERMAL FACTOR RESTRAINTS\. RMS SIGMA)", 1 },
/* 57 */ { R"(MAIN-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcbond_it", false },
/* 58 */ { R"(MAIN-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_mcangle_it", false },
/* 59 */ { R"(SIDE-CHAIN BOND \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scbond_it", false },
/* 60 */ { R"(SIDE-CHAIN ANGLE \(A\*\*2\) :\s+(.+?);\s+(.+))", 1, "refine_ls_restr", { "dev_ideal", "dev_ideal_target" }, "x_scangle_it", false },
/* 61 */ { R"(NCS MODEL :\s+(.+))", 1, /* "refine_ls_restr_ncs", { "ncs_model_details" } */ },
/* 62 */ { R"(NCS RESTRAINTS\. RMS SIGMA/WEIGHT)", 1 },
/* 63 */ { R"(GROUP (\d+) POSITIONAL \(A\) :\s+(.+?);\s+(.+))", 1, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_position", "weight_position" } */ },
/* 64 */ { R"(GROUP (\d+) B-FACTOR \(A\*\*2\) :\s+(.+?);\s+(.+))", 63 - 64, /* "refine_ls_restr_ncs", { ":dom_id", "rms_dev_B_iso", "weight_B_iso" } */ },
/* 65 */ { R"(PARAMETER FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "param_file" } */ },
/* 66 */ { R"(TOPOLOGY FILE (\d+) :\s+(.+))", 0, /* "pdbx_xplor_file", { "serial_no", "topol_file" } */ },
};
// Remark3Parser bound to the X-PLOR template table (kXPLOR_Template).
// The regex handed to the base class is the program/version pattern that
// Remark3Parser::program() and Remark3Parser::version() apply to the parser
// name: capture group 1 is the program, optional group 2 the version.
class XPLOR_Remark3Parser : public Remark3Parser
{
  public:
	XPLOR_Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
		: Remark3Parser(
			  name, expMethod, r, db,
			  kXPLOR_Template,
			  sizeof(kXPLOR_Template) / sizeof(kXPLOR_Template[0]), // number of template entries
			  std::regex(R"((X-PLOR)(?: (\d+(?:\.\d+)?))?)"))
	{
	}
};
// --------------------------------------------------------------------
// Captures everything a concrete REMARK 3 parser needs.
//
//   name              program name string; program() and version() match
//                     `programversion` against it
//   expMethod         stored as-is in mExpMethod
//   r                 first record to read; nextLine() advances it
//   db                only its name is used: mDb is constructed from
//                     db.getName(), not from db itself
//   templatelines     table of TemplateLine entries walked by parse()
//   templateLineCount number of entries in `templatelines`
//   programversion    regex whose groups 1 and 2 yield program and version
Remark3Parser::Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
	const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programversion)
	: mName(name)
	, mExpMethod(expMethod)
	, mRec(r)
	, mDb(db.getName())
	, mTemplate(templatelines)
	, mTemplateCount(templateLineCount)
	, mProgramVersion(programversion)
{
}
// Reads the next non-empty REMARK 3 line into mLine and returns a copy of it.
//
// Starting at mRec, records are consumed while they are REMARK 3 records.
// For the first record whose text (vS(12)) is not empty:
//   * physically wrapped values are joined: following REMARK 3 records that
//     are longer than the ':' column of the first record and start with at
//     least that many blanks are appended, separated by a single space;
//   * every run of whitespace is collapsed into one space and trailing
//     whitespace is dropped.
// mRec is left pointing at the first record that was not consumed. When no
// such line exists mLine (and the return value) is empty.
std::string Remark3Parser::nextLine()
{
	mLine.clear();

	while (mRec != nullptr and mRec->is("REMARK 3"))
	{
		// Column of the first ':' at or after offset 4; 0 when there is none.
		// NOTE(review): the indent is the column of the ':' itself, not of the
		// first value character. Skipping the blanks after the colon would
		// change which continuation lines are accepted, so it is deliberately
		// not done here.
		size_t valueIndent = 0;
		for (size_t i = 4; i < mRec->mVlen; ++i)
		{
			if (mRec->mValue[i] == ':')
			{
				valueIndent = i;
				break;
			}
		}

		mLine = mRec->vS(12);
		mRec = mRec->mNext;

		if (mLine.empty())
			continue;

		// concatenate value that is wrapped over multiple lines (tricky code...)
		if (valueIndent > 4)
		{
			const std::string indent(valueIndent - 4, ' ');

			// mRec may have run off the end of the record list when the line
			// just read was the last record, hence the explicit null check.
			while (mRec != nullptr and mRec->is("REMARK 3") and mRec->mVlen > valueIndent)
			{
				const std::string v(mRec->mValue + 4, mRec->mValue + mRec->mVlen);
				if (not cif::starts_with(v, indent))
					break;

				mLine += ' ';
				mLine.append(mRec->mValue + valueIndent, mRec->mValue + mRec->mVlen);
				mRec = mRec->mNext;
			}
		}

		// collapse multiple spaces (in place: `out` trails `in`)
		bool pendingSpace = false;
		auto out = mLine.begin();
		for (auto in = mLine.begin(); in != mLine.end(); ++in)
		{
			// isspace() is only defined for values representable as unsigned
			// char (or EOF), so never hand it a possibly negative plain char.
			const bool isBlank = isspace(static_cast<unsigned char>(*in)) != 0;
			if (not isBlank)
			{
				if (pendingSpace)
					*out++ = ' ';
				*out++ = *in;
			}
			pendingSpace = isBlank;
		}
		mLine.erase(out, mLine.end());

		break;
	}

	if (cif::VERBOSE >= 2)
		std::cerr << "RM3: " << mLine << std::endl;

	return mLine;
}
// Tries to match the whole current line (mLine) against `expr`.
//
// On success the capture groups are left in mM, mState is set to
// `nextState` and true is returned. On failure mState is untouched, false is
// returned, and at verbosity level 3 or higher the rejected expression is
// reported on stderr.
bool Remark3Parser::match(const char* expr, int nextState)
{
	const std::regex pattern(expr);

	if (regex_match(mLine, mM, pattern))
	{
		mState = nextState;
		return true;
	}

	if (cif::VERBOSE >= 3)
		std::cerr << cif::coloured("No match:", cif::scWHITE, cif::scRED) << " '" << expr << '\'' << std::endl;

	return false;
}
/// \brief Run this parser's template over the remaining REMARK 3 lines.
///
/// Each line obtained from nextLine() is tried against the template lines,
/// starting at the current state. A matching template line advances the state
/// and, if it names a category and items, stores the captures. Once the
/// template is exhausted, an "OTHER REFINEMENT REMARKS" line and everything
/// following it is collected as free text and stored in refine.details
/// (when a refine record exists). Lines that match nothing are counted as
/// dropped.
///
/// \return The fraction of lines that were not dropped, in the range [0, 1].
///         When no line could be read at all the score is 0 (the original
///         code divided zero by zero here and returned NaN).
float Remark3Parser::parse()
{
	int lineCount = 0, dropped = 0;
	std::string remarks;
	mState = 0;

	while (mRec != nullptr)
	{
		nextLine();

		if (mLine.empty())
			break;

		++lineCount;

		// Skip over AUTHORS lines
		if (mState == 0 and match(R"(AUTHORS\s*:.+)", 0))
			continue;

		// Try the template lines from the current state onwards; match()
		// updates mState itself when a line matches.
		auto state = mState;
		for (; state < mTemplateCount; ++state)
		{
			const TemplateLine& tmpl = mTemplate[state];

			if (match(tmpl.rx, state + tmpl.nextStateOffset))
			{
				if (not (tmpl.category == nullptr or tmpl.items.size() == 0))
				{
					if (tmpl.lsRestrType == nullptr)
						storeCapture(tmpl.category, tmpl.items, tmpl.createNew);
					else if (tmpl.createNew)
						storeRefineLsRestr(tmpl.lsRestrType, tmpl.items);
					else
						updateRefineLsRestr(tmpl.lsRestrType, tmpl.items);
				}

				break;
			}
		}

		if (state < mTemplateCount)
			continue;

		// State mTemplateCount: template exhausted, look for the remarks header.
		if (state == mTemplateCount and match(R"(OTHER REFINEMENT REMARKS\s*:\s*(.*))", mTemplateCount + 1))
		{
			remarks = mM[1].str();
			continue;
		}

		// State mTemplateCount + 1: inside the remarks, append every line.
		if (state == mTemplateCount + 1)
		{
			remarks = remarks + '\n' + mLine;
			continue;
		}

		if (cif::VERBOSE >= 2)
			std::cerr << cif::coloured("Dropping line:", cif::scWHITE, cif::scRED) << " '" << mLine << '\'' << std::endl;

		++dropped;
	}

	if (not remarks.empty() and not iequals(remarks, "NULL"))
	{
		if (not mDb["refine"].empty())
			mDb["refine"].front()["details"] = remarks;
	}

	// Nothing read means nothing recognised; avoid 0 / 0.
	if (lineCount == 0)
		return 0;

	return float(lineCount - dropped) / lineCount;
}
/// \brief Name of the refinement program.
///
/// \return Capture 1 of mProgramVersion applied to mName when the name
///         matches that expression, otherwise mName unchanged.
std::string Remark3Parser::program()
{
	std::smatch m;

	if (regex_match(mName, m, mProgramVersion))
		return m[1].str();

	return mName;
}
/// \brief Version of the refinement program.
///
/// \return Capture 2 of mProgramVersion applied to mName when the name
///         matches that expression, otherwise an empty string.
std::string Remark3Parser::version()
{
	std::smatch m;

	if (not regex_match(mName, m, mProgramVersion))
		return {};

	return m[2].str();
}
// Store the captures of the last successful match (mM) in the parser's own
// datablock mDb: capture N is written to the N-th name in 'items' of the
// last row of 'category'.
//
// category   name of the category to write to
// items      item names, one per capture group, in capture order
// createNew  when true a new row is created for the first value stored,
//            otherwise a row is only created when the category is empty
//
// A newly created row is pre-filled with values that depend on the category
// name (see the chain of iequals tests below).
void Remark3Parser::storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew)
{
int capture = 0;
for (auto item: items)
{
++capture;
std::string value = mM[capture].str();
cif::trim(value);
// Skip placeholder values: NULL, NONE, Inf, +Inf and values made up
// entirely of '*'. An empty value presumably passes the all-'*' test as
// well (it is compared against an empty string) and is then skipped too.
if (iequals(value, "NULL") or iequals(value, "NONE") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
continue;
if (cif::VERBOSE >= 3)
std::cerr << "storing: '" << value << "' in _" << category << '.' << item << std::endl;
auto& cat = mDb[category];
// Create the row to write to, with its category specific default items.
if (cat.empty() or createNew)
{
if (iequals(category, "refine"))
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "entry_id", mDb.getName() },
//#warning("this diffrn-id is probably not correct?")
{ "pdbx_diffrn_id", 1 }
});
else if (iequals(category, "refine_analyze") or iequals(category, "pdbx_refine"))
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "entry_id", mDb.getName() },
// { "pdbx_diffrn_id", 1 }
});
else if (iequals(category, "refine_hist"))
{
// Copy the resolution limits from the first refine row, if there is
// one; "." is stored when a limit is not available.
std::string dResHigh, dResLow;
for (auto r: mDb["refine"])
{
cif::tie(dResHigh, dResLow) = r.get("ls_d_res_high", "ls_d_res_low");
break;
}
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "cycle_id", "LAST" },
{ "d_res_high", dResHigh.empty() ? "." : dResHigh },
{ "d_res_low", dResLow.empty() ? "." : dResLow }
});
}
else if (iequals(category, "refine_ls_shell"))
{
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
});
}
else if (iequals(category, "pdbx_refine_tls_group"))
{
// A TLS group gets the id of the most recently added pdbx_refine_tls
// row, or an empty id when there is none yet.
std::string tlsGroupID;
if (not mDb["pdbx_refine_tls"].empty())
tlsGroupID = mDb["pdbx_refine_tls"].back()["id"].as<std::string>();
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "id", tlsGroupID },
{ "refine_tls_id", tlsGroupID }
});
}
else if (iequals(category, "pdbx_refine_tls"))
{
cat.emplace({
{ "pdbx_refine_id", mExpMethod },
{ "method", "refined" }
});
}
// else if (iequals(category, "struct_ncs_dom"))
// {
// size_t id = cat.size() + 1;
//
// cat.emplace({
// { "id", id }
// });
// }
else if (iequals(category, "pdbx_reflns_twin"))
{
cat.emplace({
// #warning("crystal id, diffrn id, what should be put here?")
{ "crystal_id", 1 },
{ "diffrn_id", 1 }
});
}
else if (iequals(category, "reflns"))
cat.emplace({
{ "pdbx_ordinal", cat.size() + 1 },
{ "entry_id", mDb.getName() },
{ "pdbx_diffrn_id", 1 }
});
else
cat.emplace({});
// Only one new row per call: the remaining items go into the same row.
createNew = false;
}
cat.back()[item] = value;
}
}
void Remark3Parser::storeRefineLsRestr(const char* type, std::initializer_list<const char*> items)
{
Row r;
int capture = 0;
for (auto item: items)
{
++capture;
std::string value = mM[capture].str();
cif::trim(value);
if (value.empty() or iequals(value, "NULL") or iequals(value, "Inf") or iequals(value, "+Inf") or iequals(value, std::string(value.length(), '*')))
continue;
if (not r)
{
std::tie(r, std::ignore) = mDb["refine_ls_restr"].emplace({});
r["pdbx_refine_id"] = mExpMethod;
r["type"] = type;
}
r[item] = value;
}
}
/// \brief Add the captures of the last match to an existing refine_ls_restr row.
///
/// The first row with the given \a type and this parser's experimental method
/// is updated: capture N is written to the N-th name in \a items, where NULL
/// and all-'*' values are stored as an empty value. When no such row exists
/// the work is delegated to storeRefineLsRestr.
void Remark3Parser::updateRefineLsRestr(const char* type, std::initializer_list<const char*> items)
{
	auto rows = mDb["refine_ls_restr"].find(cif::Key("type") == type and cif::Key("pdbx_refine_id") == mExpMethod);

	if (rows.empty())
	{
		storeRefineLsRestr(type, items);
		return;
	}

	for (Row r : rows)
	{
		int index = 0;

		for (auto name : items)
		{
			std::string text = mM[++index].str();
			cif::trim(text);

			const bool placeholder = iequals(text, "NULL") or iequals(text, std::string(text.length(), '*'));
			if (placeholder)
				text.clear();

			r[name] = text;
		}

		// only the first matching row is updated
		break;
	}
}
// --------------------------------------------------------------------
/// \brief Parse a block of REMARK 3 records and store the result in \a db.
///
/// The first two non-empty REMARK 3 lines must be "REFINEMENT." and a
/// "PROGRAM :" line. For each program named there the corresponding parser is
/// run on the records that follow and scored. When no parser was tried, or
/// the best score is below 0.9, all known parsers are tried. The best scoring
/// parser is recorded in the software category and the categories it
/// collected are copied into \a db.
///
/// \param expMethod Value passed on to the parsers as experimental method
/// \param r         First REMARK 3 record
/// \param db        Datablock that receives the parsed data
/// \return true when at least one parser produced a score above zero
bool Remark3Parser::parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db)
{
	// simple version, only for the first few lines
	// Note that this advances r, so the parsers created further down start
	// reading after the PROGRAM line.
	auto getNextLine = [&]()
	{
		std::string result;

		while (result.empty() and r != nullptr and r->is("REMARK 3"))
		{
			result = r->vS(12);
			r = r->mNext;
		}

		return result;
	};

	// All remark 3 records should start with the same data.
	std::string line = getNextLine();

	if (line != "REFINEMENT.")
	{
		if (cif::VERBOSE > 0)
			std::cerr << "Unexpected data in REMARK 3" << std::endl;
		return false;
	}

	line = getNextLine();

	std::regex rxp(R"(^PROGRAM\s*:\s*(.+))");
	std::smatch m;

	if (not std::regex_match(line, m, rxp))
	{
		if (cif::VERBOSE > 0)
			std::cerr << "Expected valid PROGRAM line in REMARK 3" << std::endl;
		return false;
	}

	line = m[1].str();

	struct programScore
	{
		// Takes over the parser owned by 'parser'. Ownership is only
		// transferred here, after the storage for this object exists and the
		// program name has been copied, so a failing allocation cannot leak
		// the parser.
		programScore(const std::string& program, std::unique_ptr<Remark3Parser>& parser, float score)
			: program(program), parser(parser.release()), score(score) {}

		std::string program;
		std::unique_ptr<Remark3Parser> parser;
		float score;

		// Sort in descending order of score
		bool operator<(const programScore& rhs) const
		{
			return score > rhs.score;
		}
	};

	std::vector<programScore> scores;

	// Run one parser and keep it, with its score, when it recognised anything.
	auto tryParser = [&](Remark3Parser* p)
	{
		std::unique_ptr<Remark3Parser> parser(p);
		float score;

		try
		{
			score = parser->parse();
		}
		catch(const std::exception& e)
		{
			if (cif::VERBOSE >= 0)
				std::cerr << "Error parsing REMARK 3 with " << parser->program() << std::endl
						  << e.what() << '\n';
			score = 0;
		}

		if (cif::VERBOSE >= 2)
			std::cerr << "Score for " << parser->program() << ": " << score << std::endl;

		if (score > 0)
		{
			std::string program = parser->program();
			scores.emplace_back(program, parser, score);
		}
	};

	for (const auto& program : cif::split<std::string>(line, ", ", true))
	{
		if (cif::starts_with(program, "BUSTER"))
			tryParser(new BUSTER_TNT_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "CNS") or cif::starts_with(program, "CNX"))
			tryParser(new CNS_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "PHENIX"))
			tryParser(new PHENIX_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "NUCLSQ"))
			tryParser(new NUCLSQ_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "PROLSQ"))
			tryParser(new PROLSQ_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "REFMAC"))
		{
			// simply try both and take the best
			tryParser(new REFMAC_Remark3Parser(program, expMethod, r, db));
			tryParser(new REFMAC5_Remark3Parser(program, expMethod, r, db));
		}
		else if (cif::starts_with(program, "SHELXL"))
			tryParser(new SHELXL_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "TNT"))
			tryParser(new TNT_Remark3Parser(program, expMethod, r, db));
		else if (cif::starts_with(program, "X-PLOR"))
			tryParser(new XPLOR_Remark3Parser(program, expMethod, r, db));
		else if (cif::VERBOSE > 0)
			std::cerr << "Skipping unknown program (" << program << ") in REMARK 3" << std::endl;
	}

	sort(scores.begin(), scores.end());

	bool guessProgram = scores.empty() or scores.front().score < 0.9f;

	if (guessProgram)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match" << std::endl;

		tryParser(new BUSTER_TNT_Remark3Parser("BUSTER-TNT", expMethod, r, db));
		tryParser(new CNS_Remark3Parser("CNS", expMethod, r, db));
		tryParser(new PHENIX_Remark3Parser("PHENIX", expMethod, r, db));
		tryParser(new NUCLSQ_Remark3Parser("NUCLSQ", expMethod, r, db));
		tryParser(new PROLSQ_Remark3Parser("PROLSQ", expMethod, r, db));
		tryParser(new REFMAC_Remark3Parser("REFMAC", expMethod, r, db));
		tryParser(new REFMAC5_Remark3Parser("REFMAC5", expMethod, r, db));
		tryParser(new SHELXL_Remark3Parser("SHELXL", expMethod, r, db));
		tryParser(new TNT_Remark3Parser("TNT", expMethod, r, db));
		tryParser(new XPLOR_Remark3Parser("X-PLOR", expMethod, r, db));
	}

	bool result = false;

	if (not scores.empty())
	{
		result = true;

		// sort again, the guessing above may have added new candidates
		sort(scores.begin(), scores.end());

		auto& best = scores.front();

		if (cif::VERBOSE > 0)
			std::cerr << "Choosing " << best.parser->program() << " version '" << best.parser->version() << "' as refinement program. Score = " << best.score << std::endl;

		auto& software = db["software"];
		std::string program = best.parser->program();
		std::string version = best.parser->version();

		software.emplace({
			{ "name", program },
			{ "classification", "refinement" },
			{ "version", version },
			{ "pdbx_ordinal", software.size() + 1 }
		});

		best.parser->fixup();

		// Copy everything the winning parser collected into the real datablock
		for (auto& cat1: best.parser->mDb)
		{
			auto& cat2 = db[cat1.name()];

			// copy only the values in the first row for the following categories
			if (cat1.name() == "reflns" or cat1.name() == "refine")
			{
				if (cat2.empty()) // duh... this will generate a validation error anyway...
					cat2.emplace({});

				// NOTE(review): this assumes cat1 has at least one row; the
				// parser looks up "refine" in several places, which may
				// leave an empty category behind. TODO confirm.
				Row r1 = cat1.front();
				Row r2 = cat2.front();

				for (auto& i: r1)
					r2[i.name()] = i.value();
			}
			else
			{
				for (auto rs: cat1)
					cat2.emplace(rs);
			}
		}
	}

	return result;
}
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/cif/category.hpp> #include <cif++/category.hpp>
namespace cif namespace cif
{ {
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cmath>
#include <cif++/cif.hpp>
#include <cif++/structure/AtomType.hpp>
namespace mmcif
{
namespace data
{
// Quiet NaN used in the tables below as the entry for a value that is not available.
const float kNA = std::nanf("1");
// Table with one AtomTypeInfo entry per known atom type. The trailing comment
// on each line gives the atomic number, symbol and element name. Each entry
// holds the AtomType value, the element name, the symbol, a float, a bool and
// seven floats in which kNA marks a missing value.
// NOTE(review): the float presumably is the atomic weight, the bool
// presumably flags metals and the seven floats presumably are radii of
// different kinds; confirm against the declaration of AtomTypeInfo.
// Deuterium is added as an extra entry after the last element.
const AtomTypeInfo kKnownAtoms[] =
{
{ Nn, "Unknown", "Nn", 0, false, { kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // 0 Nn Unknown
{ H, "Hydrogen", "H", 1.008f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 H Hydrogen
{ He, "Helium", "He", 4.0026f, false, { 31, kNA, 32, 46, kNA, kNA, 140 } }, // 2 He Helium
{ Li, "Lithium", "Li", 6.94f, true, { 167, 145, 134, 133, 124, kNA, 182 } }, // 3 Li Lithium
{ Be, "Beryllium", "Be", 9.0122f, true, { 112, 105, 90, 102, 90, 85, kNA } }, // 4 Be Beryllium
{ B, "Boron", "B", 10.81f, true, { 87, 85, 82, 85, 78, 73, kNA } }, // 5 B Boron
{ C, "Carbon", "C", 12.011f, false, { 67, 70, 77, 75, 67, 60, 170 } }, // 6 C Carbon
{ N, "Nitrogen", "N", 14.007f, false, { 56, 65, 75, 71, 60, 54, 155 } }, // 7 N Nitrogen
{ O, "Oxygen", "O", 15.999f, false, { 48, 60, 73, 63, 57, 53, 152 } }, // 8 O Oxygen
{ F, "Fluorine", "F", 18.998f, false, { 42, 50, 71, 64, 59, 53, 147 } }, // 9 F Fluorine
{ Ne, "Neon", "Ne", 20.180f, false, { 38, kNA, 69, 67, 96, kNA, 154 } }, // 10 Ne Neon
{ Na, "Sodium", "Na", 22.990f, true, { 190, 180, 154, 155, 160, kNA, 227 } }, // 11 Na Sodium
{ Mg, "Magnesium", "Mg", 24.305f, true, { 145, 150, 130, 139, 132, 127, 173 } }, // 12 Mg Magnesium
{ Al, "Aluminium", "Al", 26.982f, true, { 118, 125, 118, 126, 113, 111, kNA } }, // 13 Al Aluminium
{ Si, "Silicon", "Si", 28.085f, true, { 111, 110, 111, 116, 107, 102, 210 } }, // 14 Si Silicon
{ P, "Phosphorus", "P", 30.974f, false, { 98, 100, 106, 111, 102, 94, 180 } }, // 15 P Phosphorus
{ S, "Sulfur", "S", 32.06f, false, { 88, 100, 102, 103, 94, 95, 180 } }, // 16 S Sulfur
{ Cl, "Chlorine", "Cl", 35.45f, false, { 79, 100, 99, 99, 95, 93, 175 } }, // 17 Cl Chlorine
{ Ar, "Argon", "Ar", 39.948f, false, { 71, kNA, 97, 96, 107, 96, 188 } }, // 18 Ar Argon
{ K, "Potassium", "K", 39.098f, true, { 243, 220, 196, 196, 193, kNA, 275 } }, // 19 K Potassium
{ Ca, "Calcium", "Ca", 40.078f, true, { 194, 180, 174, 171, 147, 133, kNA } }, // 20 Ca Calcium
{ Sc, "Scandium", "Sc", 44.956f, true, { 184, 160, 144, 148, 116, 114, kNA } }, // 21 Sc Scandium
{ Ti, "Titanium", "Ti", 47.867f, true, { 176, 140, 136, 136, 117, 108, kNA } }, // 22 Ti Titanium
{ V, "Vanadium", "V", 50.942f, true, { 171, 135, 125, 134, 112, 106, kNA } }, // 23 V Vanadium
{ Cr, "Chromium", "Cr", 51.996f, true, { 166, 140, 127, 122, 111, 103, kNA } }, // 24 Cr Chromium
{ Mn, "Manganese", "Mn", 54.938f, true, { 161, 140, 139, 119, 105, 103, kNA } }, // 25 Mn Manganese
{ Fe, "Iron", "Fe", 55.845f, true, { 156, 140, 125, 116, 109, 102, kNA } }, // 26 Fe Iron
{ Co, "Cobalt", "Co", 58.933f, true, { 152, 135, 126, 111, 103, 96, kNA } }, // 27 Co Cobalt
{ Ni, "Nickel", "Ni", 58.693f, true, { 149, 135, 121, 110, 101, 101, 163 } }, // 28 Ni Nickel
{ Cu, "Copper", "Cu", 63.546f, true, { 145, 135, 138, 112, 115, 120, 140 } }, // 29 Cu Copper
{ Zn, "Zinc", "Zn", 65.38f, true, { 142, 135, 131, 118, 120, kNA, 139 } }, // 30 Zn Zinc
{ Ga, "Gallium", "Ga", 69.723f, true, { 136, 130, 126, 124, 117, 121, 187 } }, // 31 Ga Gallium
{ Ge, "Germanium", "Ge", 72.630f, true, { 125, 125, 122, 121, 111, 114, kNA } }, // 32 Ge Germanium
{ As, "Arsenic", "As", 74.922f, true, { 114, 115, 119, 121, 114, 106, 185 } }, // 33 As Arsenic
{ Se, "Selenium", "Se", 78.971f, false, { 103, 115, 116, 116, 107, 107, 190 } }, // 34 Se Selenium
{ Br, "Bromine", "Br", 79.904f, false, { 94, 115, 114, 114, 109, 110, 185 } }, // 35 Br Bromine
{ Kr, "Krypton", "Kr", 83.798f, false, { 88, kNA, 110, 117, 121, 108, 202 } }, // 36 Kr Krypton
{ Rb, "Rubidium", "Rb", 85.468f, true, { 265, 235, 211, 210, 202, kNA, kNA } }, // 37 Rb Rubidium
{ Sr, "Strontium", "Sr", 87.62f, true, { 219, 200, 192, 185, 157, 139, kNA } }, // 38 Sr Strontium
{ Y, "Yttrium", "Y", 88.906f, true, { 212, 180, 162, 163, 130, 124, kNA } }, // 39 Y Yttrium
{ Zr, "Zirconium", "Zr", 91.224f, true, { 206, 155, 148, 154, 127, 121, kNA } }, // 40 Zr Zirconium
{ Nb, "Niobium", "Nb", 92.906f, true, { 198, 145, 137, 147, 125, 116, kNA } }, // 41 Nb Niobium
{ Mo, "Molybdenum", "Mo", 95.95f, true, { 190, 145, 145, 138, 121, 113, kNA } }, // 42 Mo Molybdenum
{ Tc, "Technetium", "Tc", 98, true, { 183, 135, 156, 128, 120, 110, kNA } }, // 43 Tc Technetium
{ Ru, "Ruthenium", "Ru", 101.07f, true, { 178, 130, 126, 125, 114, 103, kNA } }, // 44 Ru Ruthenium
{ Rh, "Rhodium", "Rh", 102.91f, true, { 173, 135, 135, 125, 110, 106, kNA } }, // 45 Rh Rhodium
{ Pd, "Palladium", "Pd", 106.42f, true, { 169, 140, 131, 120, 117, 112, 163 } }, // 46 Pd Palladium
{ Ag, "Silver", "Ag", 107.87f, true, { 165, 160, 153, 128, 139, 137, 172 } }, // 47 Ag Silver
{ Cd, "Cadmium", "Cd", 112.41f, true, { 161, 155, 148, 136, 144, kNA, 158 } }, // 48 Cd Cadmium
{ In, "Indium", "In", 114.82f, true, { 156, 155, 144, 142, 136, 146, 193 } }, // 49 In Indium
{ Sn, "Tin", "Sn", 118.71f, true, { 145, 145, 141, 140, 130, 132, 217 } }, // 50 Sn Tin
{ Sb, "Antimony", "Sb", 121.76f, false, { 133, 145, 138, 140, 133, 127, kNA } }, // 51 Sb Antimony
{ Te, "Tellurium", "Te", 127.60f, false, { 123, 140, 135, 136, 128, 121, 206 } }, // 52 Te Tellurium
{ I, "Iodine", "I", 126.90f, false, { 115, 140, 133, 133, 129, 125, 198 } }, // 53 I Iodine
{ Xe, "Xenon", "Xe", 131.29f, false, { 108, kNA, 130, 131, 135, 122, 216 } }, // 54 Xe Xenon
{ Cs, "Caesium", "Cs", 132.91f, true, { 298, 260, 225, 232, 209, kNA, kNA } }, // 55 Cs Caesium
{ Ba, "Barium", "Ba", 137.33f, true, { 253, 215, 198, 196, 161, 149, kNA } }, // 56 Ba Barium
{ La, "Lanthanum", "La", 138.91f, true, { kNA, 195, 169, 180, 139, 139, kNA } }, // 57 La Lanthanum
{ Ce, "Cerium", "Ce", 140.12f, true, { kNA, 185, kNA, 163, 137, 131, kNA } }, // 58 Ce Cerium
{ Pr, "Praseodymium", "Pr", 140.91f, true, { 247, 185, kNA, 176, 138, 128, kNA } }, // 59 Pr Praseodymium
{ Nd, "Neodymium", "Nd", 144.24f, true, { 206, 185, kNA, 174, 137, kNA, kNA } }, // 60 Nd Neodymium
{ Pm, "Promethium", "Pm", 145, true, { 205, 185, kNA, 173, 135, kNA, kNA } }, // 61 Pm Promethium
{ Sm, "Samarium", "Sm", 150.36f, true, { 238, 185, kNA, 172, 134, kNA, kNA } }, // 62 Sm Samarium
{ Eu, "Europium", "Eu", 151.96f, true, { 231, 185, kNA, 168, 134, kNA, kNA } }, // 63 Eu Europium
{ Gd, "Gadolinium", "Gd", 157.25f, true, { 233, 180, kNA, 169, 135, 132, kNA } }, // 64 Gd Gadolinium
{ Tb, "Terbium", "Tb", 158.93f, true, { 225, 175, kNA, 168, 135, kNA, kNA } }, // 65 Tb Terbium
{ Dy, "Dysprosium", "Dy", 162.50f, true, { 228, 175, kNA, 167, 133, kNA, kNA } }, // 66 Dy Dysprosium
{ Ho, "Holmium", "Ho", 164.93f, true, { 226, 175, kNA, 166, 133, kNA, kNA } }, // 67 Ho Holmium
{ Er, "Erbium", "Er", 167.26f, true, { 226, 175, kNA, 165, 133, kNA, kNA } }, // 68 Er Erbium
{ Tm, "Thulium", "Tm", 168.93f, true, { 222, 175, kNA, 164, 131, kNA, kNA } }, // 69 Tm Thulium
{ Yb, "Ytterbium", "Yb", 173.05f, true, { 222, 175, kNA, 170, 129, kNA, kNA } }, // 70 Yb Ytterbium
{ Lu, "Lutetium", "Lu", 174.97f, true, { 217, 175, 160, 162, 131, 131, kNA } }, // 71 Lu Lutetium
{ Hf, "Hafnium", "Hf", 178.49f, true, { 208, 155, 150, 152, 128, 122, kNA } }, // 72 Hf Hafnium
{ Ta, "Tantalum", "Ta", 180.95f, true, { 200, 145, 138, 146, 126, 119, kNA } }, // 73 Ta Tantalum
{ W, "Tungsten", "W", 183.84f, true, { 193, 135, 146, 137, 120, 115, kNA } }, // 74 W Tungsten
{ Re, "Rhenium", "Re", 186.21f, true, { 188, 135, 159, 131, 119, 110, kNA } }, // 75 Re Rhenium
{ Os, "Osmium", "Os", 190.23f, true, { 185, 130, 128, 129, 116, 109, kNA } }, // 76 Os Osmium
{ Ir, "Iridium", "Ir", 192.22f, true, { 180, 135, 137, 122, 115, 107, kNA } }, // 77 Ir Iridium
{ Pt, "Platinum", "Pt", 195.08f, true, { 177, 135, 128, 123, 112, 110, 175 } }, // 78 Pt Platinum
{ Au, "Gold", "Au", 196.97f, true, { 174, 135, 144, 124, 121, 123, 166 } }, // 79 Au Gold
{ Hg, "Mercury", "Hg", 200.59f, true, { 171, 150, 149, 133, 142, kNA, 155 } }, // 80 Hg Mercury
{ Tl, "Thallium", "Tl", 204.38f, true, { 156, 190, 148, 144, 142, 150, 196 } }, // 81 Tl Thallium
{ Pb, "Lead", "Pb", 207.2f, true, { 154, 180, 147, 144, 135, 137, 202 } }, // 82 Pb Lead
{ Bi, "Bismuth", "Bi", 208.98f, true, { 143, 160, 146, 151, 141, 135, kNA } }, // 83 Bi Bismuth
{ Po, "Polonium", "Po", 209, true, { 135, 190, kNA, 145, 135, 129, kNA } }, // 84 Po Polonium
{ At, "Astatine", "At", 210, false, { 127, kNA, kNA, 147, 138, 138, kNA } }, // 85 At Astatine
{ Rn, "Radon", "Rn", 222, false, { 120, kNA, 145, 142, 145, 133, kNA } }, // 86 Rn Radon
{ Fr, "Francium", "Fr", 223, true, { kNA, kNA, kNA, 223, 218, kNA, kNA } }, // 87 Fr Francium
{ Ra, "Radium", "Ra", 226, true, { kNA, 215, kNA, 201, 173, 159, kNA } }, // 88 Ra Radium
{ Ac, "Actinium", "Ac", 227, true, { kNA, 195, kNA, 186, 153, 140, kNA } }, // 89 Ac Actinium
{ Th, "Thorium", "Th", 232.04f, true, { kNA, 180, kNA, 175, 143, 136, kNA } }, // 90 Th Thorium
{ Pa, "Protactinium", "Pa", 231.04f, true, { kNA, 180, kNA, 169, 138, 129, kNA } }, // 91 Pa Protactinium
{ U, "Uranium", "U", 238.03f, true, { kNA, 175, kNA, 170, 134, 118, 186 } }, // 92 U Uranium
{ Np, "Neptunium", "Np", 237, true, { kNA, 175, kNA, 171, 136, 116, kNA } }, // 93 Np Neptunium
{ Pu, "Plutonium", "Pu", 244, true, { kNA, 175, kNA, 172, 135, kNA, kNA } }, // 94 Pu Plutonium
{ Am, "Americium", "Am", 243, true, { kNA, 175, kNA, 166, 135, kNA, kNA } }, // 95 Am Americium
{ Cm, "Curium", "Cm", 247, true, { kNA, kNA, kNA, 166, 136, kNA, kNA } }, // 96 Cm Curium
{ Bk, "Berkelium", "Bk", 247, true, { kNA, kNA, kNA, 168, 139, kNA, kNA } }, // 97 Bk Berkelium
{ Cf, "Californium", "Cf", 251, true, { kNA, kNA, kNA, 168, 140, kNA, kNA } }, // 98 Cf Californium
{ Es, "Einsteinium", "Es", 252, true, { kNA, kNA, kNA, 165, 140, kNA, kNA } }, // 99 Es Einsteinium
{ Fm, "Fermium", "Fm", 257, true, { kNA, kNA, kNA, 167, kNA, kNA, kNA } }, // 100 Fm Fermium
{ Md, "Mendelevium", "Md", 258, true, { kNA, kNA, kNA, 173, 139, kNA, kNA } }, // 101 Md Mendelevium
{ No, "Nobelium", "No", 259, true, { kNA, kNA, kNA, 176, kNA, kNA, kNA } }, // 102 No Nobelium
{ Lr, "Lawrencium", "Lr", 266, true, { kNA, kNA, kNA, 161, 141, kNA, kNA } }, // 103 Lr Lawrencium
{ Rf, "Rutherfordium", "Rf", 267, true, { kNA, kNA, kNA, 157, 140, 131, kNA } }, // 104 Rf Rutherfordium
{ Db, "Dubnium", "Db", 268, true, { kNA, kNA, kNA, 149, 136, 126, kNA } }, // 105 Db Dubnium
{ Sg, "Seaborgium", "Sg", 269, true, { kNA, kNA, kNA, 143, 128, 121, kNA } }, // 106 Sg Seaborgium
{ Bh, "Bohrium", "Bh", 270, true, { kNA, kNA, kNA, 141, 128, 119, kNA } }, // 107 Bh Bohrium
{ Hs, "Hassium", "Hs", 277, true, { kNA, kNA, kNA, 134, 125, 118, kNA } }, // 108 Hs Hassium
{ Mt, "Meitnerium", "Mt", 278, true, { kNA, kNA, kNA, 129, 125, 113, kNA } }, // 109 Mt Meitnerium
{ Ds, "Darmstadtium", "Ds", 281, true, { kNA, kNA, kNA, 128, 116, 112, kNA } }, // 110 Ds Darmstadtium
{ Rg, "Roentgenium", "Rg", 282, true, { kNA, kNA, kNA, 121, 116, 118, kNA } }, // 111 Rg Roentgenium
{ Cn, "Copernicium", "Cn", 285, true, { kNA, kNA, kNA, 122, 137, 130, kNA } }, // 112 Cn Copernicium
{ Nh, "Nihonium", "Nh", 286, true, { kNA, kNA, kNA, 136, kNA, kNA, kNA } }, // 113 Nh Nihonium
{ Fl, "Flerovium", "Fl", 289, true, { kNA, kNA, kNA, 143, kNA, kNA, kNA } }, // 114 Fl Flerovium
{ Mc, "Moscovium", "Mc", 290, true, { kNA, kNA, kNA, 162, kNA, kNA, kNA } }, // 115 Mc Moscovium
{ Lv, "Livermorium", "Lv", 293, true, { kNA, kNA, kNA, 175, kNA, kNA, kNA } }, // 116 Lv Livermorium
{ Ts, "Tennessine", "Ts", 294, true, { kNA, kNA, kNA, 165, kNA, kNA, kNA } }, // 117 Ts Tennessine
{ Og, "Oganesson", "Og", 294, true, { kNA, kNA, kNA, 157, kNA, kNA, kNA } }, // 118 Og Oganesson
{ D, "Deuterium", "D", 2.014f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 D Deuterium
};
// Number of entries in kKnownAtoms, derived from the size of the array.
uint32_t kKnownAtomsCount = sizeof(kKnownAtoms) / sizeof(AtomTypeInfo);
// --------------------------------------------------------------------
// Crystal ionic radii, as taken from Wikipedia (https://en.m.wikipedia.org/wiki/Ionic_radius)
const struct IonicRadii
{
AtomType type;
float radii[11];
} kCrystalIonicRadii[] = {
{ H, { kNA, kNA, 208, -4, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 59, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 41, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 30, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 132, kNA, kNA, kNA, kNA, 30, kNA, 27, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 126, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 119, kNA, kNA, kNA, kNA, kNA, kNA, 22, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 116, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 67.5, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 54, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 52, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 170, kNA, kNA, kNA, kNA, 51, kNA, 43, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 26, kNA, 41, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 114, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 88.5, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 100, 81, 74.5, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 93, 78, 72, 68, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 87, 75.5, 69, 63, 58, kNA, kNA } }, // Chromium ls
// { Cr,{ kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 81, 72, 67, 47, 39.5, 60, kNA } }, // Manganese ls
// { Mn,{ kNA, kNA, kNA, kNA, 97, 78.5, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 75, 69, 72.5, kNA, 39, kNA, kNA } }, // Iron ls
// { Fe,{ kNA, kNA, kNA, kNA, 92, 78.5, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 79, 68.5, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
// { Co,{ kNA, kNA, kNA, kNA, 88.5, 75, 67, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 83, 70, 62, kNA, kNA, kNA, kNA } }, // Nickel ls
// { Ni,{ kNA, kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 91, 87, 68, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 88 , kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 87, kNA, 67, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 72, kNA, 60, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 184, kNA, kNA, kNA, kNA, 64, kNA, 56, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 182, kNA, kNA, 73, kNA, 45, kNA, 53, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 166, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 132, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 104, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 83, 79, 75, 73, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 78.5, 74, kNA, 70, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 82, 76, 70.5, kNA, 52, 150 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 80.5, 74, 69, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 73, 100, 90, 75.5, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 129, 108, 89, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 109, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, kNA, kNA, 83, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 90, kNA, 74, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 207, kNA, kNA, kNA, kNA, 111, kNA, 70, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 206, kNA, kNA, kNA, kNA, 109, kNA, 67, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 149, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 117.2, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 115, 101, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 113, 99, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 143, 112.3, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 111, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 136, 109.8, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 131, 108.7, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 107.8, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 106.3, 90, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 121, 105.2, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 104.1, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 103, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 117, 102, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 116, 100.8, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 100.1, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 85, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 80, 76, 74, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 72, 69, 67, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 71.5, 68.5, 66.5, 53 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 82, 76.5, 71, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 94, kNA, 76.5, 71, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 151, kNA, 99, kNA, 71, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 133, 116, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 164, kNA, 102.5, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 133, kNA, 91.5, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 117, kNA, 90, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, 81, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 76, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 194, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 162, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 126, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 116, 104, 92, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 116.5, 103, 90, 87, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 124, 115, 101, 89, 86, 85, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 114, 100, 88, 85, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 140, 111.5, 99, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 111, 99, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 110, 97, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 109, 96.1, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 92.8, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
}, kEffectiveIonicRadii[] = {
{ H, { kNA, kNA, 139.9, -18, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 45, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 27, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 16, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 146, kNA, kNA, kNA, kNA, 16, kNA, 13, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 140, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 133, kNA, kNA, kNA, kNA, kNA, kNA, 8, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 102, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 53.5, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 40, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { 212, kNA, kNA, kNA, kNA, 44, kNA, 38, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 184, kNA, kNA, kNA, kNA, 37, kNA, 29, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 12, kNA, 27, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 138, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 100, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 74.5, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 86, 67, 60.5, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 79, 64, 58, 54, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 73, 61.5, 55, 49, 44, kNA, kNA } }, // Chromium ls
{ Cr, { kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 67, 58, 53, 33, 25.5, 46, kNA } }, // Manganese ls
{ Mn, { kNA, kNA, kNA, kNA, 83, 64.5, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 61, 55, 58.5, kNA, 25, kNA, kNA } }, // Iron ls
{ Fe, { kNA, kNA, kNA, kNA, 78, 64.5, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 65, 54.5, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
{ Co, { kNA, kNA, kNA, kNA, 74.5, 61, 53, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 69, 56, 48, kNA, kNA, kNA, kNA } }, // Nickel ls
{ Ni, { kNA, kNA, kNA, kNA, kNA, 60, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 77, 73, 54, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 62, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 73, kNA, 53, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 46, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 198, kNA, kNA, kNA, kNA, 50, kNA, 42, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 196, kNA, kNA, 59, kNA, 31, kNA, 39, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 118, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 69, 65, 61, 59, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 64.5, 60, kNA, 56, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 68, 62, 56.5, kNA, 38, 36 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 66.5, 60, 55, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 59, 86, 76, 61.5, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 115, 94, 75, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 95, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, 118, kNA, 69, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 76, kNA, 60, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 221, kNA, kNA, kNA, kNA, 97, kNA, 56, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 220, kNA, kNA, kNA, kNA, 95, kNA, 53, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 48 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 135, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 103.2, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 101, 87, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 99, 85, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 129, 98.3, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 97, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 122, 95.8, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 117, 94.7, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 93.5, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 92.3, 76, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 107, 91.2, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 90.1, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 89, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 103, 88, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 102, 86.8, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 86.1, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 71, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 66, 62, 60, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 58, 55, 53, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 57.5, 54.5, 52.5, 39 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 68, 62.5, 57, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 80, kNA, 62.5, 57, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 137, kNA, 85, kNA, 57, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 119, 102, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 150, kNA, 88.5, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 119, kNA, 77.5, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 103, kNA, 76, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, 223, kNA, kNA, kNA, kNA, 94, kNA, 67, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 180, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 148, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 106.5, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 104, 90, 78, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 102.5, 89, 76, 73, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 110, 101, 87, 75, 72, 71, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 100, 86, 74, 71, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 126, 97.5, 85, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 97, 85, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 96, 83, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 95, 82.1, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 83.5, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
};
// --------------------------------------------------------------------
// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
// One row of a scattering-factor table: an element, the charge the row applies
// to, and the coefficient set for that species. It is the element type of the
// kWKSFData array that follows. Rows there are brace-initialised positionally
// ({ symbol, charge, { {...}, {...} } }), so the order of the members below
// must be kept as is.
struct SFDataArrayElement
{
// Element this row describes.
AtomType symbol;
// Charge of the species. kWKSFData rows use 0, a signed ion charge such as
// -1 or +2, or the sentinel AtomTypeTraits::kWKSFVal.
// NOTE(review): kWKSFVal presumably marks the "valence" entries of the
// Waasmaier & Kirfel tables -- confirm against AtomTypeTraits.
int8_t charge;
// Coefficient set. In kWKSFData it is initialised from two groups of six
// numbers each; the exact layout is defined by AtomTypeTraits::SFData, which
// is not visible here.
// NOTE(review): presumably the a1..a5,c and b1..b5 terms of the five-Gaussian
// fit, with the last slot of the second group always 0 -- confirm.
AtomTypeTraits::SFData sf;
};
SFDataArrayElement kWKSFData[] = {
{ H, 0, {{ 0.413048, 0.294953, 0.187491, 0.080701, 0.023736, 0.000049},
{ 15.569946, 32.398468, 5.711404, 61.889874, 1.334118, 0.000000}}},
{ He, 0, {{ 0.732354, 0.753896, 0.283819, 0.190003, 0.039139, 0.000487},
{ 11.553918, 4.595831, 1.546299, 26.463964, 0.377523, 0.000000}}},
{ Li, 0, {{ 0.974637, 0.158472, 0.811855, 0.262416, 0.790108, 0.002542},
{ 4.334946, 0.342451, 97.102966,201.363831, 1.409234, 0.000000}}},
{ Be, 0, {{ 1.533712, 0.638283, 0.601052, 0.106139, 1.118414, 0.002511},
{ 42.662079, 0.595420, 99.106499, 0.151340, 1.843093, 0.000000}}},
{ B, 0, {{ 2.085185, 1.064580, 1.062788, 0.140515, 0.641784, 0.003823},
{ 23.494068, 1.137894, 61.238976, 0.114886, 0.399036, 0.000000}}},
{ C, 0, {{ 2.657506, 1.078079, 1.490909, -4.241070, 0.713791, 4.297983},
{ 14.780758, 0.776775, 42.086842, -0.000294, 0.239535, 0.000000}}},
{ N, 0, {{ 11.893780, 3.277479, 1.858092, 0.858927, 0.912985,-11.804902},
{ 0.000158, 10.232723, 30.344690, 0.656065, 0.217287, 0.000000}}},
{ O, 0, {{ 2.960427, 2.508818, 0.637853, 0.722838, 1.142756, 0.027014},
{ 14.182259, 5.936858, 0.112726, 34.958481, 0.390240, 0.000000}}},
{ F, 0, {{ 3.511943, 2.772244, 0.678385, 0.915159, 1.089261, 0.032557},
{ 10.687859, 4.380466, 0.093982, 27.255203, 0.313066, 0.000000}}},
{ Ne, 0, {{ 4.183749, 2.905726, 0.520513, 1.135641, 1.228065, 0.025576},
{ 8.175457, 3.252536, 0.063295, 21.813910, 0.224952, 0.000000}}},
{ Na, 0, {{ 4.910127, 3.081783, 1.262067, 1.098938, 0.560991, 0.079712},
{ 3.281434, 9.119178, 0.102763,132.013947, 0.405878, 0.000000}}},
{ Mg, 0, {{ 4.708971, 1.194814, 1.558157, 1.170413, 3.239403, 0.126842},
{ 4.875207,108.506081, 0.111516, 48.292408, 1.928171, 0.000000}}},
{ Al, 0, {{ 4.730796, 2.313951, 1.541980, 1.117564, 3.154754, 0.139509},
{ 3.628931, 43.051167, 0.095960,108.932388, 1.555918, 0.000000}}},
{ Si, 0, {{ 5.275329, 3.191038, 1.511514, 1.356849, 2.519114, 0.145073},
{ 2.631338, 33.730728, 0.081119, 86.288643, 1.170087, 0.000000}}},
{ P, 0, {{ 1.950541, 4.146930, 1.494560, 1.522042, 5.729711, 0.155233},
{ 0.908139, 27.044952, 0.071280, 67.520187, 1.981173, 0.000000}}},
{ S, 0, {{ 6.372157, 5.154568, 1.473732, 1.635073, 1.209372, 0.154722},
{ 1.514347, 22.092527, 0.061373, 55.445175, 0.646925, 0.000000}}},
{ Cl, 0, {{ 1.446071, 6.870609, 6.151801, 1.750347, 0.634168, 0.146773},
{ 0.052357, 1.193165, 18.343416, 46.398396, 0.401005, 0.000000}}},
{ Ar, 0, {{ 7.188004, 6.638454, 0.454180, 1.929593, 1.523654, 0.265954},
{ 0.956221, 15.339877, 15.339862, 39.043823, 0.062409, 0.000000}}},
{ K, 0, {{ 8.163991, 7.146945, 1.070140, 0.877316, 1.486434, 0.253614},
{ 12.816323, 0.808945,210.327011, 39.597652, 0.052821, 0.000000}}},
{ Ca, 0, {{ 8.593655, 1.477324, 1.436254, 1.182839, 7.113258, 0.196255},
{ 10.460644, 0.041891, 81.390381,169.847839, 0.688098, 0.000000}}},
{ Sc, 0, {{ 1.476566, 1.487278, 1.600187, 9.177463, 7.099750, 0.157765},
{ 53.131023, 0.035325,137.319489, 9.098031, 0.602102, 0.000000}}},
{ Ti, 0, {{ 9.818524, 1.522646, 1.703101, 1.768774, 7.082555, 0.102473},
{ 8.001879, 0.029763, 39.885422,120.157997, 0.532405, 0.000000}}},
{ V, 0, {{ 10.473575, 1.547881, 1.986381, 1.865616, 7.056250, 0.067744},
{ 7.081940, 0.026040, 31.909672,108.022842, 0.474882, 0.000000}}},
{ Cr, 0, {{ 11.007069, 1.555477, 2.985293, 1.347855, 7.034779, 0.065510},
{ 6.366281, 0.023987, 23.244839,105.774498, 0.429369, 0.000000}}},
{ Mn, 0, {{ 11.709542, 1.733414, 2.673141, 2.023368, 7.003180, -0.147293},
{ 5.597120, 0.017800, 21.788420, 89.517914, 0.383054, 0.000000}}},
{ Fe, 0, {{ 12.311098, 1.876623, 3.066177, 2.070451, 6.975185, -0.304931},
{ 5.009415, 0.014461, 18.743040, 82.767876, 0.346506, 0.000000}}},
{ Co, 0, {{ 12.914510, 2.481908, 3.466894, 2.106351, 6.960892, -0.936572},
{ 4.507138, 0.009126, 16.438129, 76.987320, 0.314418, 0.000000}}},
{ Ni, 0, {{ 13.521865, 6.947285, 3.866028, 2.135900, 4.284731, -2.762697},
{ 4.077277, 0.286763, 14.622634, 71.966080, 0.004437, 0.000000}}},
{ Cu, 0, {{ 14.014192, 4.784577, 5.056806, 1.457971, 6.932996, -3.254477},
{ 3.738280, 0.003744, 13.034982, 72.554794, 0.265666, 0.000000}}},
{ Zn, 0, {{ 14.741002, 6.907748, 4.642337, 2.191766, 38.424042,-36.915829},
{ 3.388232, 0.243315, 11.903689, 63.312130, 0.000397, 0.000000}}},
{ Ga, 0, {{ 15.758946, 6.841123, 4.121016, 2.714681, 2.395246, -0.847395},
{ 3.121754, 0.226057, 12.482196, 66.203621, 0.007238, 0.000000}}},
{ Ge, 0, {{ 16.540613, 1.567900, 3.727829, 3.345098, 6.785079, 0.018726},
{ 2.866618, 0.012198, 13.432163, 58.866047, 0.210974, 0.000000}}},
{ As, 0, {{ 17.025642, 4.503441, 3.715904, 3.937200, 6.790175, -2.984117},
{ 2.597739, 0.003012, 14.272119, 50.437996, 0.193015, 0.000000}}},
{ Se, 0, {{ 17.354071, 4.653248, 4.259489, 4.136455, 6.749163, -3.160982},
{ 2.349787, 0.002550, 15.579460, 45.181202, 0.177432, 0.000000}}},
{ Br, 0, {{ 17.550570, 5.411882, 3.937180, 3.880645, 6.707793, -2.492088},
{ 2.119226, 16.557184, 0.002481, 42.164009, 0.162121, 0.000000}}},
{ Kr, 0, {{ 17.655279, 6.848105, 4.171004, 3.446760, 6.685200, -2.810592},
{ 1.908231, 16.606236, 0.001598, 39.917473, 0.146896, 0.000000}}},
{ Rb, 0, {{ 8.123134, 2.138042, 6.761702, 1.156051, 17.679546, 1.139548},
{ 15.142385, 33.542667, 0.129372,224.132507, 1.713368, 0.000000}}},
{ Sr, 0, {{ 17.730219, 9.795867, 6.099763, 2.620025, 0.600053, 1.140251},
{ 1.563060, 14.310868, 0.120574,135.771317, 0.120574, 0.000000}}},
{ Y, 0, {{ 17.792040, 10.253252, 5.714949, 3.170516, 0.918251, 1.131787},
{ 1.429691, 13.132816, 0.112173,108.197029, 0.112173, 0.000000}}},
{ Zr, 0, {{ 17.859772, 10.911038, 5.821115, 3.512513, 0.746965, 1.124859},
{ 1.310692, 12.319285, 0.104353, 91.777542, 0.104353, 0.000000}}},
{ Nb, 0, {{ 17.958399, 12.063054, 5.007015, 3.287667, 1.531019, 1.123452},
{ 1.211590, 12.246687, 0.098615, 75.011948, 0.098615, 0.000000}}},
{ Mo, 0, {{ 6.236218, 17.987711, 12.973127, 3.451426, 0.210899, 1.108770},
{ 0.090780, 1.108310, 11.468720, 66.684151, 0.090780, 0.000000}}},
{ Tc, 0, {{ 17.840963, 3.428236, 1.373012, 12.947364, 6.335469, 1.074784},
{ 1.005729, 41.901382,119.320541, 9.781542, 0.083391, 0.000000}}},
{ Ru, 0, {{ 6.271624, 17.906738, 14.123269, 3.746008, 0.908235, 1.043992},
{ 0.077040, 0.928222, 9.555345, 35.860680,123.552246, 0.000000}}},
{ Rh, 0, {{ 6.216648, 17.919739, 3.854252, 0.840326, 15.173498, 0.995452},
{ 0.070789, 0.856121, 33.889484,121.686691, 9.029517, 0.000000}}},
{ Pd, 0, {{ 6.121511, 4.784063, 16.631683, 4.318258, 13.246773, 0.883099},
{ 0.062549, 0.784031, 8.751391, 34.489983, 0.784031, 0.000000}}},
{ Ag, 0, {{ 6.073874, 17.155437, 4.173344, 0.852238, 17.988686, 0.756603},
{ 0.055333, 7.896512, 28.443739,110.376106, 0.716809, 0.000000}}},
{ Cd, 0, {{ 6.080986, 18.019468, 4.018197, 1.303510, 17.974669, 0.603504},
{ 0.048990, 7.273646, 29.119284, 95.831207, 0.661231, 0.000000}}},
{ In, 0, {{ 6.196477, 18.816183, 4.050479, 1.638929, 17.962912, 0.333097},
{ 0.042072, 6.695665, 31.009790,103.284348, 0.610714, 0.000000}}},
{ Sn, 0, {{ 19.325171, 6.281571, 4.498866, 1.856934, 17.917318, 0.119024},
{ 6.118104, 0.036915, 32.529045, 95.037186, 0.565651, 0.000000}}},
{ Sb, 0, {{ 5.394956, 6.549570, 19.650681, 1.827820, 17.867832, -0.290506},
{ 33.326523, 0.030974, 5.564929, 87.130966, 0.523992, 0.000000}}},
{ Te, 0, {{ 6.660302, 6.940756, 19.847015, 1.557175, 17.802427, -0.806668},
{ 33.031654, 0.025750, 5.065547, 84.101616, 0.487660, 0.000000}}},
{ I, 0, {{ 19.884502, 6.736593, 8.110516, 1.170953, 17.548716, -0.448811},
{ 4.628591, 0.027754, 31.849096, 84.406387, 0.463550, 0.000000}}},
{ Xe, 0, {{ 19.978920, 11.774945, 9.332182, 1.244749, 17.737501, -6.065902},
{ 4.143356, 0.010142, 28.796200, 75.280685, 0.413616, 0.000000}}},
{ Cs, 0, {{ 17.418674, 8.314444, 10.323193, 1.383834, 19.876251, -2.322802},
{ 0.399828, 0.016872, 25.605827,233.339676, 3.826915, 0.000000}}},
{ Ba, 0, {{ 19.747343, 17.368477, 10.465718, 2.592602, 11.003653, -5.183497},
{ 3.481823, 0.371224, 21.226641,173.834274, 0.010719, 0.000000}}},
{ La, 0, {{ 19.966019, 27.329655, 11.018425, 3.086696, 17.335455,-21.745489},
{ 3.197408, 0.003446, 19.955492,141.381973, 0.341817, 0.000000}}},
{ Ce, 0, {{ 17.355122, 43.988499, 20.546650, 3.130670, 11.353665,-38.386017},
{ 0.328369, 0.002047, 3.088196,134.907654, 18.832960, 0.000000}}},
{ Pr, 0, {{ 21.551311, 17.161730, 11.903859, 2.679103, 9.564197, -3.871068},
{ 2.995675, 0.312491, 17.716705,152.192825, 0.010468, 0.000000}}},
{ Nd, 0, {{ 17.331244, 62.783924, 12.160097, 2.663483, 22.239950,-57.189842},
{ 0.300269, 0.001320, 17.026001,148.748993, 2.910268, 0.000000}}},
{ Pm, 0, {{ 17.286388, 51.560162, 12.478557, 2.675515, 22.960947,-45.973682},
{ 0.286620, 0.001550, 16.223755,143.984512, 2.796480, 0.000000}}},
{ Sm, 0, {{ 23.700363, 23.072214, 12.777782, 2.684217, 17.204367,-17.452166},
{ 2.689539, 0.003491, 15.495437,139.862473, 0.274536, 0.000000}}},
{ Eu, 0, {{ 17.186195, 37.156837, 13.103387, 2.707246, 24.419271,-31.586687},
{ 0.261678, 0.001995, 14.787360,134.816299, 2.581883, 0.000000}}},
{ Gd, 0, {{ 24.898117, 17.104952, 13.222581, 3.266152, 48.995213,-43.505684},
{ 2.435028, 0.246961, 13.996325,110.863091, 0.001383, 0.000000}}},
{ Tb, 0, {{ 25.910013, 32.344139, 13.765117, 2.751404, 17.064405,-26.851971},
{ 2.373912, 0.002034, 13.481969,125.836510, 0.236916, 0.000000}}},
{ Dy, 0, {{ 26.671785, 88.687576, 14.065445, 2.768497, 17.067781,-83.279831},
{ 2.282593, 0.000665, 12.920230,121.937187, 0.225531, 0.000000}}},
{ Ho, 0, {{ 27.150190, 16.999819, 14.059334, 3.386979, 46.546471,-41.165253},
{ 2.169660, 0.215414, 12.213148,100.506783, 0.001211, 0.000000}}},
{ Er, 0, {{ 28.174887, 82.493271, 14.624002, 2.802756, 17.018515,-77.135223},
{ 2.120995, 0.000640, 11.915256,114.529938, 0.207519, 0.000000}}},
{ Tm, 0, {{ 28.925894, 76.173798, 14.904704, 2.814812, 16.998117,-70.839813},
{ 2.046203, 0.000656, 11.465375,111.411980, 0.199376, 0.000000}}},
{ Yb, 0, {{ 29.676760, 65.624069, 15.160854, 2.830288, 16.997850,-60.313812},
{ 1.977630, 0.000720, 11.044622,108.139153, 0.192110, 0.000000}}},
{ Lu, 0, {{ 30.122866, 15.099346, 56.314899, 3.540980, 16.943729,-51.049416},
{ 1.883090, 10.342764, 0.000780, 89.559250, 0.183849, 0.000000}}},
{ Hf, 0, {{ 30.617033, 15.145351, 54.933548, 4.096253, 16.896156,-49.719837},
{ 1.795613, 9.934469, 0.000739, 76.189705, 0.175914, 0.000000}}},
{ Ta, 0, {{ 31.066359, 15.341823, 49.278297, 4.577665, 16.828321,-44.119026},
{ 1.708732, 9.618455, 0.000760, 66.346199, 0.168002, 0.000000}}},
{ W, 0, {{ 31.507900, 15.682498, 37.960129, 4.885509, 16.792112,-32.864574},
{ 1.629485, 9.446448, 0.000898, 59.980675, 0.160798, 0.000000}}},
{ Re, 0, {{ 31.888456, 16.117104, 42.390297, 5.211669, 16.767591,-37.412682},
{ 1.549238, 9.233474, 0.000689, 54.516373, 0.152815, 0.000000}}},
{ Os, 0, {{ 32.210297, 16.678440, 48.559906, 5.455839, 16.735533,-43.677956},
{ 1.473531, 9.049695, 0.000519, 50.210201, 0.145771, 0.000000}}},
{ Ir, 0, {{ 32.004436, 1.975454, 17.070105, 15.939454, 5.990003, 4.018893},
{ 1.353767, 81.014175, 0.128093, 7.661196, 26.659403, 0.000000}}},
{ Pt, 0, {{ 31.273891, 18.445440, 17.063745, 5.555933, 1.575270, 4.050394},
{ 1.316992, 8.797154, 0.124741, 40.177994, 1.316997, 0.000000}}},
{ Au, 0, {{ 16.777390, 19.317156, 32.979683, 5.595453, 10.576854, -6.279078},
{ 0.122737, 8.621570, 1.256902, 38.008820, 0.000601, 0.000000}}},
{ Hg, 0, {{ 16.839890, 20.023823, 28.428564, 5.881564, 4.714706, 4.076478},
{ 0.115905, 8.256927, 1.195250, 39.247227, 1.195250, 0.000000}}},
{ Tl, 0, {{ 16.630795, 19.386616, 32.808571, 1.747191, 6.356862, 4.066939},
{ 0.110704, 7.181401, 1.119730, 90.660263, 26.014978, 0.000000}}},
{ Pb, 0, {{ 16.419567, 32.738590, 6.530247, 2.342742, 19.916475, 4.049824},
{ 0.105499, 1.055049, 25.025890, 80.906593, 6.664449, 0.000000}}},
{ Bi, 0, {{ 16.282274, 32.725136, 6.678302, 2.694750, 20.576559, 4.040914},
{ 0.101180, 1.002287, 25.714146, 77.057549, 6.291882, 0.000000}}},
{ Po, 0, {{ 16.289164, 32.807171, 21.095163, 2.505901, 7.254589, 4.046556},
{ 0.098121, 0.966265, 6.046622, 76.598068, 28.096128, 0.000000}}},
{ At, 0, {{ 16.011461, 32.615547, 8.113899, 2.884082, 21.377867, 3.995684},
{ 0.092639, 0.904416, 26.543257, 68.372963, 5.499512, 0.000000}}},
{ Rn, 0, {{ 16.070229, 32.641106, 21.489658, 2.299218, 9.480184, 4.020977},
{ 0.090437, 0.876409, 5.239687, 69.188477, 27.632641, 0.000000}}},
{ Fr, 0, {{ 16.007385, 32.663830, 21.594351, 1.598497, 11.121192, 4.003472},
{ 0.087031, 0.840187, 4.954467,199.805801, 26.905106, 0.000000}}},
{ Ra, 0, {{ 32.563690, 21.396671, 11.298093, 2.834688, 15.914965, 3.981773},
{ 0.801980, 4.590666, 22.758972,160.404388, 0.083544, 0.000000}}},
{ Ac, 0, {{ 15.914053, 32.535042, 21.553976, 11.433394, 3.612409, 3.939212},
{ 0.080511, 0.770669, 4.352206, 21.381622,130.500748, 0.000000}}},
{ Th, 0, {{ 15.784024, 32.454899, 21.849222, 4.239077, 11.736191, 3.922533},
{ 0.077067, 0.735137, 4.097976,109.464111, 20.512138, 0.000000}}},
{ Pa, 0, {{ 32.740208, 21.973675, 12.957398, 3.683832, 15.744058, 3.886066},
{ 0.709545, 4.050881, 19.231543,117.255005, 0.074040, 0.000000}}},
{ U, 0, {{ 15.679275, 32.824306, 13.660459, 3.687261, 22.279434, 3.854444},
{ 0.071206, 0.681177, 18.236156,112.500038, 3.930325, 0.000000}}},
{ Np, 0, {{ 32.999901, 22.638077, 14.219973, 3.672950, 15.683245, 3.769391},
{ 0.657086, 3.854918, 17.435474,109.464485, 0.068033, 0.000000}}},
{ Pu, 0, {{ 33.281178, 23.148544, 15.153755, 3.031492, 15.704215, 3.664200},
{ 0.634999, 3.856168, 16.849735,121.292038, 0.064857, 0.000000}}},
{ Am, 0, {{ 33.435162, 23.657259, 15.576339, 3.027023, 15.746100, 3.541160},
{ 0.612785, 3.792942, 16.195778,117.757004, 0.061755, 0.000000}}},
{ Cm, 0, {{ 15.804837, 33.480801, 24.150198, 3.655563, 15.499866, 3.390840},
{ 0.058619, 0.590160, 3.674720,100.736191, 15.408296, 0.000000}}},
{ Bk, 0, {{ 15.889072, 33.625286, 24.710381, 3.707139, 15.839268, 3.213169},
{ 0.055503, 0.569571, 3.615472, 97.694786, 14.754303, 0.000000}}},
{ Cf, 0, {{ 33.794075, 25.467693, 16.048487, 3.657525, 16.008982, 3.005326},
{ 0.550447, 3.581973, 14.357388, 96.064972, 0.052450, 0.000000}}},
{ H, -1, {{ 0.702260, 0.763666, 0.248678, 0.261323, 0.023017, 0.000425},
{ 23.945604, 74.897919, 6.773289,233.583450, 1.337531, 0.000000}}},
{ Li, +1, {{ 0.432724, 0.549257, 0.376575, -0.336481, 0.976060, 0.001764},
{ 0.260367, 1.042836, 7.885294, 0.260368, 3.042539, 0.000000}}},
{ Be, +2, {{ 3.055430, -2.372617, 1.044914, 0.544233, 0.381737, -0.653773},
{ 0.001226, 0.001227, 1.542106, 0.456279, 4.047479, 0.000000}}},
{ C, AtomTypeTraits::kWKSFVal,
{{ 1.258489, 0.728215, 1.119856, 2.168133, 0.705239, 0.019722},
{ 10.683769, 0.208177, 0.836097, 24.603704, 58.954273, 0.000000}}},
{ O, -1, {{ 3.106934, 3.235142, 1.148886, 0.783981, 0.676953, 0.046136},
{ 19.868080, 6.960252, 0.170043, 65.693512, 0.630757, 0.000000}}},
{ O, -2, {{ 3.990247, 2.300563, 0.607200, 1.907882, 1.167080, 0.025429},
{ 16.639956, 5.636819, 0.108493, 47.299709, 0.379984, 0.000000}}},
{ F, -1, {{ 0.457649, 3.841561, 1.432771, 0.801876, 3.395041, 0.069525},
{ 0.917243, 5.507803, 0.164955, 51.076206, 15.821679, 0.000000}}},
{ Na, +1, {{ 3.148690, 4.073989, 0.767888, 0.995612, 0.968249, 0.045300},
{ 2.594987, 6.046925, 0.070139, 14.122657, 0.217037, 0.000000}}},
{ Mg, +2, {{ 3.062918, 4.135106, 0.853742, 1.036792, 0.852520, 0.058851},
{ 2.015803, 4.417941, 0.065307, 9.669710, 0.187818, 0.000000}}},
{ Al, +3, {{ 4.132015, 0.912049, 1.102425, 0.614876, 3.219136, 0.019397},
{ 3.528641, 7.378344, 0.133708, 0.039065, 1.644728, 0.000000}}},
{ Si, AtomTypeTraits::kWKSFVal,
{{ 2.879033, 3.072960, 1.515981, 1.390030, 4.995051, 0.146030},
{ 1.239713, 38.706276, 0.081481, 93.616333, 2.770293, 0.000000}}},
{ Si, +4, {{ 3.676722, 3.828496, 1.258033, 0.419024, 0.720421, 0.097266},
{ 1.446851, 3.013144, 0.064397, 0.206254, 5.970222, 0.000000}}},
{ Cl, -1, {{ 1.061802, 7.139886, 6.524271, 2.355626, 35.829403,-34.916603},
{ 0.144727, 1.171795, 19.467655, 60.320301, 0.000436, 0.000000}}},
{ K, +1, {{-17.609339, 1.494873, 7.150305, 10.899569, 15.808228, 0.257164},
{ 18.840979, 0.053453, 0.812940, 22.264105, 14.351593, 0.000000}}},
{ Ca, +2, {{ 8.501441, 12.880483, 9.765095, 7.156669, 0.711160,-21.013187},
{ 10.525848, -0.004033, 0.010692, 0.684443, 27.231771, 0.000000}}},
{ Sc, +3, {{ 7.104348, 1.511488,-53.669773, 38.404816, 24.532240, 0.118642},
{ 0.601957, 0.033386, 12.572138, 10.859736, 14.125230, 0.000000}}},
{ Ti, +2, {{ 7.040119, 1.496285, 9.657304, 0.006534, 1.649561, 0.150362},
{ 0.537072, 0.031914, 8.009958,201.800293, 24.039482, 0.000000}}},
{ Ti, +3, {{ 36.587933, 7.230255, -9.086077, 2.084594, 17.294008,-35.111282},
{ 0.000681, 0.522262, 5.262317, 15.881716, 6.149805, 0.000000}}},
{ Ti, +4, {{ 45.355537, 7.092900, 7.483858,-43.498817, 1.678915, -0.110628},
{ 9.252186, 0.523046, 13.082852, 10.193876, 0.023064, 0.000000}}},
{ V, +2, {{ 7.754356, 2.064100, 2.576998, 2.011404, 7.126177, -0.533379},
{ 7.066315, 0.014993, 7.066308, 22.055786, 0.467568, 0.000000}}},
{ V, +3, {{ 9.958480, 1.596350, 1.483442,-10.846044, 17.332867, 0.474921},
{ 6.763041, 0.056895, 17.750029, 0.328826, 0.388013, 0.000000}}},
{ V, +5, {{ 15.575018, 8.448095, 1.612040, -9.721855, 1.534029, 0.552676},
{ 0.682708, 5.566640, 10.527077, 0.907961, 0.066667, 0.000000}}},
{ Cr, +2, {{ 10.598877, 1.565858, 2.728280, 0.098064, 6.959321, 0.049870},
{ 6.151846, 0.023519, 17.432816, 54.002388, 0.426301, 0.000000}}},
{ Cr, +3, {{ 7.989310, 1.765079, 2.627125, 1.829380, 6.980908, -0.192123},
{ 6.068867, 0.018342, 6.068887, 16.309284, 0.420864, 0.000000}}},
{ Mn, +2, {{ 11.287712, 26.042414, 3.058096, 0.090258, 7.088306,-24.566132},
{ 5.506225, 0.000774, 16.158575, 54.766354, 0.375580, 0.000000}}},
{ Mn, +3, {{ 6.926972, 2.081342, 11.128379, 2.375107, -0.419287, -0.093713},
{ 0.378315, 0.015054, 5.379957, 14.429586, 0.004939, 0.000000}}},
{ Mn, +4, {{ 12.409131, 7.466993, 1.809947,-12.138477, 10.780248, 0.672146},
{ 0.300400, 0.112814, 12.520756, 0.168653, 5.173237, 0.000000}}},
{ Fe, +2, {{ 11.776765, 11.165097, 3.533495, 0.165345, 7.036932, -9.676919},
{ 4.912232, 0.001748, 14.166556, 42.381958, 0.341324, 0.000000}}},
{ Fe, +3, {{ 9.721638, 63.403847, 2.141347, 2.629274, 7.033846,-61.930725},
{ 4.869297, 0.000293, 4.867602, 13.539076, 0.338520, 0.000000}}},
{ Co, +2, {{ 6.993840, 26.285812, 12.254289, 0.246114, 4.017407,-24.796852},
{ 0.310779, 0.000684, 4.400528, 35.741447, 12.536393, 0.000000}}},
{ Co, +3, {{ 6.861739, 2.678570, 12.281889, 3.501741, -0.179384, -1.147345},
{ 0.309794, 0.008142, 4.331703, 11.914167, 11.914167, 0.000000}}},
{ Ni, +2, {{ 12.519017, 37.832058, 4.387257, 0.661552, 6.949072,-36.344471},
{ 3.933053, 0.000442, 10.449184, 23.860998, 0.283723, 0.000000}}},
{ Ni, +3, {{ 13.579366, 1.902844, 12.859268, 3.811005, -6.838595, -0.317618},
{ 0.313140, 0.012621, 3.906407, 10.894311, 0.344379, 0.000000}}},
{ Cu, +1, {{ 12.960763, 16.342150, 1.110102, 5.520682, 6.915452,-14.849320},
{ 3.576010, 0.000975, 29.523218, 10.114283, 0.261326, 0.000000}}},
{ Cu, +2, {{ 11.895569, 16.344978, 5.799817, 1.048804, 6.789088,-14.878383},
{ 3.378519, 0.000924, 8.133653, 20.526524, 0.254741, 0.000000}}},
{ Zn, +2, {{ 13.340772, 10.428857, 5.544489, 0.762295, 6.869172, -8.945248},
{ 3.215913, 0.001413, 8.542680, 21.891756, 0.239215, 0.000000}}},
{ Ga, +3, {{ 13.123875, 35.288189, 6.126979, 0.611551, 6.724807,-33.875122},
{ 2.809960, 0.000323, 6.831534, 16.784311, 0.212002, 0.000000}}},
{ Ge, +4, {{ 6.876636, 6.779091, 9.969591, 3.135857, 0.152389, 1.086542},
{ 2.025174, 0.176650, 3.573822, 7.685848, 16.677574, 0.000000}}},
{ Br, -1, {{ 17.714310, 6.466926, 6.947385, 4.402674, -0.697279, 1.152674},
{ 2.122554, 19.050768, 0.152708, 58.690361, 58.690372, 0.000000}}},
{ Rb, +1, {{ 17.684320, 7.761588, 6.680874, 2.668883, 0.070974, 1.133263},
{ 1.710209, 14.919863, 0.128542, 31.654478, 0.128543, 0.000000}}},
{ Sr, +2, {{ 17.694973, 1.275762, 6.154252, 9.234786, 0.515995, 1.125309},
{ 1.550888, 30.133041, 0.118774, 13.821799, 0.118774, 0.000000}}},
{ Y, +3, {{ 46.660366, 10.369686, 4.623042,-62.170834, 17.471146, 19.023842},
{ -0.019971, 13.180257, 0.176398, -0.016727, 1.467348, 0.000000}}},
{ Zr, +4, {{ 6.802956, 17.699253, 10.650647, -0.248108, 0.250338, 0.827902},
{ 0.096228, 1.296127, 11.240715, -0.219259, -0.219021, 0.000000}}},
{ Nb, +3, {{ 17.714323, 1.675213, 7.483963, 8.322464, 11.143573, -8.339573},
{ 1.172419, 30.102791, 0.080255, -0.002983, 10.456687, 0.000000}}},
{ Nb, +5, {{ 17.580206, 7.633277, 10.793497, 0.180884, 67.837921,-68.024780},
{ 1.165852, 0.078558, 9.507652, 31.621656, -0.000438, 0.000000}}},
{ Mo, +3, {{ 7.447050, 17.778122, 11.886068, 1.997905, 1.789626, -1.898764},
{ 0.072000, 1.073145, 9.834720, 28.221746, -0.011674, 0.000000}}},
{ Mo, +5, {{ 7.929879, 17.667669, 11.515987, 0.500402, 77.444084,-78.056595},
{ 0.068856, 1.068064, 9.046229, 26.558945, -0.000473, 0.000000}}},
{ Mo, +6, {{ 34.757683, 9.653037, 6.584769,-18.628115, 2.490594, 1.141916},
{ 1.301770, 7.123843, 0.094097, 1.617443, 12.335434, 0.000000}}},
{ Ru, +3, {{ 17.894758, 13.579529, 10.729251, 2.474095, 48.227997,-51.905243},
{ 0.902827, 8.740579, 0.045125, 24.764954, -0.001699, 0.000000}}},
{ Ru, +4, {{ 17.845776, 13.455084, 10.229087, 1.653524, 14.059795,-17.241762},
{ 0.901070, 8.482392, 0.045972, 23.015272, -0.004889, 0.000000}}},
{ Rh, +3, {{ 17.758621, 14.569813, 5.298320, 2.533579, 0.879753, 0.960843},
{ 0.841779, 8.319533, 0.069050, 23.709131, 0.069050, 0.000000}}},
{ Rh, +4, {{ 17.716188, 14.446654, 5.185801, 1.703448, 0.989992, 0.959941},
{ 0.840572, 8.100647, 0.068995, 22.357307, 0.068995, 0.000000}}},
{ Pd, +2, {{ 6.122282, 15.651012, 3.513508, 9.060790, 8.771199, 0.879336},
{ 0.062424, 8.018296, 24.784275, 0.776457, 0.776457, 0.000000}}},
{ Pd, +4, {{ 6.152421,-96.069023, 31.622141, 81.578255, 17.801403, 0.915874},
{ 0.063951, 11.090354, 13.466152, 9.758302, 0.783014, 0.000000}}},
{ Ag, +1, {{ 6.091192, 4.019526, 16.948174, 4.258638, 13.889437, 0.785127},
{ 0.056305, 0.719340, 7.758938, 27.368349, 0.719340, 0.000000}}},
{ Ag, +2, {{ 6.401808, 48.699802, 4.799859,-32.332523, 16.356710, 1.068247},
{ 0.068167, 0.942270, 20.639496, 1.100365, 6.883131, 0.000000}}},
{ Cd, +2, {{ 6.093711, 43.909691, 17.041306,-39.675117, 17.958918, 0.664795},
{ 0.050624, 8.654143, 15.621396, 11.082067, 0.667591, 0.000000}}},
{ In, +3, {{ 6.206277, 18.497746, 3.078131, 10.524613, 7.401234, 0.293677},
{ 0.041357, 6.605563, 18.792250, 0.608082, 0.608082, 0.000000}}},
{ Sn, +2, {{ 6.353672, 4.770377, 14.672025, 4.235959, 18.002131, -0.042519},
{ 0.034720, 6.167891, 6.167879, 29.006456, 0.561774, 0.000000}}},
{ Sn, +4, {{ 15.445732, 6.420892, 4.562980, 1.713385, 18.033537, -0.172219},
{ 6.280898, 0.033144, 6.280899, 17.983601, 0.557980, 0.000000}}},
{ Sb, +3, {{ 10.189171, 57.461918, 19.356573, 4.862206,-45.394096, 1.516108},
{ 0.089485, 0.375256, 5.357987, 22.153736, 0.297768, 0.000000}}},
{ Sb, +5, {{ 17.920622, 6.647932, 12.724075, 1.555545, 7.600591, -0.445371},
{ 0.522315, 0.029487, 5.718210, 16.433775, 5.718204, 0.000000}}},
{ I, -1, {{ 20.010330, 17.835524, 8.104130, 2.231118, 9.158548, -3.341004},
{ 4.565931, 0.444266, 32.430672, 95.149040, 0.014906, 0.000000}}},
{ Cs, +1, {{ 19.939056, 24.967621, 10.375884, 0.454243, 17.660248,-19.394306},
{ 3.770511, 0.004040, 25.311275, 76.537766, 0.384730, 0.000000}}},
{ Ba, +2, {{ 19.750200, 17.513683, 10.884892, 0.321585, 65.149834,-59.618172},
{ 3.430748, 0.361590, 21.358307, 70.309402, 0.001418, 0.000000}}},
{ La, +3, {{ 19.688887, 17.345703, 11.356296, 0.099418, 82.358124,-76.846909},
{ 3.146211, 0.339586, 18.753832, 90.345459, 0.001072, 0.000000}}},
{ Ce, +3, {{ 26.593231, 85.866432, -6.677695, 12.111847, 17.401903,-80.313423},
{ 3.280381, 0.001012, 4.313575, 17.868504, 0.326962, 0.000000}}},
{ Ce, +4, {{ 17.457533, 25.659941, 11.691037, 19.695251,-16.994749, -3.515096},
{ 0.311812, -0.003793, 16.568687, 2.886395, -0.008931, 0.000000}}},
{ Pr, +3, {{ 20.879841, 36.035797, 12.135341, 0.283103, 17.167803,-30.500784},
{ 2.870897, 0.002364, 16.615236, 53.909359, 0.306993, 0.000000}}},
{ Pr, +4, {{ 17.496082, 21.538509, 20.403114, 12.062211, -7.492043, -9.016722},
{ 0.294457, -0.002742, 2.772886, 15.804613, -0.013556, 0.000000}}},
{ Nd, +3, {{ 17.120077, 56.038139, 21.468307, 10.000671, 2.905866,-50.541992},
{ 0.291295, 0.001421, 2.743681, 14.581367, 22.485098, 0.000000}}},
{ Pm, +3, {{ 22.221066, 17.068142, 12.805423, 0.435687, 52.238770,-46.767181},
{ 2.635767, 0.277039, 14.927315, 45.768017, 0.001455, 0.000000}}},
{ Sm, +3, {{ 15.618565, 19.538092, 13.398946, -4.358811, 24.490461, -9.714854},
{ 0.006001, 0.306379, 14.979594, 0.748825, 2.454492, 0.000000}}},
{ Eu, +2, {{ 23.899035, 31.657497, 12.955752, 1.700576, 16.992199,-26.204315},
{ 2.467332, 0.002230, 13.625002, 35.089481, 0.253136, 0.000000}}},
{ Eu, +3, {{ 17.758327, 33.498665, 24.067188, 13.436883, -9.019134,-19.768026},
{ 0.244474, -0.003901, 2.487526, 14.568011, -0.015628, 0.000000}}},
{ Gd, +3, {{ 24.344999, 16.945311, 13.866931, 0.481674, 93.506378,-88.147179},
{ 2.333971, 0.239215, 12.982995, 43.876347, 0.000673, 0.000000}}},
{ Tb, +3, {{ 24.878252, 16.856016, 13.663937, 1.279671, 39.271294,-33.950317},
{ 2.223301, 0.227290, 11.812528, 29.910065, 0.001527, 0.000000}}},
{ Dy, +3, {{ 16.864344, 90.383461, 13.675473, 1.687078, 25.540651,-85.150650},
{ 0.216275, 0.000593, 11.121207, 26.250975, 2.135930, 0.000000}}},
{ Ho, +3, {{ 16.837524, 63.221336, 13.703766, 2.061602, 26.202621,-58.026505},
{ 0.206873, 0.000796, 10.500283, 24.031883, 2.055060, 0.000000}}},
{ Er, +3, {{ 16.810127, 22.681061, 13.864114, 2.294506, 26.864477,-17.513460},
{ 0.198293, 0.002126, 9.973341, 22.836388, 1.979442, 0.000000}}},
{ Tm, +3, {{ 16.787500, 15.350905, 14.182357, 2.299111, 27.573771,-10.192087},
{ 0.190852, 0.003036, 9.602934, 22.526880, 1.912862, 0.000000}}},
{ Yb, +2, {{ 28.443794, 16.849527, 14.165081, 3.445311, 28.308853,-23.214935},
{ 1.863896, 0.183811, 9.225469, 23.691355, 0.001463, 0.000000}}},
{ Yb, +3, {{ 28.191629, 16.828087, 14.167848, 2.744962, 23.171774,-18.103676},
{ 1.842889, 0.182788, 9.045957, 20.799847, 0.001759, 0.000000}}},
{ Lu, +3, {{ 28.828693, 16.823227, 14.247617, 3.079559, 25.647667,-20.626528},
{ 1.776641, 0.175560, 8.575531, 19.693701, 0.001453, 0.000000}}},
{ Hf, +4, {{ 29.267378, 16.792543, 14.785310, 2.184128, 23.791996,-18.820383},
{ 1.697911, 0.168313, 8.190025, 18.277578, 0.001431, 0.000000}}},
{ Ta, +5, {{ 29.539469, 16.741854, 15.182070, 1.642916, 16.437447,-11.542459},
{ 1.612934, 0.160460, 7.654408, 17.070732, 0.001858, 0.000000}}},
{ W, +6, {{ 29.729357, 17.247808, 15.184488, 1.154652, 0.739335, 3.945157},
{ 1.501648, 0.140803, 6.880573, 14.299601, 14.299618, 0.000000}}},
{ Os, +4, {{ 17.113485, 15.792370, 23.342392, 4.090271, 7.671292, 3.988390},
{ 0.131850, 7.288542, 1.389307, 19.629425, 1.389307, 0.000000}}},
{ Ir, +3, {{ 31.537575, 16.363338, 15.597141, 5.051404, 1.436935, 4.009459},
{ 1.334144, 7.451918, 0.127514, 21.705648, 0.127515, 0.000000}}},
{ Ir, +4, {{ 30.391249, 16.146996, 17.019068, 4.458904, 0.975372, 4.006865},
{ 1.328519, 7.181766, 0.127337, 19.060146, 1.328519, 0.000000}}},
{ Pt, +2, {{ 31.986849, 17.249048, 15.269374, 5.760234, 1.694079, 4.032512},
{ 1.281143, 7.625512, 0.123571, 24.190826, 0.123571, 0.000000}}},
{ Pt, +4, {{ 41.932713, 16.339224, 17.653894, 6.012420,-12.036877, 4.094551},
{ 1.111409, 6.466086, 0.128917, 16.954155, 0.778721, 0.000000}}},
{ Au, +1, {{ 32.124306, 16.716476, 16.814100, 7.311565, 0.993064, 4.040792},
{ 1.216073, 7.165378, 0.118715, 20.442486, 53.095985, 0.000000}}},
{ Au, +3, {{ 31.704271, 17.545767, 16.819551, 5.522640, 0.361725, 4.042679},
{ 1.215561, 7.220506, 0.118812, 20.050970, 1.215562, 0.000000}}},
{ Hg, +1, {{ 28.866837, 19.277540, 16.776051, 6.281459, 3.710289, 4.068430},
{ 1.173967, 7.583842, 0.115351, 29.055994, 1.173968, 0.000000}}},
{ Hg, +2, {{ 32.411079, 18.690371, 16.711773, 9.974835, -3.847611, 4.052869},
{ 1.162980, 7.329806, 0.114518, 22.009489, 22.009493, 0.000000}}},
{ Tl, +1, {{ 32.295044, 16.570049, 17.991013, 1.535355, 7.554591, 4.054030},
{ 1.101544, 0.110020, 6.528559, 52.495068, 20.338634, 0.000000}}},
{ Tl, +3, {{ 32.525639, 19.139185, 17.100321, 5.891115, 12.599463, -9.256075},
{ 1.094966, 6.900992, 0.103667, 18.489614, -0.001401, 0.000000}}},
{ Pb, +2, {{ 27.392647, 16.496822, 19.984501, 6.813923, 5.233910, 4.065623},
{ 1.058874, 0.106305, 6.708123, 24.395554, 1.058874, 0.000000}}},
{ Pb, +4, {{ 32.505657, 20.014240, 14.645661, 5.029499, 1.760138, 4.044678},
{ 1.047035, 6.670321, 0.105279, 16.525040, 0.105279, 0.000000}}},
{ Bi, +3, {{ 32.461437, 19.438683, 16.302486, 7.322662, 0.431704, 4.043703},
{ 0.997930, 6.038867, 0.101338, 18.371586, 46.361046, 0.000000}}},
{ Bi, +5, {{ 16.734028, 20.580494, 9.452623, 61.155834,-34.041023, 4.113663},
{ 0.105076, 4.773282, 11.762162, 1.211775, 1.619408, 0.000000}}},
{ Ra, +2, {{ 4.986228, 32.474945, 21.947443, 11.800013, 10.807292, 3.956572},
{ 0.082597, 0.791468, 4.608034, 24.792431, 0.082597, 0.000000}}},
{ Ac, +3, {{ 15.584983, 32.022125, 21.456327, 0.757593, 12.341252, 3.838984},
{ 0.077438, 0.739963, 4.040735, 47.525002, 19.406845, 0.000000}}},
{ Th, +4, {{ 15.515445, 32.090691, 13.996399, 12.918157, 7.635514, 3.831122},
{ 0.074499, 0.711663, 3.871044, 18.596891, 3.871044, 0.000000}}},
{ U, +3, {{ 15.360309, 32.395657, 21.961290, 1.325894, 14.251453, 3.706622},
{ 0.067815, 0.654643, 3.643409, 39.604965, 16.330570, 0.000000}}},
{ U, +4, {{ 15.355091, 32.235306, 0.557745, 14.396367, 21.751173, 3.705863},
{ 0.067789, 0.652613, 42.354237, 15.908239, 3.553231, 0.000000}}},
{ U, +6, {{ 15.333844, 31.770849, 21.274414, 13.872636, 0.048519, 3.700591},
{ 0.067644, 0.646384, 3.317894, 14.650250, 75.339699, 0.000000}}},
{ Np, +3, {{ 15.378152, 32.572132, 22.206125, 1.413295, 14.828381, 3.603370},
{ 0.064613, 0.631420, 3.561936, 37.875511, 15.546129, 0.000000}}},
{ Np, +4, {{ 15.373926, 32.423019, 21.969994, 0.662078, 14.969350, 3.603039},
{ 0.064597, 0.629658, 3.476389, 39.438942, 15.135764, 0.000000}}},
{ Np, +6, {{ 15.359986, 31.992825, 21.412458, 0.066574, 14.568174, 3.600942},
{ 0.064528, 0.624505, 3.253441, 67.658318, 13.980832, 0.000000}}},
{ Pu, +3, {{ 15.356004, 32.769127, 22.680210, 1.351055, 15.416232, 3.428895},
{ 0.060590, 0.604663, 3.491509, 37.260635, 14.981921, 0.000000}}},
{ Pu, +4, {{ 15.416219, 32.610569, 22.256662, 0.719495, 15.518152, 3.480408},
{ 0.061456, 0.607938, 3.411848, 37.628792, 14.464360, 0.000000}}},
{ Pu, +6, {{ 15.436506, 32.289719, 14.726737, 15.012391, 7.024677, 3.502325},
{ 0.061815, 0.606541, 3.245363, 13.616438, 3.245364, 0.000000}}}
};
// Scattering factor table searched by AtomTypeTraits::elsf(), which matches on
// the element field only. Each entry holds the element, a charge (0 for every
// row in this table) and two rows of five coefficients.
// NOTE(review): presumably these are electron scattering factors, with the
// first row holding the amplitudes and the second the exponents of a
// five-Gaussian fit -- confirm against the SFData declaration.
SFDataArrayElement kELSFData[] = {
{H, 0, {{ 0.034900, 0.120100, 0.197000, 0.057300, 0.119500 },
{ 0.534700, 3.586700, 12.347100, 18.952499, 38.626900 }}},
{He, 0, {{ 0.031700, 0.083800, 0.152600, 0.133400, 0.016400 },
{ 0.250700, 1.475100, 4.493800, 12.664600, 31.165300 }}},
{Li, 0, {{ 0.075000, 0.224900, 0.554800, 1.495400, 0.935400 },
{ 0.386400, 2.938300, 15.382900, 53.554501, 138.733704 }}},
{Be, 0, {{ 0.078000, 0.221000, 0.674000, 1.386700, 0.692500 },
{ 0.313100, 2.238100, 10.151700, 30.906099, 78.327301 }}},
{B, 0, {{ 0.090900, 0.255100, 0.773800, 1.213600, 0.460600 },
{ 0.299500, 2.115500, 8.381600, 24.129200, 63.131401 }}},
{C, 0, {{ 0.089300, 0.256300, 0.757000, 1.048700, 0.357500 },
{ 0.246500, 1.710000, 6.409400, 18.611300, 50.252300 }}},
{N, 0, {{ 0.102200, 0.321900, 0.798200, 0.819700, 0.171500 },
{ 0.245100, 1.748100, 6.192500, 17.389400, 48.143101 }}},
{O, 0, {{ 0.097400, 0.292100, 0.691000, 0.699000, 0.203900 },
{ 0.206700, 1.381500, 4.694300, 12.710500, 32.472599 }}},
{F, 0, {{ 0.108300, 0.317500, 0.648700, 0.584600, 0.142100 },
{ 0.205700, 1.343900, 4.278800, 11.393200, 28.788099 }}},
{Ne, 0, {{ 0.126900, 0.353500, 0.558200, 0.467400, 0.146000 },
{ 0.220000, 1.377900, 4.020300, 9.493400, 23.127800 }}},
{Na, 0, {{ 0.214200, 0.685300, 0.769200, 1.658900, 1.448200 },
{ 0.333400, 2.344600, 10.083000, 48.303699, 138.270004 }}},
{Mg, 0, {{ 0.231400, 0.686600, 0.967700, 2.188200, 1.133900 },
{ 0.327800, 2.272000, 10.924100, 39.289799, 101.974800 }}},
{Al, 0, {{ 0.239000, 0.657300, 1.201100, 2.558600, 1.231200 },
{ 0.313800, 2.106300, 10.416300, 34.455200, 98.534401 }}},
{Si, 0, {{ 0.251900, 0.637200, 1.379500, 2.508200, 1.050000 },
{ 0.307500, 2.017400, 9.674600, 29.374399, 80.473198 }}},
{P, 0, {{ 0.254800, 0.610600, 1.454100, 2.320400, 0.847700 },
{ 0.290800, 1.874000, 8.517600, 24.343399, 63.299599 }}},
{S, 0, {{ 0.249700, 0.562800, 1.389900, 2.186500, 0.771500 },
{ 0.268100, 1.671100, 7.026700, 19.537701, 50.388802 }}},
{Cl, 0, {{ 0.244300, 0.539700, 1.391900, 2.019700, 0.662100 },
{ 0.246800, 1.524200, 6.153700, 16.668699, 42.308601 }}},
{Ar, 0, {{ 0.238500, 0.501700, 1.342800, 1.889900, 0.607900 },
{ 0.228900, 1.369400, 5.256100, 14.092800, 35.536098 }}},
{K, 0, {{ 0.411500, 1.403100, 2.278400, 2.674200, 2.216200 },
{ 0.370300, 3.387400, 13.102900, 68.959198, 194.432907 }}},
{Ca, 0, {{ 0.405400, 1.388000, 2.160200, 3.753200, 2.206300 },
{ 0.349900, 3.099100, 11.960800, 53.935299, 142.389206 }}},
{Sc, 0, {{ 0.378700, 1.218100, 2.059400, 3.261800, 2.387000 },
{ 0.313300, 2.585600, 9.581300, 41.768799, 116.728203 }}},
{Ti, 0, {{ 0.382500, 1.259800, 2.000800, 3.061700, 2.069400 },
{ 0.304000, 2.486300, 9.278300, 39.075100, 109.458298 }}},
{V, 0, {{ 0.387600, 1.275000, 1.910900, 2.831400, 1.897900 },
{ 0.296700, 2.378000, 8.798100, 35.952801, 101.720100 }}},
{Cr, 0, {{ 0.404600, 1.369600, 1.894100, 2.080000, 1.219600 },
{ 0.298600, 2.395800, 9.140600, 37.470100, 113.712097 }}},
{Mn, 0, {{ 0.379600, 1.209400, 1.781500, 2.542000, 1.593700 },
{ 0.269900, 2.045500, 7.472600, 31.060400, 91.562202 }}},
{Fe, 0, {{ 0.394600, 1.272500, 1.703100, 2.314000, 1.479500 },
{ 0.271700, 2.044300, 7.600700, 29.971399, 86.226501 }}},
{Co, 0, {{ 0.411800, 1.316100, 1.649300, 2.193000, 1.283000 },
{ 0.274200, 2.037200, 7.720500, 29.968000, 84.938301 }}},
{Ni, 0, {{ 0.386000, 1.176500, 1.545100, 2.073000, 1.381400 },
{ 0.247800, 1.766000, 6.310700, 25.220400, 74.314598 }}},
{Cu, 0, {{ 0.431400, 1.320800, 1.523600, 1.467100, 0.856200 },
{ 0.269400, 1.922300, 7.347400, 28.989201, 90.624603 }}},
{Zn, 0, {{ 0.428800, 1.264600, 1.447200, 1.829400, 1.093400 },
{ 0.259300, 1.799800, 6.750000, 25.586000, 73.528397 }}},
{Ga, 0, {{ 0.481800, 1.403200, 1.656100, 2.460500, 1.105400 },
{ 0.282500, 1.978500, 8.754600, 32.523800, 98.552299 }}},
{Ge, 0, {{ 0.465500, 1.301400, 1.608800, 2.699800, 1.300300 },
{ 0.264700, 1.792600, 7.607100, 26.554100, 77.523804 }}},
{As, 0, {{ 0.451700, 1.222900, 1.585200, 2.795800, 1.263800 },
{ 0.249300, 1.643600, 6.815400, 22.368099, 62.039001 }}},
{Se, 0, {{ 0.447700, 1.167800, 1.584300, 2.808700, 1.195600 },
{ 0.240500, 1.544200, 6.323100, 19.461000, 52.023300 }}},
{Br, 0, {{ 0.479800, 1.194800, 1.869500, 2.695300, 0.820300 },
{ 0.250400, 1.596300, 6.965300, 19.849199, 50.323299 }}},
{Kr, 0, {{ 0.454600, 1.099300, 1.769600, 2.706800, 0.867200 },
{ 0.230900, 1.427900, 5.944900, 16.675200, 42.224300 }}},
{Rb, 0, {{ 1.016000, 2.852800, 3.546600, -7.780400, 12.114800 },
{ 0.485300, 5.092500, 25.785101, 130.451508, 138.677505 }}},
{Sr, 0, {{ 0.670300, 1.492600, 3.336800, 4.460000, 3.150100 },
{ 0.319000, 2.228700, 10.350400, 52.329102, 151.221603 }}},
{Y, 0, {{ 0.689400, 1.547400, 3.245000, 4.212600, 2.976400 },
{ 0.318900, 2.290400, 10.006200, 44.077099, 125.012001 }}},
{Zr, 0, {{ 0.671900, 1.468400, 3.166800, 3.955700, 2.892000 },
{ 0.303600, 2.124900, 8.923600, 36.845798, 108.204903 }}},
{Nb, 0, {{ 0.612300, 1.267700, 3.034800, 3.384100, 2.368300 },
{ 0.270900, 1.768300, 7.248900, 27.946501, 98.562401 }}},
{Mo, 0, {{ 0.677300, 1.479800, 3.178800, 3.082400, 1.838400 },
{ 0.292000, 2.060600, 8.112900, 30.533600, 100.065804 }}},
{Tc, 0, {{ 0.708200, 1.639200, 3.199300, 3.432700, 1.871100 },
{ 0.297600, 2.210600, 8.524600, 33.145599, 96.637703 }}},
{Ru, 0, {{ 0.673500, 1.493400, 3.096600, 2.725400, 1.559700 },
{ 0.277300, 1.971600, 7.324900, 26.689100, 90.558098 }}},
{Rh, 0, {{ 0.641300, 1.369000, 2.985400, 2.695200, 1.543300 },
{ 0.258000, 1.772100, 6.385400, 23.254900, 85.151703 }}},
{Pd, 0, {{ 0.590400, 1.177500, 2.651900, 2.287500, 0.868900 },
{ 0.232400, 1.501900, 5.159100, 15.542800, 46.821301 }}},
{Ag, 0, {{ 0.637700, 1.379000, 2.829400, 2.363100, 1.455300 },
{ 0.246600, 1.697400, 5.765600, 20.094299, 76.737198 }}},
{Cd, 0, {{ 0.636400, 1.424700, 2.780200, 2.597300, 1.788600 },
{ 0.240700, 1.682300, 5.658800, 20.721901, 69.110901 }}},
{In, 0, {{ 0.676800, 1.658900, 2.774000, 3.183500, 2.132600 },
{ 0.252200, 1.854500, 6.293600, 25.145700, 84.544800 }}},
{Sn, 0, {{ 0.722400, 1.961000, 2.716100, 3.560300, 1.897200 },
{ 0.265100, 2.060400, 7.301100, 27.549299, 81.334900 }}},
{Sb, 0, {{ 0.710600, 1.924700, 2.614900, 3.832200, 1.889900 },
{ 0.256200, 1.964600, 6.885200, 24.764799, 68.916801 }}},
{Te, 0, {{ 0.694700, 1.869000, 2.535600, 4.001300, 1.895500 },
{ 0.245900, 1.854200, 6.441100, 22.173000, 59.220600 }}},
{I, 0, {{ 0.704700, 1.948400, 2.594000, 4.152600, 1.505700 },
{ 0.245500, 1.863800, 6.763900, 21.800699, 56.439499 }}},
{Xe, 0, {{ 0.673700, 1.790800, 2.412900, 4.210000, 1.705800 },
{ 0.230500, 1.689000, 5.821800, 18.392799, 47.249599 }}},
{Cs, 0, {{ 1.270400, 3.801800, 5.661800, 0.920500, 4.810500 },
{ 0.435600, 4.205800, 23.434200, 136.778305, 171.756104 }}},
{Ba, 0, {{ 0.904900, 2.607600, 4.849800, 5.160300, 4.738800 },
{ 0.306600, 2.436300, 12.182100, 54.613499, 161.997803 }}},
{La, 0, {{ 0.840500, 2.386300, 4.613900, 5.151400, 4.794900 },
{ 0.279100, 2.141000, 10.340000, 41.914799, 132.020401 }}},
{Ce, 0, {{ 0.855100, 2.391500, 4.577200, 5.027800, 4.511800 },
{ 0.280500, 2.120000, 10.180800, 42.063301, 130.989304 }}},
{Pr, 0, {{ 0.909600, 2.531300, 4.526600, 4.637600, 4.369000 },
{ 0.293900, 2.247100, 10.826600, 48.884201, 147.602005 }}},
{Nd, 0, {{ 0.880700, 2.418300, 4.444800, 4.685800, 4.172500 },
{ 0.280200, 2.083600, 10.035700, 47.450600, 146.997604 }}},
{Pm, 0, {{ 0.947100, 2.546300, 4.352300, 4.478900, 3.908000 },
{ 0.297700, 2.227600, 10.576200, 49.361900, 145.358002 }}},
{Sm, 0, {{ 0.969900, 2.583700, 4.277800, 4.457500, 3.598500 },
{ 0.300300, 2.244700, 10.648700, 50.799400, 146.417892 }}},
{Eu, 0, {{ 0.869400, 2.241300, 3.919600, 3.969400, 4.549800 },
{ 0.265300, 1.859000, 8.399800, 36.739700, 125.708900 }}},
{Gd, 0, {{ 0.967300, 2.470200, 4.114800, 4.497200, 3.209900 },
{ 0.290900, 2.101400, 9.706700, 43.426998, 125.947403 }}},
{Tb, 0, {{ 0.932500, 2.367300, 3.879100, 3.967400, 3.799600 },
{ 0.276100, 1.951100, 8.929600, 41.593700, 131.012207 }}},
{Dy, 0, {{ 0.950500, 2.370500, 3.821800, 4.047100, 3.445100 },
{ 0.277300, 1.946900, 8.886200, 43.093800, 133.139603 }}},
{Ho, 0, {{ 0.924800, 2.242800, 3.618200, 3.791000, 3.791200 },
{ 0.266000, 1.818300, 7.965500, 33.112900, 101.813904 }}},
{Er, 0, {{ 1.037300, 2.482400, 3.655800, 3.892500, 3.005600 },
{ 0.294400, 2.079700, 9.415600, 45.805599, 132.772003 }}},
{Tm, 0, {{ 1.007500, 2.378700, 3.544000, 3.693200, 3.175900 },
{ 0.281600, 1.948600, 8.716200, 41.841999, 125.031998 }}},
{Yb, 0, {{ 1.034700, 2.391100, 3.461900, 3.655600, 3.005200 },
{ 0.285500, 1.967900, 8.761900, 42.330399, 125.649902 }}},
{Lu, 0, {{ 0.992700, 2.243600, 3.355400, 3.781300, 3.099400 },
{ 0.270100, 1.807300, 7.811200, 34.484901, 103.352600 }}},
{Hf, 0, {{ 1.029500, 2.291100, 3.411000, 3.949700, 2.492500 },
{ 0.276100, 1.862500, 8.096100, 34.271198, 98.529503 }}},
{Ta, 0, {{ 1.019000, 2.229100, 3.409700, 3.925200, 2.267900 },
{ 0.269400, 1.796200, 7.694400, 31.094200, 91.108902 }}},
{W, 0, {{ 0.985300, 2.116700, 3.357000, 3.798100, 2.279800 },
{ 0.256900, 1.674500, 7.009800, 26.923401, 81.390999 }}},
{Re, 0, {{ 0.991400, 2.085800, 3.453100, 3.881200, 1.852600 },
{ 0.254800, 1.651800, 6.884500, 26.723400, 81.721497 }}},
{Os, 0, {{ 0.981300, 2.032200, 3.366500, 3.623500, 1.974100 },
{ 0.248700, 1.597300, 6.473700, 23.281700, 70.925400 }}},
{Ir, 0, {{ 1.019400, 2.064500, 3.442500, 3.491400, 1.697600 },
{ 0.255400, 1.647500, 6.596600, 23.226900, 70.027199 }}},
{Pt, 0, {{ 0.914800, 1.809600, 3.213400, 3.295300, 1.575400 },
{ 0.226300, 1.381300, 5.324300, 17.598700, 60.017101 }}},
{Au, 0, {{ 0.967400, 1.891600, 3.399300, 3.052400, 1.260700 },
{ 0.235800, 1.471200, 5.675800, 18.711901, 61.528599 }}},
{Hg, 0, {{ 1.003300, 1.946900, 3.439600, 3.154800, 1.418000 },
{ 0.241300, 1.529800, 5.800900, 19.452000, 60.575298 }}},
{Tl, 0, {{ 1.068900, 2.103800, 3.603900, 3.492700, 1.828300 },
{ 0.254000, 1.671500, 6.350900, 23.153099, 78.709900 }}},
{Pb, 0, {{ 1.089100, 2.186700, 3.616000, 3.803100, 1.899400 },
{ 0.255200, 1.717400, 6.513100, 23.917000, 74.703903 }}},
{Bi, 0, {{ 1.100700, 2.230600, 3.568900, 4.154900, 2.038200 },
{ 0.254600, 1.735100, 6.494800, 23.646400, 70.377998 }}},
{Po, 0, {{ 1.156800, 2.435300, 3.645900, 4.406400, 1.717900 },
{ 0.264800, 1.878600, 7.174900, 25.176600, 69.282097 }}},
{At, 0, {{ 1.090900, 2.197600, 3.383100, 4.670000, 2.127700 },
{ 0.246600, 1.670700, 6.019700, 20.765699, 57.266300 }}},
{Rn, 0, {{ 1.075600, 2.163000, 3.317800, 4.885200, 2.048900 },
{ 0.240200, 1.616900, 5.764400, 19.456800, 52.500900 }}},
{Fr, 0, {{ 1.428200, 3.508100, 5.676700, 4.196400, 3.894600 },
{ 0.318300, 2.688900, 13.481600, 54.386600, 200.832108 }}},
{Ra, 0, {{ 1.312700, 3.124300, 5.298800, 5.389100, 5.413300 },
{ 0.288700, 2.289700, 10.827600, 43.538898, 145.610901 }}},
{Ac, 0, {{ 1.312800, 3.102100, 5.338500, 5.961100, 4.756200 },
{ 0.286100, 2.250900, 10.528700, 41.779598, 128.297302 }}},
{Th, 0, {{ 1.255300, 2.917800, 5.086200, 6.120600, 4.712200 },
{ 0.270100, 2.063600, 9.305100, 34.597698, 107.919998 }}},
{Pa, 0, {{ 1.321800, 3.144400, 5.437100, 5.644400, 4.010700 },
{ 0.282700, 2.225000, 10.245400, 41.116199, 124.444901 }}},
{U, 0, {{ 1.338200, 3.204300, 5.455800, 5.483900, 3.634200 },
{ 0.283800, 2.245200, 10.251900, 41.725101, 124.902298 }}},
{Np, 0, {{ 1.519300, 4.005300, 6.532700, -0.140200, 6.748900 },
{ 0.321300, 2.820600, 14.887800, 68.910301, 81.725700 }}},
{Pu, 0, {{ 1.351700, 3.293700, 5.321300, 4.646600, 3.571400 },
{ 0.281300, 2.241800, 9.995200, 42.793900, 132.173904 }}},
{Am, 0, {{ 1.213500, 2.796200, 4.754500, 4.573100, 4.478600 },
{ 0.248300, 1.843700, 7.542100, 29.384100, 112.457901 }}},
{Cm, 0, {{ 1.293700, 3.110000, 5.039300, 4.754600, 3.503100 },
{ 0.263800, 2.034100, 8.710100, 35.299198, 109.497200 }}},
{Bk, 0, {{ 1.291500, 3.102300, 4.930900, 4.600900, 3.466100 },
{ 0.261100, 2.002300, 8.437700, 34.155899, 105.891098 }}},
{Cf, 0, {{ 1.208900, 2.739100, 4.348200, 4.004700, 4.649700 },
{ 0.242100, 1.748700, 6.726200, 23.215300, 80.310799 }}}
};
} // namespace data
// --------------------------------------------------------------------
// AtomTypeTraits
// Construct the traits for the element whose symbol matches the given string.
// Symbols are compared with cif::iequals (presumably a case-insensitive
// comparison -- confirm) against every entry of data::kKnownAtoms.
//
// Throws std::invalid_argument when no entry matches.
AtomTypeTraits::AtomTypeTraits(const std::string& symbol)
	: mInfo(nullptr)
{
	for (auto& candidate: data::kKnownAtoms)
	{
		if (not cif::iequals(candidate.symbol, symbol))
			continue;

		// the first matching entry wins
		mInfo = &candidate;
		break;
	}

	if (mInfo == nullptr)
		throw std::invalid_argument("Not a known element: " + symbol);
}
// Construct the traits for the given atom type, using the type value as a
// direct index into data::kKnownAtoms.
//
// Throws std::invalid_argument when t is below H or not less than
// data::kKnownAtomsCount.
AtomTypeTraits::AtomTypeTraits(AtomType t)
{
	const bool inRange = t >= H and t < data::kKnownAtomsCount;
	if (not inRange)
		throw std::invalid_argument("atomType out of range");

	mInfo = &data::kKnownAtoms[t];

	// sanity check: entry number t of the table must describe type t
	assert(mInfo->type == t);
}
// Return true when symbol matches (per cif::iequals) the symbol of any entry
// in data::kKnownAtoms, false otherwise.
bool AtomTypeTraits::isElement(const std::string& symbol)
{
	for (auto& known: data::kKnownAtoms)
	{
		if (cif::iequals(known.symbol, symbol))
			return true;
	}

	return false;
}
// Return the 'metal' flag of the first entry in data::kKnownAtoms whose symbol
// matches (per cif::iequals) the given one. Unknown symbols yield false.
bool AtomTypeTraits::isMetal(const std::string& symbol)
{
	for (auto& known: data::kKnownAtoms)
	{
		if (cif::iequals(known.symbol, symbol))
			return known.metal;
	}

	return false;
}
// Look up the scattering factor data in data::kWKSFData for this atom type
// with the requested charge.
//
// When no entry exists for a non-zero charge, the lookup is retried with
// charge 0 (a notice is written to std::cerr when cif::VERBOSE > 0).
//
// Returns a reference to the SFData stored in the static table.
// Throws std::runtime_error when neither lookup finds an entry.
auto AtomTypeTraits::wksf(int charge) const -> const SFData&
{
	for (auto& sf: data::kWKSFData)
	{
		if (sf.symbol == mInfo->type and sf.charge == charge)
			return sf.sf;
	}

	if (charge != 0)
	{
		// Oops, not found. Fall back to zero charge and see if we can use that
		if (cif::VERBOSE > 0)
			std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge..." << std::endl;

		for (auto& sf: data::kWKSFData)
		{
			if (sf.symbol == mInfo->type and sf.charge == 0)
				return sf.sf;
		}
	}

	// Keep the name and the charge apart in the message; gluing them together
	// produced confusing text such as "...found for Iron3".
	throw std::runtime_error("No scattering factor found for " + name() + " with charge " + std::to_string(charge));
}
// Look up the scattering factor data in data::kELSFData for this atom type.
//
// Returns a reference to the SFData stored in the static table.
// Throws std::runtime_error when the table has no entry for this type.
auto AtomTypeTraits::elsf() const -> const SFData&
{
	for (auto& entry: data::kELSFData)
	{
		if (entry.symbol != mInfo->type)
			continue;

		return entry.sf;
	}

	throw std::runtime_error("No scattering factor found for " + name());
}
// ionic radii
// Return the value stored in data::kCrystalIonicRadii for this atom type and
// the given charge, divided by 100.
//
// Charges outside -3..8 and atom types missing from the table give data::kNA.
// NOTE(review): the division by 100 presumably converts picometres to
// Angstrom -- confirm against the table definition.
// NOTE(review): charge 0 and charge -1 both select slot 2; presumably the
// table has no column for a zero charge -- confirm this is intended.
float AtomTypeTraits::crystal_ionic_radius(int charge) const
{
	if (charge < -3 or charge > 8)
		return data::kNA;

	// negative charges occupy slots 0..2, positive charges slots 3..10
	const int slot = charge < 0 ? charge + 3 : charge + 2;

	for (auto &entry : data::kCrystalIonicRadii)
	{
		if (entry.type == mInfo->type)
			return entry.radii[slot] / 100.0f;
	}

	return data::kNA;
}
// Return the value stored in data::kEffectiveIonicRadii for this atom type and
// the given charge, divided by 100.
//
// Charges outside -3..8 and atom types missing from the table give data::kNA.
// NOTE(review): the division by 100 presumably converts picometres to
// Angstrom -- confirm against the table definition.
// NOTE(review): charge 0 and charge -1 both select slot 2; presumably the
// table has no column for a zero charge -- confirm this is intended.
float AtomTypeTraits::effective_ionic_radius(int charge) const
{
	if (charge < -3 or charge > 8)
		return data::kNA;

	// negative charges occupy slots 0..2, positive charges slots 3..10
	const int slot = charge < 0 ? charge + 3 : charge + 2;

	for (auto &entry : data::kEffectiveIonicRadii)
	{
		if (entry.type == mInfo->type)
			return entry.radii[slot] / 100.0f;
	}

	return data::kNA;
}
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include <cassert>
#include <fstream>
#include <mutex>
#include <utility>

#include <cif++/cif.hpp>
#include <cif++/structure/BondMap.hpp>
#include <cif++/structure/Compound.hpp>
#include <cif++/utilities.hpp>
namespace mmcif
{
namespace
{
	// Compact key for identifiers of at most four characters: the characters
	// are stored in id_s and the same four bytes are compared as one 32 bit
	// integer through id_n. Unused trailing bytes are zero.
	//
	// NOTE(review): comparing through id_n after writing id_s relies on the
	// compiler supporting type punning through a union, and the resulting
	// ordering depends on the byte order of the platform. It is only used
	// here as a consistent ordering for containers.
	union IDType
	{
		// An empty identifier, all four bytes zero.
		IDType()
			: id_n(0)
		{
		}

		IDType(const IDType &rhs)
			: id_n(rhs.id_n)
		{
		}

		// Build an identifier from s.
		// Throws BondMapException when s is longer than four characters.
		IDType(const std::string &s)
			: IDType()
		{
			*this = s;
		}

		IDType &operator=(const IDType &rhs)
		{
			id_n = rhs.id_n;
			return *this;
		}

		// Replace the identifier with s.
		// Throws BondMapException when s is longer than four characters, in
		// which case the current value is left untouched.
		//
		// The length depends on the data being read, not on a programming
		// error, so it is reported with an exception in every build type
		// instead of being asserted first.
		IDType &operator=(const std::string &s)
		{
			if (s.length() > 4)
				throw BondMapException("Atom ID '" + s + "' is too long");

			// clear all four bytes so shorter identifiers are zero padded
			id_n = 0;
			std::copy(s.begin(), s.end(), id_s);

			return *this;
		}

		bool operator<(const IDType &rhs) const
		{
			return id_n < rhs.id_n;
		}

		bool operator<=(const IDType &rhs) const
		{
			return id_n <= rhs.id_n;
		}

		bool operator==(const IDType &rhs) const
		{
			return id_n == rhs.id_n;
		}

		bool operator!=(const IDType &rhs) const
		{
			return id_n != rhs.id_n;
		}

		char id_s[4];
		uint32_t id_n;
	};

	static_assert(sizeof(IDType) == 4, "atom_id_type should be 4 bytes");
} // namespace
// --------------------------------------------------------------------
// The set of bonds known for a single compound.
struct CompoundBondInfo
{
	// identifier of the compound these bonds belong to
	IDType mID;

	// pairs of atom indices that are bonded; CompoundBondMap::bonded stores
	// and queries each pair with the smaller index first
	std::set<std::tuple<uint32_t, uint32_t>> mBonded;

	// Return true when the pair (a1, a2) is stored in mBonded. The pair is
	// looked up exactly as given, it is not reordered here.
	bool bonded(uint32_t a1, uint32_t a2) const
	{
		const std::tuple<uint32_t, uint32_t> wanted{a1, a2};
		return mBonded.find(wanted) != mBonded.end();
	}
};
// --------------------------------------------------------------------
class CompoundBondMap
{
public:
static CompoundBondMap &instance()
{
static std::unique_ptr<CompoundBondMap> s_instance(new CompoundBondMap);
return *s_instance;
}
bool bonded(const std::string &compoundID, const std::string &atomID1, const std::string &atomID2);
private:
CompoundBondMap() {}
uint32_t getAtomID(const std::string &atomID)
{
IDType id(atomID);
uint32_t result;
auto i = mAtomIDIndex.find(id);
if (i == mAtomIDIndex.end())
{
result = uint32_t(mAtomIDIndex.size());
mAtomIDIndex[id] = result;
}
else
result = i->second;
return result;
}
std::map<IDType, uint32_t> mAtomIDIndex;
std::vector<CompoundBondInfo> mCompounds;
std::mutex mMutex;
};
// Return true when atomID1 and atomID2 are bonded within compound compoundID.
//
// The whole call runs with mMutex held. Bond information already cached in
// mCompounds is used when available. Otherwise the compound is requested from
// CompoundFactory, all of its bonds are recorded and the result is cached. A
// compound the factory cannot provide is cached with an empty bond set, so
// every query for it returns false (a message is written to std::cerr when
// cif::VERBOSE >= 0).
//
// Constructing the IDType keys throws BondMapException for a compound or atom
// ID longer than four characters.
bool CompoundBondMap::bonded(const std::string &compoundID, const std::string &atomID1, const std::string &atomID2)
{
	std::lock_guard lock(mMutex);

	IDType id(compoundID);

	// bonds are stored with the smaller atom index first
	uint32_t a1 = getAtomID(atomID1);
	uint32_t a2 = getAtomID(atomID2);
	if (a1 > a2)
		std::swap(a1, a2);

	for (auto &bi : mCompounds)
	{
		if (bi.mID == id)
			return bi.bonded(a1, a2);
	}

	// not found in our cache, calculate
	CompoundBondInfo bondInfo{id};

	auto compound = mmcif::CompoundFactory::instance().create(compoundID);
	if (not compound)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Missing compound bond info for " << compoundID << std::endl;
	}
	else
	{
		for (auto &bond : compound->bonds())
		{
			uint32_t ca1 = getAtomID(bond.atomID[0]);
			uint32_t ca2 = getAtomID(bond.atomID[1]);
			if (ca1 > ca2)
				std::swap(ca1, ca2);

			bondInfo.mBonded.insert({ca1, ca2});
		}
	}

	// answer the query before handing the bond set over to the cache; moving
	// avoids copying the complete set
	const bool result = bondInfo.bonded(a1, a2);
	mCompounds.push_back(std::move(bondInfo));

	return result;
}
// --------------------------------------------------------------------
// Build the bond map for structure p.
//
// The constructor fills, in this order:
//  - index:    atom id -> sequential number, in the order returned by p.atoms()
//  - bond:     keys of directly bonded atom pairs (polymer C-N neighbours,
//              struct_conn records and intra-compound bonds)
//  - link:     atom id -> set of atom ids, for struct_conn records only
//  - bond_1_4: keys of atom pairs derived from the 1-2 and 1-3 neighbour tables
BondMap::BondMap(const Structure &p)
{
auto &compoundBondInfo = CompoundBondMap::instance();
auto atoms = p.atoms();
dim = uint32_t(atoms.size());
// bond = std::vector<bool>(dim * (dim - 1), false);
// number the atoms consecutively; index.size() grows by one for each new id
for (auto &atom : atoms)
index[atom.id()] = uint32_t(index.size());
// record a bond between the atoms with ids a and b
auto bindAtoms = [this](const std::string &a, const std::string &b)
{
uint32_t ixa = index[a];
uint32_t ixb = index[b];
bond.insert(key(ixa, ixb));
};
// record a bond and additionally register a and b as linked, in both directions
auto linkAtoms = [this, &bindAtoms](const std::string &a, const std::string &b)
{
bindAtoms(a, b);
link[a].insert(b);
link[b].insert(a);
};
cif::v2::datablock &db = p.datablock();
// collect all compounds first
std::set<std::string> compounds;
for (auto c : db["chem_comp"])
compounds.insert(c["id"].as<std::string>());
// make sure we also have all residues in the polyseq
for (auto m : db["entity_poly_seq"])
{
std::string c = m["mon_id"].as<std::string>();
if (compounds.count(c))
continue;
if (cif::VERBOSE > 1)
std::cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << std::endl;
compounds.insert(c);
}
// NOTE(review): progress is constructed here but never advanced in this constructor
cif::Progress progress(compounds.size(), "Creating bond map");
// some helper indices to speed things up a bit
// key: (label asym id, label seq id, label atom id, auth seq id) -> atom id
std::map<std::tuple<std::string, int, std::string, std::string>, std::string> atomMapByAsymSeqAndAtom;
for (auto &a : p.atoms())
{
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID(), a.authSeqID());
atomMapByAsymSeqAndAtom[key] = a.id();
}
// first link all residues in a polyseq
std::string lastAsymID, lastAuthSeqID;
int lastSeqID = 0;
for (const auto &[asymID, seqID, authSeqID] : db["pdbx_poly_seq_scheme"].rows<std::string, int, std::string>("asym_id", "seq_id", "pdb_seq_num"))
{
if (asymID != lastAsymID) // first in a new sequence
{
lastAsymID = asymID;
lastSeqID = seqID;
lastAuthSeqID = authSeqID;
continue;
}
// bind atom "C" of the previous residue to atom "N" of this one, when both are present
auto kc = make_tuple(asymID, lastSeqID, "C", lastAuthSeqID);
auto kn = make_tuple(asymID, seqID, "N", authSeqID);
if (atomMapByAsymSeqAndAtom.count(kc) and atomMapByAsymSeqAndAtom.count(kn))
{
auto c = atomMapByAsymSeqAndAtom.at(kc);
auto n = atomMapByAsymSeqAndAtom.at(kn);
bindAtoms(c, n);
}
// if (not(c.empty() or n.empty()))
lastSeqID = seqID;
lastAuthSeqID = authSeqID;
}
// next, the connections listed in struct_conn; these are recorded as links as well as bonds
for (auto l : db["struct_conn"])
{
std::string asym1, asym2, atomId1, atomId2;
int seqId1 = 0, seqId2 = 0;
std::string authSeqId1, authSeqId2;
cif::v2::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2, authSeqId1, authSeqId2) =
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
"ptnr1_label_seq_id", "ptnr2_label_seq_id",
"ptnr1_auth_seq_id", "ptnr2_auth_seq_id");
auto ka = make_tuple(asym1, seqId1, atomId1, authSeqId1);
auto kb = make_tuple(asym2, seqId2, atomId2, authSeqId2);
// records whose partner atoms are not both in the structure are silently ignored
if (atomMapByAsymSeqAndAtom.count(ka) and atomMapByAsymSeqAndAtom.count(kb))
{
auto a = atomMapByAsymSeqAndAtom.at(ka);
auto b = atomMapByAsymSeqAndAtom.at(kb);
linkAtoms(a, b);
}
// std::string a = atomMapByAsymSeqAndAtom.at(make_tuple(asym1, seqId1, atomId1, authSeqId1));
// std::string b = atomMapByAsymSeqAndAtom.at(make_tuple(asym2, seqId2, atomId2, authSeqId2));
// if (not(a.empty() or b.empty()))
// linkAtoms(a, b);
}
// then link all atoms in the compounds
for (auto c : compounds)
{
if (c == "HOH" or c == "H2O" or c == "WAT")
{
if (cif::VERBOSE > 0)
std::cerr << "skipping water in bond map calculation" << std::endl;
continue;
}
// ask the compound bond info whether two atoms of compound c are bonded, by label atom id
auto bonded = [c, &compoundBondInfo](const Atom &a, const Atom &b)
{
auto label_a = a.labelAtomID();
auto label_b = b.labelAtomID();
return compoundBondInfo.bonded(c, label_a, label_b);
};
// loop over poly_seq_scheme
for (auto r : db["pdbx_poly_seq_scheme"].find(cif::v2::key("mon_id") == c))
{
std::string asymID;
int seqID;
cif::v2::tie(asymID, seqID) = r.get("asym_id", "seq_id");
// atoms of this residue: same label asym id and label seq id
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto &a)
{ return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });
// test every unordered pair of atoms in the residue
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
bindAtoms(rAtoms[i].id(), rAtoms[j].id());
}
}
}
// loop over pdbx_nonpoly_scheme
for (auto r : db["pdbx_nonpoly_scheme"].find(cif::v2::key("mon_id") == c))
{
std::string asymID;
cif::v2::tie(asymID) = r.get("asym_id");
// here the atoms are selected by label asym id only
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto &a)
{ return a.labelAsymID() == asymID; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
{
// same effect as bindAtoms(), written out inline
uint32_t ixa = index[rAtoms[i].id()];
uint32_t ixb = index[rAtoms[j].id()];
bond.insert(key(ixa, ixb));
}
}
}
}
// loop over pdbx_branch_scheme
for (const auto &[asym_id, pdb_seq_num] : db["pdbx_branch_scheme"].find<std::string, std::string>(cif::v2::key("mon_id") == c, "asym_id", "pdb_seq_num"))
{
// atoms of this residue: same label asym id and auth seq id (pdb_seq_num)
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[id = asym_id, nr = pdb_seq_num](const Atom &a)
{ return a.labelAsymID() == id and a.authSeqID() == nr; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
{
// same effect as bindAtoms(), written out inline
uint32_t ixa = index[rAtoms[i].id()];
uint32_t ixb = index[rAtoms[j].id()];
bond.insert(key(ixa, ixb));
}
}
}
}
}
// start by creating an index for single bonds
// b1_2 holds every bond in both directions: atom index -> directly bonded atom index
std::multimap<uint32_t, uint32_t> b1_2;
for (auto &bk : bond)
{
uint32_t a, b;
std::tie(a, b) = dekey(bk);
b1_2.insert({a, b});
b1_2.insert({b, a});
}
// b1_3 holds pairs of atoms that share a bonded neighbour i but are not bonded to each other
std::multimap<uint32_t, uint32_t> b1_3;
for (uint32_t i = 0; i < dim; ++i)
{
auto a = b1_2.equal_range(i);
std::vector<uint32_t> s;
for (auto j = a.first; j != a.second; ++j)
s.push_back(j->second);
for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
{
for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
{
uint32_t x = s[si1];
uint32_t y = s[si2];
if (isBonded(x, y))
continue;
b1_3.insert({x, y});
b1_3.insert({y, x});
}
}
}
// for each atom i, combine each 1-2 neighbour with each 1-3 neighbour of i;
// pairs that are not directly bonded are stored in bond_1_4
for (uint32_t i = 0; i < dim; ++i)
{
auto a1 = b1_2.equal_range(i);
auto a2 = b1_3.equal_range(i);
for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
{
for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
{
uint32_t b1 = ai1->second;
uint32_t b2 = ai2->second;
if (isBonded(b1, b2))
continue;
bond_1_4.insert(key(b1, b2));
}
}
}
}
// Return the ids of all atoms registered as linked to atom a.
// The result is empty when no link was recorded for a.
std::vector<std::string> BondMap::linked(const Atom &a) const
{
	const auto pos = link.find(a.id());
	if (pos == link.end())
		return {};

	return std::vector<std::string>(pos->second.begin(), pos->second.end());
}
// Return the atom ids of the compound named compoundID, in the order the
// compound lists its atoms.
// Throws BondMapException when the compound factory cannot provide the compound.
std::vector<std::string> BondMap::atomIDsForCompound(const std::string &compoundID)
{
	auto *compound = mmcif::CompoundFactory::instance().create(compoundID);
	if (compound == nullptr)
		throw BondMapException("Missing bond information for compound " + compoundID);

	std::vector<std::string> ids;
	for (auto &compAtom : compound->atoms())
		ids.push_back(compAtom.id);

	return ids;
}
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <map>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <filesystem>
#include <fstream>
#include <cif++/cif.hpp>
#include <cif++/utilities.hpp>
#include <cif++/structure/Compound.hpp>
// #include <cif++/point.hpp>
namespace fs = std::filesystem;
namespace mmcif
{
// --------------------------------------------------------------------
// Convert a BondType to its four (or two) letter name.
// Throws std::invalid_argument for a value that is not one of the enumerators.
std::string to_string(BondType bondType)
{
	const char *name = nullptr;

	switch (bondType)
	{
		case BondType::sing: name = "sing"; break;
		case BondType::doub: name = "doub"; break;
		case BondType::trip: name = "trip"; break;
		case BondType::quad: name = "quad"; break;
		case BondType::arom: name = "arom"; break;
		case BondType::poly: name = "poly"; break;
		case BondType::delo: name = "delo"; break;
		case BondType::pi: name = "pi"; break;
	}

	if (name == nullptr)
		throw std::invalid_argument("Invalid bondType");

	return name;
}
// Convert a bond type name to a BondType; the comparison ignores case.
// Throws std::invalid_argument when the name is not recognised.
BondType from_string(const std::string &bondType)
{
	static const std::pair<const char *, BondType> kBondTypeNames[] = {
		{"sing", BondType::sing},
		{"doub", BondType::doub},
		{"trip", BondType::trip},
		{"quad", BondType::quad},
		{"arom", BondType::arom},
		{"poly", BondType::poly},
		{"delo", BondType::delo},
		{"pi", BondType::pi}};

	for (const auto &entry : kBondTypeNames)
	{
		if (cif::iequals(bondType, entry.first))
			return entry.second;
	}

	throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// Compound helper classes
// Strict ordering for CompoundAtom: first by id, then by typeSymbol.
struct CompoundAtomLess
{
	bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
	{
		const int byID = a.id.compare(b.id);
		if (byID != 0)
			return byID < 0;

		const int byType = a.typeSymbol - b.typeSymbol;
		return byType < 0;
	}
};
// Strict ordering for CompoundBond: by first atom id, then second atom id,
// then by the integer value of the bond type.
struct CompoundBondLess
{
	bool operator()(const CompoundBond &a, const CompoundBond &b) const
	{
		const int byFirst = a.atomID[0].compare(b.atomID[0]);
		if (byFirst != 0)
			return byFirst < 0;

		const int bySecond = a.atomID[1].compare(b.atomID[1]);
		if (bySecond != 0)
			return bySecond < 0;

		const int byType = static_cast<int>(a.type) - static_cast<int>(b.type);
		return byType < 0;
	}
};
// --------------------------------------------------------------------
// Compound
// Construct a compound from a datablock that has exactly one chem_comp row,
// plus chem_comp_atom and chem_comp_bond categories using the pdbx_* and
// model_Cartn_* item names.
// Throws std::runtime_error when chem_comp does not contain exactly one row.
Compound::Compound(cif::datablock &db)
{
	auto &chemComp = db["chem_comp"];

	if (chemComp.size() != 1)
		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

	cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

	// The name should not contain newline characters since that triggers validation errors later on
	cif::replace_all(mName, "\n", "");

	mGroup = "non-polymer";

	// the atoms
	for (auto row : db["chem_comp_atom"])
	{
		CompoundAtom compAtom;
		std::string symbol;

		cif::tie(compAtom.id, symbol, compAtom.charge, compAtom.aromatic, compAtom.leavingAtom, compAtom.stereoConfig, compAtom.x, compAtom.y, compAtom.z) =
			row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

		compAtom.typeSymbol = AtomTypeTraits(symbol).type();
		mAtoms.push_back(std::move(compAtom));
	}

	// the bonds; value_order is converted with from_string
	for (auto row : db["chem_comp_bond"])
	{
		CompoundBond compBond;
		std::string order;

		cif::tie(compBond.atomID[0], compBond.atomID[1], order, compBond.aromatic, compBond.stereoConfig) =
			row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

		compBond.type = from_string(order);
		mBonds.push_back(std::move(compBond));
	}
}
// Construct a compound from a datablock whose chem_comp_atom rows use the
// items x, y, z and whose chem_comp_bond rows use the items type and aromatic.
// id, name, type and group are taken from the arguments; the formal charge and
// formula weight are accumulated from the atoms.
Compound::Compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
	: mID(id)
	, mName(name)
	, mType(type)
	, mGroup(group)
{
	for (auto row : db["chem_comp_atom"])
	{
		CompoundAtom compAtom;
		std::string symbol;

		cif::tie(compAtom.id, symbol, compAtom.charge, compAtom.x, compAtom.y, compAtom.z) =
			row.get("atom_id", "type_symbol", "charge", "x", "y", "z");

		compAtom.typeSymbol = AtomTypeTraits(symbol).type();

		mFormalCharge += compAtom.charge;
		mFormulaWeight += AtomTypeTraits(compAtom.typeSymbol).weight();

		mAtoms.push_back(std::move(compAtom));
	}

	// Translate the textual bond type; unknown types are reported (when verbose)
	// and treated as single bonds.
	auto toBondType = [&id](const std::string &btype)
	{
		using cif::iequals;

		if (iequals(btype, "single"))
			return BondType::sing;
		if (iequals(btype, "double"))
			return BondType::doub;
		if (iequals(btype, "triple"))
			return BondType::trip;
		if (iequals(btype, "deloc") or iequals(btype, "aromat") or iequals(btype, "aromatic"))
			return BondType::delo;

		if (cif::VERBOSE > 0)
			std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << std::endl;
		return BondType::sing;
	};

	for (auto row : db["chem_comp_bond"])
	{
		CompoundBond compBond;
		std::string btype;

		cif::tie(compBond.atomID[0], compBond.atomID[1], btype, compBond.aromatic) =
			row.get("atom_id_1", "atom_id_2", "type", "aromatic");

		compBond.type = toBondType(btype);
		mBonds.push_back(std::move(compBond));
	}
}
// Return a copy of the atom in this compound whose id equals atomID.
// Throws std::out_of_range when the compound has no such atom. This includes
// the case of an empty atomID: previously "not found" was detected by comparing
// the id of a default constructed result with atomID, so an empty atomID
// yielded a default atom instead of an exception.
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{
	for (auto &a : mAtoms)
	{
		if (a.id == atomID)
			return a;
	}

	throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
}
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond &b) {
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
});
return i != mBonds.end();
}
// --------------------------------------------------------------------
// a factory class to generate compounds
// Table of three-letter residue names and the single character each maps to.
// The keys seed the set of known peptides in CompoundFactoryImpl and serve as
// the fallback in CompoundFactory::isKnownPeptide when no implementation is set.
CIFPP_EXPORT const std::map<std::string, char> kAAMap{
{"ALA", 'A'},
{"ARG", 'R'},
{"ASN", 'N'},
{"ASP", 'D'},
{"CYS", 'C'},
{"GLN", 'Q'},
{"GLU", 'E'},
{"GLY", 'G'},
{"HIS", 'H'},
{"ILE", 'I'},
{"LEU", 'L'},
{"LYS", 'K'},
{"MET", 'M'},
{"PHE", 'F'},
{"PRO", 'P'},
{"SER", 'S'},
{"THR", 'T'},
{"TRP", 'W'},
{"TYR", 'Y'},
{"VAL", 'V'},
{"GLX", 'Z'},
{"ASX", 'B'}};
// Table of base residue names (with and without the D prefix) and the single
// character each maps to. The keys seed the set of known bases in
// CompoundFactoryImpl and serve as the fallback in CompoundFactory::isKnownBase
// when no implementation is set.
CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
{"A", 'A'},
{"C", 'C'},
{"G", 'G'},
{"T", 'T'},
{"U", 'U'},
{"DA", 'A'},
{"DC", 'C'},
{"DG", 'G'},
{"DT", 'T'}};
// --------------------------------------------------------------------
// Base implementation of a compound factory. Implementations form a singly
// linked chain through mNext. This base class only serves compounds that were
// stored in mCompounds by a constructor; subclasses override create() to load
// compounds on demand.
class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryImpl>
{
  public:
	CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);

	CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next);

	// The compounds in mCompounds are owned by this object.
	virtual ~CompoundFactoryImpl()
	{
		for (auto c : mCompounds)
			delete c;
	}

	// Return the compound with the given id (compared in upper case), or
	// nullptr when it is unknown.
	//
	// The compounds already held by this implementation and the ones following
	// it in the chain are searched first. When that fails, and the id was not
	// recorded as missing before, each implementation in the chain is asked to
	// create the compound. An id nobody can create is recorded in mMissing.
	//
	// Locking: the shared lock is held only while searching. It is released
	// before create() is called, since an override may lock mMutex itself and
	// locking a mutex the calling thread already owns is undefined behaviour.
	// mMissing is modified under an exclusive lock; modifying it while holding
	// only a shared lock would be a data race.
	Compound *get(std::string id)
	{
		cif::to_upper(id);

		{
			std::shared_lock lock(mMutex);

			// walk the list, see if any of us has the compound already
			for (auto impl = shared_from_this(); impl; impl = impl->mNext)
			{
				for (auto cmp : impl->mCompounds)
				{
					if (cmp->id() == id)
						return cmp;
				}
			}

			if (mMissing.count(id) != 0)
				return nullptr;
		}

		Compound *result = nullptr;

		for (auto impl = shared_from_this(); impl; impl = impl->mNext)
		{
			result = impl->create(id);
			if (result != nullptr)
				break;
		}

		if (result == nullptr)
		{
			std::unique_lock lock(mMutex);
			mMissing.insert(id);
		}

		return result;
	}

	std::shared_ptr<CompoundFactoryImpl> next() const
	{
		return mNext;
	}

	// True when resName is a known peptide here or further down the chain.
	bool isKnownPeptide(const std::string &resName)
	{
		return mKnownPeptides.count(resName) or
		       (mNext and mNext->isKnownPeptide(resName));
	}

	// True when resName is a known base here or further down the chain.
	bool isKnownBase(const std::string &resName)
	{
		return mKnownBases.count(resName) or
		       (mNext and mNext->isKnownBase(resName));
	}

  protected:
	virtual Compound *create(const std::string &id)
	{
		// For the base class we assume every compound is preloaded
		return nullptr;
	}

	std::shared_timed_mutex mMutex;

	std::vector<Compound *> mCompounds;
	std::set<std::string> mKnownPeptides;
	std::set<std::string> mKnownBases;
	std::set<std::string> mMissing; // ids for which creation failed before
	std::shared_ptr<CompoundFactoryImpl> mNext;
};
// --------------------------------------------------------------------
// Construct an implementation without compounds of its own; the known peptide
// and base names are taken from the keys of kAAMap and kBaseMap.
CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	for (const auto &aa : kAAMap)
		mKnownPeptides.insert(aa.first);

	for (const auto &base : kBaseMap)
		mKnownBases.insert(base.first);
}
// Construct an implementation holding all compounds found in file.
// A file with a non-empty comp_list datablock is treated as a CCP4 restraints
// file; any other file is treated as a CCD components file, which must validate
// against the mmcif_pdbx dictionary (std::runtime_error otherwise).
CompoundFactoryImpl::CompoundFactoryImpl(const fs::path &file, std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	cif::file cifFile(file);

	auto &compList = cifFile["comp_list"];

	if (compList.empty())
	{
		// A CCD components file, validate it first
		cifFile.load_dictionary("mmcif_pdbx");

		if (not cifFile.is_valid())
			throw std::runtime_error("Invalid compound file");

		for (auto &db : cifFile)
			mCompounds.push_back(new Compound(db));

		return;
	}

	// So this is a CCP4 restraints file, special handling

	// Derive the compound type from its id and group.
	//
	// known groups are (counted from ccp4 monomer dictionary)
	//	D-pyranose
	//	DNA
	//	L-PEPTIDE LINKING
	//	L-SACCHARIDE
	//	L-peptide
	//	L-pyranose
	//	M-peptide
	//	NON-POLYMER
	//	P-peptide
	//	RNA
	//	furanose
	//	non-polymer
	//	non_polymer
	//	peptide
	//	pyranose
	//	saccharide
	auto typeFor = [](const std::string &compID, const std::string &compGroup) -> std::string
	{
		if (cif::iequals(compID, "gly"))
			return "peptide linking";
		if (cif::iequals(compGroup, "l-peptide") or cif::iequals(compGroup, "L-peptide linking") or cif::iequals(compGroup, "peptide") or cif::iequals(compGroup, "p-peptide"))
			return "L-peptide linking";
		if (cif::iequals(compGroup, "DNA"))
			return "DNA linking";
		if (cif::iequals(compGroup, "RNA"))
			return "RNA linking";
		return "non-polymer";
	};

	auto &chemComp = compList["chem_comp"];

	for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
	{
		std::string type = typeFor(id, group);

		auto &db = cifFile["comp_" + id];

		mCompounds.push_back(new Compound(db, id, name, type, group));
	}
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the cached components.cif file from CCD
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
{
public:
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next, const fs::path& file)
: CompoundFactoryImpl(next)
, mCompoundsFile(file)
{
}
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
: CompoundFactoryImpl(next)
{
}
Compound *create(const std::string &id) override;
cif::parser::datablock_index mIndex;
fs::path mCompoundsFile;
};
// Load the compound with the given id from the CCD data and store it in
// mCompounds. Returns nullptr when the data has no datablock of that name.
// The first call builds the datablock index, which requires reading the data
// once and then reopening it.
// Throws std::runtime_error when the components.cif resource is needed but
// cannot be located.
Compound *CCDCompoundFactoryImpl::create(const std::string &id)
{
	// Open a fresh stream on the CCD data: the configured file when there is
	// one, otherwise the components.cif resource.
	auto openCCD = [this]() -> std::unique_ptr<std::istream>
	{
		if (not mCompoundsFile.empty())
			return std::unique_ptr<std::istream>(new std::ifstream(mCompoundsFile));

		std::unique_ptr<std::istream> resource = cif::load_resource("components.cif");
		if (not resource)
			throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
		return resource;
	};

	const bool verbose = cif::VERBOSE > 1;

	Compound *result = nullptr;

	std::unique_ptr<std::istream> ccd = openCCD();

	cif::file file;

	if (mIndex.empty())
	{
		if (verbose)
		{
			std::cout << "Creating component index "
					  << "...";
			std::cout.flush();
		}

		cif::parser indexer(*ccd, file);
		mIndex = indexer.index_datablocks();

		if (verbose)
			std::cout << " done" << std::endl;

		// reload the resource, perhaps this should be improved...
		ccd = openCCD();
	}

	if (verbose)
	{
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::parser parser(*ccd, file);
	parser.parse_single_datablock(id, mIndex);

	if (verbose)
		std::cout << " done" << std::endl;

	if (not file.empty())
	{
		auto &db = file.front();
		if (db.name() == id)
		{
			result = new Compound(db);

			// NOTE(review): mCompounds is modified while holding only a shared lock
			std::shared_lock lock(mMutex);
			mCompounds.push_back(result);
		}
	}

	if (result == nullptr and cif::VERBOSE > 0)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	return result;
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the data found in CCP4's monomers lib
// Factory implementation that loads compounds on demand from a CCP4 monomer
// library directory.
class CCP4CompoundFactoryImpl : public CompoundFactoryImpl
{
public:
// clibd_mon is the root directory of the monomer library; next is the
// implementation to fall back to (none by default).
CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next = nullptr);
Compound *create(const std::string &id) override;
private:
// the library's list/mon_lib_list.cif file
cif::file mFile;
// root directory of the monomer library
fs::path mCLIBD_MON;
};
// Open list/mon_lib_list.cif below clibd_mon and extend the sets of known
// peptides and bases with the three letter codes listed there: groups matching
// "peptide" (optionally prefixed with l-, m- or p-, ignoring case) are
// peptides, the groups DNA and RNA are bases.
CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next)
	: CompoundFactoryImpl(next)
	, mFile((clibd_mon / "list" / "mon_lib_list.cif").string())
	, mCLIBD_MON(clibd_mon)
{
	const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

	auto &chemComps = mFile["comp_list"]["chem_comp"];

	for (const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>("group", "three_letter_code"))
	{
		if (std::regex_match(group, peptideRx))
		{
			mKnownPeptides.insert(threeLetterCode);
			continue;
		}

		const bool isNucleicAcid = cif::iequals(group, "DNA") or cif::iequals(group, "RNA");
		if (isNucleicAcid)
			mKnownBases.insert(threeLetterCode);
	}
}
// Load the compound with the given id from the CCP4 monomer library and store
// it in mCompounds. Returns nullptr when the library list does not contain
// exactly one entry with this three letter code, or when the compound's file
// does not exist.
//
// The compound is read from <library>/<first letter, lower case>/<id>.cif.
// For the ids COM, CON and PRN the file <id>_<id>.cif is tried when the plain
// name does not exist.
Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
{
	Compound *result = nullptr;

	auto &cat = mFile["comp_list"]["chem_comp"];

	auto rs = cat.find(cif::key("three_letter_code") == id);

	if (rs.size() == 1)
	{
		auto row = rs.front();

		std::string name, group;
		uint32_t numberAtomsAll, numberAtomsNh;
		cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
			row.get("name", "group", "number_atoms_all", "number_atoms_nh");

		fs::path resFile = mCLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + ".cif");

		// The last test used to be the bare literal "PRN", which is always true,
		// so the alternative name was tried for every id without a file.
		if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN")) // seriously...
			resFile = mCLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");

		if (fs::exists(resFile))
		{
			cif::file cf(resFile.string());

			// locate the datablock
			auto &db = cf["comp_" + id];

			std::string type;

			// known groups are (counted from ccp4 monomer dictionary)
			//	D-pyranose
			//	DNA
			//	L-PEPTIDE LINKING
			//	L-SACCHARIDE
			//	L-peptide
			//	L-pyranose
			//	M-peptide
			//	NON-POLYMER
			//	P-peptide
			//	RNA
			//	furanose
			//	non-polymer
			//	non_polymer
			//	peptide
			//	pyranose
			//	saccharide

			if (cif::iequals(id, "gly"))
				type = "peptide linking";
			else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
				type = "L-peptide linking";
			else if (cif::iequals(group, "DNA"))
				type = "DNA linking";
			else if (cif::iequals(group, "RNA"))
				type = "RNA linking";
			else
				type = "non-polymer";

			mCompounds.push_back(new Compound(db, id, name, type, group));
			result = mCompounds.back();
		}
	}

	return result;
}
// --------------------------------------------------------------------
// The process wide factory, created on first use by instance()
std::unique_ptr<CompoundFactory> CompoundFactory::sInstance;
// The per thread factory, created on first use by instance()
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
// Selects which of the two instances instance() and clear() operate on
bool CompoundFactory::sUseThreadLocalInstance;
// Choose whether instance() and clear() use the thread local factory (true)
// or the process wide one (false).
void CompoundFactory::init(bool useThreadLocalInstanceOnly)
{
sUseThreadLocalInstance = useThreadLocalInstanceOnly;
}
// Set up the default chain of factory implementations: a CCD based one when
// the components.cif resource can be loaded, and in front of that a CCP4 based
// one when the environment variable CLIBD_MON names a directory.
//
// The CCP4 implementation receives the current mImpl as its next
// implementation. It used to be created without one, which discarded the CCD
// implementation that had just been installed.
CompoundFactory::CompoundFactory()
	: mImpl(nullptr)
{
	auto ccd = cif::load_resource("components.cif");
	if (ccd)
		mImpl.reset(new CCDCompoundFactoryImpl(mImpl));
	else if (cif::VERBOSE > 0)
		std::cerr << "CCD components.cif file was not found" << std::endl;

	const char *clibd_mon = getenv("CLIBD_MON");
	if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
		mImpl.reset(new CCP4CompoundFactoryImpl(clibd_mon, mImpl));
	else if (cif::VERBOSE > 0)
		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
}
// Nothing to do explicitly; the members clean up after themselves.
CompoundFactory::~CompoundFactory()
{
}
// Return the factory to use, creating it on first use: the thread local
// one when sUseThreadLocalInstance is set, the process wide one otherwise.
CompoundFactory &CompoundFactory::instance()
{
	auto &slot = sUseThreadLocalInstance ? tlInstance : sInstance;

	if (not slot)
		slot.reset(new CompoundFactory());

	return *slot;
}
// Destroy the factory that instance() currently refers to: the thread local
// one when sUseThreadLocalInstance is set, the process wide one otherwise.
void CompoundFactory::clear()
{
	auto &slot = sUseThreadLocalInstance ? tlInstance : sInstance;
	slot.reset();
}
// Put a CCD based implementation reading from inDictFile in front of the
// current chain of implementations.
// Throws std::runtime_error when the file does not exist; any exception raised
// while constructing the implementation is reported and passed on.
void CompoundFactory::setDefaultDictionary(const fs::path &inDictFile)
{
	const bool dictExists = fs::exists(inDictFile);
	if (not dictExists)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		mImpl.reset(new CCDCompoundFactoryImpl(mImpl, inDictFile));
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;

		throw;
	}
}
// Put an implementation holding all compounds from inDictFile in front of the
// current chain of implementations.
// Throws std::runtime_error when the file does not exist; any exception raised
// while loading the file is reported and passed on.
void CompoundFactory::pushDictionary(const fs::path &inDictFile)
{
	const bool dictExists = fs::exists(inDictFile);
	if (not dictExists)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		mImpl.reset(new CompoundFactoryImpl(inDictFile, mImpl));
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;

		throw;
	}
}
// Remove the first implementation from the chain, if there is one.
void CompoundFactory::popDictionary()
{
	if (not mImpl)
		return;

	mImpl = mImpl->next();
}
// Return the compound with the given id as provided by the chain of
// implementations, or nullptr when there is no implementation or none of them
// knows the compound.
const Compound *CompoundFactory::create(std::string id)
{
	if (mImpl)
		return mImpl->get(id);

	return nullptr;
}
// True when resName is a known peptide according to the chain of
// implementations, or according to kAAMap when there is no implementation.
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
{
	if (mImpl)
		return mImpl->isKnownPeptide(resName);

	return kAAMap.count(resName) > 0;
}
// True when resName is a known base according to the chain of
// implementations, or according to kBaseMap when there is no implementation.
bool CompoundFactory::isKnownBase(const std::string &resName) const
{
	if (mImpl)
		return mImpl->isKnownBase(resName);

	return kBaseMap.count(resName) > 0;
}
} // namespace mmcif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/structure/Structure.hpp>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <numeric>
#include <gxrio.hpp>
#if __cpp_lib_format
#include <format>
#else
#include <boost/format.hpp>
#endif
#include <cif++/pdb/Cif2PDB.hpp>
#include <cif++/cif/parser.hpp>
#include <cif++/pdb/PDB2Cif.hpp>
// #include <cif++/AtomShape.hpp>
namespace fs = std::filesystem;
extern int cif::VERBOSE;
namespace mmcif
{
// --------------------------------------------------------------------
// Atom
// Construct the implementation for the atom with the given id, backed by row
// in datablock db. The frequently used items are read from the row right away
// by prefetch(); the compound is looked up later, on demand.
Atom::AtomImpl::AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row)
: mDb(db)
, mID(id)
, mRefcount(1)
, mRow(row)
, mCompound(nullptr)
{
prefetch();
}
// constructor for a symmetry copy of an atom
// Every member is copied from impl, except for the location (loc), the
// reference count (starts at 1 again), the symmetry copy flag (set) and the
// symmetry operator (sym_op). The copy refers to the same row as impl.
Atom::AtomImpl::AtomImpl(const AtomImpl &impl, const Point &loc, const std::string &sym_op)
: mDb(impl.mDb)
, mID(impl.mID)
, mType(impl.mType)
, mAtomID(impl.mAtomID)
, mCompID(impl.mCompID)
, mAsymID(impl.mAsymID)
, mSeqID(impl.mSeqID)
, mAltID(impl.mAltID)
, mAuthSeqID(impl.mAuthSeqID)
, mLocation(loc)
, mRefcount(1)
, mRow(impl.mRow)
, mCachedRefs(impl.mCachedRefs)
, mCompound(impl.mCompound)
, mSymmetryCopy(true)
, mSymmetryOperator(sym_op)
{
}
// Read the frequently used items from the row into members: the label ids,
// the auth sequence id, the atom type (unless the type symbol is "X") and the
// location.
void Atom::AtomImpl::prefetch()
{
	std::string typeSymbol;

	cif::tie(typeSymbol, mAtomID, mCompID, mAsymID, mSeqID, mAltID, mAuthSeqID) =
		mRow.get("type_symbol", "label_atom_id", "label_comp_id", "label_asym_id", "label_seq_id", "label_alt_id", "auth_seq_id");

	if (typeSymbol != "X")
		mType = AtomTypeTraits(typeSymbol).type();

	float cx, cy, cz;
	cif::tie(cx, cy, cz) = mRow.get("Cartn_x", "Cartn_y", "Cartn_z");

	mLocation = Point(cx, cy, cz);
}
// Three way comparison: negative, zero or positive when this atom orders
// before, equal to or after b. Atoms are compared by asym id, then sequence
// id, then auth sequence id and finally atom id.
int Atom::AtomImpl::compare(const AtomImpl &b) const
{
	if (int byAsym = mAsymID.compare(b.mAsymID); byAsym != 0)
		return byAsym;

	if (int bySeq = mSeqID - b.mSeqID; bySeq != 0)
		return bySeq;

	if (int byAuthSeq = mAuthSeqID.compare(b.mAuthSeqID); byAuthSeq != 0)
		return byAuthSeq;

	return mAtomID.compare(b.mAtomID);
}
// Store the six U values of the first atom_site_anisotrop row for this atom
// in anisou, in the order U11, U12, U13, U22, U23, U33.
// Returns false, leaving anisou untouched, when the category or the row is
// absent.
bool Atom::AtomImpl::getAnisoU(float anisou[6]) const
{
	auto cat = mDb.get("atom_site_anisotrop");
	if (not cat)
		return false;

	for (auto r : cat->find(cif::Key("id") == mID))
	{
		cif::tie(anisou[0], anisou[1], anisou[2], anisou[3], anisou[4], anisou[5]) =
			r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");

		// only the first matching row is used
		return true;
	}

	return false;
}
// Return the formal charge: the value of pdbx_formal_charge when the row has
// one, else the charge of the compound's atom when the compound consists of
// exactly one atom, else zero.
int Atom::AtomImpl::charge() const
{
	auto formalCharge = mRow["pdbx_formal_charge"].as<std::optional<int>>();
	if (formalCharge.has_value())
		return *formalCharge;

	auto c = compound();
	if (c != nullptr and c->atoms().size() == 1)
		return c->atoms().front().charge;

	return 0;
}
// Move the atom to p. Unless this is a clone, the new coordinates are also
// written to the row, formatted with three decimals.
// Symmetry copies cannot be moved: asserts, and throws std::runtime_error.
void Atom::AtomImpl::moveTo(const Point &p)
{
	assert(not mSymmetryCopy);
	if (mSymmetryCopy)
		throw std::runtime_error("Moving symmetry copy");

	if (not mClone)
	{
		// format a coordinate with three decimals
		auto formatted = [](auto v) -> std::string
		{
#if __cpp_lib_format
			return std::format("{:.3f}", v);
#else
			return (boost::format("%.3f") % v).str();
#endif
		};

		mRow.assign("Cartn_x", formatted(p.getX()), true, false);
		mRow.assign("Cartn_y", formatted(p.getY()), true, false);
		mRow.assign("Cartn_z", formatted(p.getZ()), true, false);
	}

	mLocation = p;
}
// Return the compound for this atom's label_comp_id, as provided by the
// compound factory. A non-null result is remembered in mCompound; the result is
// nullptr when the factory does not know the compound.
const Compound *Atom::AtomImpl::compound() const
{
	if (mCompound != nullptr)
		return mCompound;

	std::string compID = get_property("label_comp_id");
	mCompound = CompoundFactory::instance().create(compID);

	return mCompound;
}
// Return the value of item name in this atom's row, as a string.
// The reference to the item is kept in mCachedRefs, so that later calls for
// the same name need not look it up in the row again.
const std::string Atom::AtomImpl::get_property(const std::string_view name) const
{
	for (auto &cached : mCachedRefs)
	{
		if (std::get<0>(cached) == name)
			return std::get<1>(cached).as<std::string>();
	}

	// not cached yet, add it
	mCachedRefs.emplace_back(name, const_cast<cif::Row &>(mRow)[name]);
	return std::get<1>(mCachedRefs.back()).as<std::string>();
}
// Assign value to item name in this atom's row.
// The reference to the item is taken from mCachedRefs, and added to it first
// when it is not there yet.
void Atom::AtomImpl::set_property(const std::string_view name, const std::string &value)
{
	for (auto &cached : mCachedRefs)
	{
		if (std::get<0>(cached) == name)
		{
			std::get<1>(cached) = value;
			return;
		}
	}

	// not cached yet, add it
	mCachedRefs.emplace_back(name, mRow[name]);
	std::get<1>(mCachedRefs.back()) = value;
}
// Construct an atom for row in datablock db; the atom's id is the value of
// the row's "id" item.
Atom::Atom(cif::Datablock &db, cif::Row &row)
: Atom(std::make_shared<AtomImpl>(db, row["id"].as<std::string>(), row))
{
}
// Construct a symmetry copy of rhs, located at loc and created by symmetry
// operator sym_op. The copy gets an implementation object of its own.
Atom::Atom(const Atom &rhs, const Point &loc, const std::string &sym_op)
: Atom(std::make_shared<AtomImpl>(*rhs.mImpl, loc, sym_op))
{
}
// Return the atom_site_anisotrop row found for this atom's id, or a default
// constructed row when the datablock has no such category.
const cif::Row Atom::getRowAniso() const
{
	auto cat = mImpl->mDb.get("atom_site_anisotrop");
	if (cat)
		return cat->find1(cif::Key("id") == mImpl->mID);

	return {};
}
float Atom::uIso() const
{
float result;
if (not get_property<std::string>("U_iso_or_equiv").empty())
result = get_property<float>("U_iso_or_equiv");
else if (not get_property<std::string>("B_iso_or_equiv").empty())
result = get_property<float>("B_iso_or_equiv") / static_cast<float>(8 * kPI * kPI);
else
throw std::runtime_error("Missing B_iso or U_iso");
return result;
}
// Return an identifier of the form <comp id>_<asym id>_<seq id>:<atom id>,
// built from the label ids.
std::string Atom::labelID() const
{
	std::string result = mImpl->mCompID;

	result += '_';
	result += mImpl->mAsymID;
	result += '_';
	result += std::to_string(mImpl->mSeqID);
	result += ':';
	result += mImpl->mAtomID;

	return result;
}
// Return an identifier of the form <comp id>_<asym id>_<seq id><ins code>,
// built from the auth ids and the PDB insertion code.
std::string Atom::pdbID() const
{
	std::string result = get_property<std::string>("auth_comp_id");

	result += '_';
	result += get_property<std::string>("auth_asym_id");
	result += '_';
	result += get_property<std::string>("auth_seq_id");
	result += get_property<std::string>("pdbx_PDB_ins_code");

	return result;
}
// Return the compound this atom belongs to.
// Throws std::runtime_error when the compound is unknown; the compound id is
// reported first when running verbose.
const Compound &Atom::compound() const
{
	const Compound *found = impl().compound();
	if (found != nullptr)
		return *found;

	if (cif::VERBOSE > 0)
		std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << std::endl;

	throw std::runtime_error("no compound");
}
// Return the charge as reported by the implementation object's charge().
int Atom::charge() const
{
return impl().charge();
}
// Return the value of the "occupancy" item, as a float.
float Atom::occupancy() const
{
return get_property<float>("occupancy");
}
// Return the value of the "label_entity_id" item.
std::string Atom::labelEntityID() const
{
return get_property<std::string>("label_entity_id");
}
// Return the value of the "auth_atom_id" item.
std::string Atom::authAtomID() const
{
return get_property<std::string>("auth_atom_id");
}
// Return the value of the "auth_comp_id" item.
std::string Atom::authCompID() const
{
return get_property<std::string>("auth_comp_id");
}
// Return the value of the "auth_asym_id" item.
std::string Atom::authAsymID() const
{
return get_property<std::string>("auth_asym_id");
}
// Return the value of the "pdbx_PDB_ins_code" item.
std::string Atom::pdbxAuthInsCode() const
{
return get_property<std::string>("pdbx_PDB_ins_code");
}
// Return the value of the "pdbx_auth_alt_id" item.
std::string Atom::pdbxAuthAltID() const
{
return get_property<std::string>("pdbx_auth_alt_id");
}
// Move the atom by adding t to its location.
void Atom::translate(Point t)
{
	auto position = location();

	position += t;

	location(position);
}
// Move the atom by rotating its location with q.
void Atom::rotate(Quaternion q)
{
	auto position = location();

	position.rotate(q);

	location(position);
}
// Shift the atom's location by t, then rotate it by q, and store the result.
void Atom::translateAndRotate(Point t, Quaternion q)
{
    auto position = location();

    position += t;
    position.rotate(q);

    location(position);
}
// Shift the atom's location by t1, rotate it by q, shift it by t2,
// and store the result.
void Atom::translateRotateAndTranslate(Point t1, Quaternion q, Point t2)
{
    auto position = location();

    position += t1;
    position.rotate(q);
    position += t2;

    location(position);
}
// Two atoms are equal when they share the same implementation object, or
// when both have one and these refer to the same datablock and atom ID.
bool Atom::operator==(const Atom &rhs) const
{
    if (mImpl == rhs.mImpl)
        return true;

    // exactly one of the two is empty: never equal
    if (not mImpl or not rhs.mImpl)
        return false;

    const bool sameDb = &mImpl->mDb == &rhs.mImpl->mDb;
    return sameDb and mImpl->mID == rhs.mImpl->mID;
}
// Write a short description of the atom: label fields first, the alternate
// ID in parentheses if any, and the author fields in brackets when they
// differ from the label fields or an insertion code is present.
std::ostream &operator<<(std::ostream &os, const Atom &atom)
{
    os << atom.labelCompID() << ' ' << atom.labelAsymID() << ':' << atom.labelSeqID() << ' ' << atom.labelAtomID();

    if (atom.isAlternate())
        os << '(' << atom.labelAltID() << ')';

    const bool showAuth =
        atom.authAsymID() != atom.labelAsymID() or
        atom.authSeqID() != std::to_string(atom.labelSeqID()) or
        not atom.pdbxAuthInsCode().empty();

    if (showAuth)
        os << " [" << atom.authAsymID() << ':' << atom.authSeqID() << atom.pdbxAuthInsCode() << ']';

    return os;
}
// --------------------------------------------------------------------
// residue
// Move constructor: takes over all state of rhs and clears rhs.mStructure.
Residue::Residue(Residue &&rhs)
    : mStructure(std::exchange(rhs.mStructure, nullptr))
    , mCompoundID(std::move(rhs.mCompoundID))
    , mAsymID(std::move(rhs.mAsymID))
    , mSeqID(rhs.mSeqID)
    , mAuthSeqID(rhs.mAuthSeqID)
    , mAtoms(std::move(rhs.mAtoms))
{
}
// Move assignment: takes over all state of rhs and clears rhs.mStructure.
Residue &Residue::operator=(Residue &&rhs)
{
    // Without this guard a self-move would set our own mStructure to nullptr,
    // after which atoms() and friends report "Invalid Residue object".
    if (this != &rhs)
    {
        mStructure = rhs.mStructure;
        rhs.mStructure = nullptr;

        mCompoundID = std::move(rhs.mCompoundID);
        mAsymID = std::move(rhs.mAsymID);
        mSeqID = rhs.mSeqID;
        mAuthSeqID = rhs.mAuthSeqID;
        mAtoms = std::move(rhs.mAtoms);
    }

    return *this;
}
// Nothing to release explicitly; members clean up after themselves.
Residue::~Residue() = default;
// Return the entity ID for this residue.
// Taken from the first atom if there is one; otherwise looked up in
// struct_asym by asym ID. Empty when neither source is available.
std::string Residue::entityID() const
{
    if (not mAtoms.empty())
        return mAtoms.front().labelEntityID();

    if (mStructure == nullptr or mAsymID.empty())
        return {};

    using namespace cif::literals;

    auto &db = mStructure->datablock();
    return db["struct_asym"].find1<std::string>("id"_key == mAsymID, "entity_id");
}
// Entity type of this residue, resolved by the owning structure from entityID().
EntityType Residue::entityType() const
{
    assert(mStructure);

    const auto id = entityID();
    return mStructure->getEntityTypeForEntityID(id);
}
// pdbx_PDB_ins_code of the first atom, or an empty string when there are no atoms.
std::string Residue::authInsCode() const
{
    assert(mStructure);

    if (mAtoms.empty())
        return {};

    return mAtoms.front().get_property<std::string>("pdbx_PDB_ins_code");
}
// auth_asym_id of the first atom, or an empty string when there are no atoms.
std::string Residue::authAsymID() const
{
    assert(mStructure);

    if (mAtoms.empty())
        return {};

    return mAtoms.front().get_property<std::string>("auth_asym_id");
}
// The stored author sequence ID of this residue.
std::string Residue::authSeqID() const
{
    std::string id = mAuthSeqID;
    return id;
}
// Return the compound for mCompoundID as provided by the CompoundFactory.
// Throws std::runtime_error when the factory returns nullptr.
const Compound &Residue::compound() const
{
    auto comp = CompoundFactory::instance().create(mCompoundID);
    if (comp != nullptr)
        return *comp;

    throw std::runtime_error("Failed to create compound " + mCompoundID);
}
// Mutable access to the atoms of this residue.
// Throws std::runtime_error when the residue has no structure.
AtomView &Residue::atoms()
{
    if (mStructure != nullptr)
        return mAtoms;

    throw std::runtime_error("Invalid Residue object");
}
// Read-only access to the atoms of this residue.
// Throws std::runtime_error when the residue has no structure.
const AtomView &Residue::atoms() const
{
    if (mStructure != nullptr)
        return mAtoms;

    throw std::runtime_error("Invalid Residue object");
}
std::string Residue::unique_alt_id() const
{
if (mStructure == nullptr)
throw std::runtime_error("Invalid Residue object");
auto firstAlt = std::find_if(mAtoms.begin(), mAtoms.end(), [](auto &a)
{ return not a.labelAltID().empty(); });
return firstAlt != mAtoms.end() ? firstAlt->labelAltID() : "";
}
// Stamp this residue's label/auth identifiers onto the atom and append it
// to the residue's atom list. label_seq_id is only written when mSeqID is non-zero.
void Residue::addAtom(Atom &atom)
{
    atom.set_property("label_comp_id", mCompoundID);
    atom.set_property("label_asym_id", mAsymID);

    const bool hasSeqID = mSeqID != 0;
    if (hasSeqID)
        atom.set_property("label_seq_id", std::to_string(mSeqID));

    atom.set_property("auth_seq_id", mAuthSeqID);

    mAtoms.push_back(atom);
}
// Return the atoms without an alt ID plus those carrying the first alt ID
// encountered; atoms with any other alt ID are skipped (and reported on
// std::cerr when cif::VERBOSE > 0).
// Throws std::runtime_error when the residue has no structure.
AtomView Residue::unique_atoms() const
{
    if (mStructure == nullptr)
        throw std::runtime_error("Invalid Residue object");

    AtomView selected;
    std::string chosenAlt;

    for (auto &atom : mAtoms)
    {
        auto alt = atom.labelAltID();

        if (not alt.empty())
        {
            // the first alt ID seen becomes the one we keep
            if (chosenAlt.empty())
                chosenAlt = alt;

            if (alt != chosenAlt)
            {
                if (cif::VERBOSE > 0)
                    std::cerr << "skipping alternate atom " << atom << std::endl;
                continue;
            }
        }

        selected.push_back(atom);
    }

    return selected;
}
std::set<std::string> Residue::getAlternateIDs() const
{
std::set<std::string> result;
for (auto a : mAtoms)
{
auto alt = a.labelAltID();
if (not alt.empty())
result.insert(alt);
}
return result;
}
// Return the first atom whose label atom ID equals atomID.
// When none matches, a default-constructed Atom is returned and a message
// is written to std::cerr if cif::VERBOSE > 1.
Atom Residue::atomByID(const std::string &atomID) const
{
    Atom found;

    auto pos = std::find_if(mAtoms.begin(), mAtoms.end(),
        [&atomID](const Atom &a) { return a.labelAtomID() == atomID; });

    if (pos != mAtoms.end())
        found = *pos;

    if (not found and cif::VERBOSE > 1)
        std::cerr << "Atom with atom_id " << atomID << " not found in residue " << mAsymID << ':' << mSeqID << std::endl;

    return found;
}
// Residue is a single entity if the atoms for the asym with mAsymID is equal
// to the number of atoms in this residue... hope this is correct....
bool Residue::isEntity() const
{
auto &db = mStructure->datablock();
auto a1 = db["atom_site"].find(cif::Key("label_asym_id") == mAsymID);
// auto a2 = atoms();
auto &a2 = mAtoms;
return a1.size() == a2.size();
}
// Concatenation of the author asym ID, author sequence ID and insertion code.
std::string Residue::authID() const
{
    std::string id = authAsymID();
    id += authSeqID();
    id += authInsCode();
    return id;
}
// Asym ID followed by the sequence ID; for compound "HOH" the author
// sequence ID is appended instead of the label sequence ID.
std::string Residue::labelID() const
{
    const bool isWater = mCompoundID == "HOH";
    return isWater ? mAsymID + mAuthSeqID
                   : mAsymID + std::to_string(mSeqID);
}
std::tuple<Point, float> Residue::centerAndRadius() const
{
std::vector<Point> pts;
for (auto &a : mAtoms)
pts.push_back(a.location());
auto center = Centroid(pts);
float radius = 0;
for (auto &pt : pts)
{
float d = static_cast<float>(Distance(pt, center));
if (radius < d)
radius = d;
}
return std::make_tuple(center, radius);
}
bool Residue::hasAlternateAtoms() const
{
return std::find_if(mAtoms.begin(), mAtoms.end(), [](const Atom &atom)
{ return atom.isAlternate(); }) != mAtoms.end();
}
std::set<std::string> Residue::getAtomIDs() const
{
std::set<std::string> ids;
for (auto a : mAtoms)
ids.insert(a.labelAtomID());
return ids;
}
// Return every atom of this residue whose label atom ID equals atomID.
AtomView Residue::getAtomsByID(const std::string &atomID) const
{
    AtomView matches;

    for (const auto &atom : mAtoms)
    {
        if (atom.labelAtomID() != atomID)
            continue;
        matches.push_back(atom);
    }

    return matches;
}
// Write "<compound> <asym>:<seq>" and, when the author asym or sequence ID
// differs from the label one, " [<auth asym>:<auth seq>]".
std::ostream &operator<<(std::ostream &os, const Residue &res)
{
    os << res.compoundID() << ' ' << res.asymID() << ':' << res.seqID();

    const bool authDiffers =
        res.authAsymID() != res.asymID() or
        res.authSeqID() != std::to_string(res.seqID());

    if (authDiffers)
        os << " [" << res.authAsymID() << ':' << res.authSeqID() << ']';

    return os;
}
// --------------------------------------------------------------------
// monomer
// Construct the monomer at position 'index' of 'polymer'.
// The Residue base is initialised with the polymer's structure and asym ID;
// a pointer to the polymer is stored, so the polymer must outlive (or keep
// a stable address for) this monomer.
Monomer::Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID, const std::string &compoundID)
: Residue(*polymer.structure(), compoundID, polymer.asymID(), seqID, authSeqID)
, mPolymer(&polymer)
, mIndex(index)
{
}
// Move constructor: moves the Residue part, takes over the polymer pointer
// and index, and clears rhs.mPolymer.
Monomer::Monomer(Monomer &&rhs)
    : Residue(std::move(rhs))
    , mPolymer(std::exchange(rhs.mPolymer, nullptr))
    , mIndex(rhs.mIndex)
{
}
// Move assignment: moves the Residue part, takes over the polymer pointer
// and index, and clears rhs.mPolymer.
Monomer &Monomer::operator=(Monomer &&rhs)
{
    // A self-move would otherwise reset our own mPolymer (and, via the base
    // class assignment, mStructure) to nullptr.
    if (this != &rhs)
    {
        Residue::operator=(std::move(rhs));

        mPolymer = rhs.mPolymer;
        rhs.mPolymer = nullptr;
        mIndex = rhs.mIndex;
    }

    return *this;
}
// True when this monomer sits at index 0 of its polymer.
bool Monomer::is_first_in_chain() const
{
    const bool first = (mIndex == 0);
    return first;
}
// True when this monomer is the last element of its polymer.
bool Monomer::is_last_in_chain() const
{
    const auto count = mPolymer->size();
    return mIndex + 1 == count;
}
// True when there is one monomer before and two monomers after this one,
// i.e. the neighbours used by alpha() exist.
bool Monomer::has_alpha() const
{
    if (not(mIndex >= 1))
        return false;

    return mIndex + 2 < mPolymer->size();
}
// True when there are two monomers before and two monomers after this one,
// i.e. the neighbours used by kappa() exist.
bool Monomer::has_kappa() const
{
    if (not(mIndex >= 2))
        return false;

    return mIndex + 2 < mPolymer->size();
}
// Dihedral angle over previous C, N, CA and C.
// Returns 360 when there is no directly preceding monomer (by seq ID) or
// when the calculation throws; the exception text goes to std::cerr when
// cif::VERBOSE > 0.
float Monomer::phi() const
{
    float angle = 360;

    try
    {
        if (mIndex > 0)
        {
            auto &previous = (*mPolymer)[mIndex - 1];

            const bool consecutive = previous.mSeqID + 1 == mSeqID;
            if (consecutive)
                angle = static_cast<float>(DihedralAngle(previous.C().location(), N().location(), CAlpha().location(), C().location()));
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << ex.what() << std::endl;
    }

    return angle;
}
// Dihedral angle over N, CA, C and the next monomer's N.
// Returns 360 when there is no directly following monomer (by seq ID) or
// when the calculation throws; the exception text goes to std::cerr when
// cif::VERBOSE > 0.
float Monomer::psi() const
{
    float angle = 360;

    try
    {
        if (mIndex + 1 < mPolymer->size())
        {
            auto &following = (*mPolymer)[mIndex + 1];

            const bool consecutive = mSeqID + 1 == following.mSeqID;
            if (consecutive)
                angle = static_cast<float>(DihedralAngle(N().location(), CAlpha().location(), C().location(), following.N().location()));
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << ex.what() << std::endl;
    }

    return angle;
}
// Dihedral angle over the CA atoms of the previous, this, the next and the
// next-but-one monomer. Returns 360 when those neighbours do not exist or
// the calculation throws; the exception text goes to std::cerr when
// cif::VERBOSE > 0.
float Monomer::alpha() const
{
    float angle = 360;

    try
    {
        if (mIndex >= 1 and mIndex + 2 < mPolymer->size())
        {
            auto &polymer = *mPolymer;

            auto &before = polymer[mIndex - 1];
            auto &after = polymer[mIndex + 1];
            auto &afterNext = polymer[mIndex + 2];

            angle = static_cast<float>(DihedralAngle(before.CAlpha().location(), CAlpha().location(), after.CAlpha().location(), afterNext.CAlpha().location()));
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << ex.what() << std::endl;
    }

    return angle;
}
// Angle in degrees derived from CosinusAngle over the CA atoms of this
// monomer and the monomers two positions before and after it.
// Returns 360 when those neighbours do not exist, when their seq IDs are
// not exactly four apart, or when the calculation throws (reported on
// std::cerr when cif::VERBOSE > 0).
float Monomer::kappa() const
{
    float angle = 360;

    try
    {
        if (mIndex >= 2 and mIndex + 2 < mPolymer->size())
        {
            auto &twoBefore = (*mPolymer)[mIndex - 2];
            auto &twoAfter = (*mPolymer)[mIndex + 2];

            if (twoBefore.mSeqID + 4 == twoAfter.mSeqID)
            {
                const double cosKappa = CosinusAngle(CAlpha().location(), twoBefore.CAlpha().location(), twoAfter.CAlpha().location(), CAlpha().location());
                const double sinKappa = std::sqrt(1 - cosKappa * cosKappa);

                angle = static_cast<float>(std::atan2(sinKappa, cosKappa) * 180 / kPI);
            }
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << "When trying to calculate kappa for " << asymID() << ':' << seqID() << ": "
                      << ex.what() << std::endl;
    }

    return angle;
}
// CosinusAngle between the C=O of this monomer and the C=O of the directly
// preceding monomer (by seq ID). Returns 0 when there is no such monomer or
// the calculation throws (reported on std::cerr when cif::VERBOSE > 0).
float Monomer::tco() const
{
    float value = 0.0;

    try
    {
        if (mIndex > 0)
        {
            auto &previous = (*mPolymer)[mIndex - 1];

            const bool consecutive = previous.mSeqID + 1 == mSeqID;
            if (consecutive)
                value = static_cast<float>(CosinusAngle(C().location(), O().location(), previous.C().location(), previous.O().location()));
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << "When trying to calculate tco for " << asymID() << ':' << seqID() << ": "
                      << ex.what() << std::endl;
    }

    return value;
}
// Omega angle between this monomer and the next one in the polymer.
// Returns 360 for the last monomer in the chain or when the calculation
// throws (reported on std::cerr when cif::VERBOSE > 0).
float Monomer::omega() const
{
    float angle = 360;

    try
    {
        if (not is_last_in_chain())
        {
            auto &following = (*mPolymer)[mIndex + 1];
            angle = omega(*this, following);
        }
    }
    catch (const std::exception &ex)
    {
        if (cif::VERBOSE > 0)
            std::cerr << "When trying to calculate omega for " << asymID() << ':' << seqID() << ": "
                      << ex.what() << std::endl;
    }

    return angle;
}
// Per compound ID, the side-chain atom names that follow N, CA and CB when
// building the atom list for chi angles: Monomer::chi() prepends
// {"N", "CA", "CB"} to the list found here and Monomer::nrOfChis() returns
// the length of the list. Compounds not listed have no chi angles.
const std::map<std::string, std::vector<std::string>> kChiAtomsMap = {
{"ASP", {"CG", "OD1"}},
{"ASN", {"CG", "OD1"}},
{"ARG", {"CG", "CD", "NE", "CZ"}},
{"HIS", {"CG", "ND1"}},
{"GLN", {"CG", "CD", "OE1"}},
{"GLU", {"CG", "CD", "OE1"}},
{"SER", {"OG"}},
{"THR", {"OG1"}},
{"LYS", {"CG", "CD", "CE", "NZ"}},
{"TYR", {"CG", "CD1"}},
{"PHE", {"CG", "CD1"}},
{"LEU", {"CG", "CD1"}},
{"TRP", {"CG", "CD1"}},
{"CYS", {"SG"}},
{"ILE", {"CG1", "CD1"}},
{"MET", {"CG", "SD", "CE"}},
{"MSE", {"CG", "SE", "CE"}},
{"PRO", {"CG", "CD"}},
{"VAL", {"CG1"}}};
// Number of chi angles defined for this monomer's compound in kChiAtomsMap,
// or 0 when the compound is not listed there.
size_t Monomer::nrOfChis() const
{
    const auto entry = kChiAtomsMap.find(mCompoundID);
    return entry == kChiAtomsMap.end() ? 0 : entry->second.size();
}
// Dihedral angle number 'nr' (zero based) along the side chain.
// The atom list is N, CA, CB followed by the kChiAtomsMap entry for this
// compound; the angle uses the four consecutive atoms starting at 'nr'.
// Returns 0 when the compound is unknown, 'nr' is out of range, or the
// calculation throws (reported on std::cerr when cif::VERBOSE > 0).
float Monomer::chi(size_t nr) const
{
    float angle = 0;

    try
    {
        const auto entry = kChiAtomsMap.find(mCompoundID);
        if (entry != kChiAtomsMap.end() and nr < entry->second.size())
        {
            std::vector<std::string> names{"N", "CA", "CB"};
            for (const auto &name : entry->second)
                names.push_back(name);

            // in case we have a positive chiral volume we need to swap atoms
            if (chiralVolume() > 0)
            {
                if (mCompoundID == "LEU")
                    names.back() = "CD2";
                if (mCompoundID == "VAL")
                    names.back() = "CG2";
            }

            const auto p0 = atomByID(names[nr + 0]).location();
            const auto p1 = atomByID(names[nr + 1]).location();
            const auto p2 = atomByID(names[nr + 2]).location();
            const auto p3 = atomByID(names[nr + 3]).location();

            angle = static_cast<float>(DihedralAngle(p0, p1, p2, p3));
        }
    }
    catch (const std::exception &e)
    {
        if (cif::VERBOSE > 0)
            std::cerr << e.what() << std::endl;
        angle = 0;
    }

    return angle;
}
// True when the bond between this monomer and the next one in the polymer
// is cis according to Monomer::isCis(a, b); false for the last monomer.
bool Monomer::isCis() const
{
    if (not(mIndex + 1 < mPolymer->size()))
        return false;

    auto &following = (*mPolymer)[mIndex + 1];
    return Monomer::isCis(*this, following);
}
bool Monomer::isComplete() const
{
int seen = 0;
for (auto &a : mAtoms)
{
if (a.labelAtomID() == "CA")
seen |= 1;
else if (a.labelAtomID() == "C")
seen |= 2;
else if (a.labelAtomID() == "N")
seen |= 4;
else if (a.labelAtomID() == "O")
seen |= 8;
// else if (a.labelAtomID() == "OXT") seen |= 16;
}
return seen == 15;
}
bool Monomer::hasAlternateBackboneAtoms() const
{
bool result = false;
for (auto &a : mAtoms)
{
if (not a.isAlternate())
continue;
auto atomID = a.labelAtomID();
if (atomID == "CA" or atomID == "C" or atomID == "N" or atomID == "O")
{
result = true;
break;
}
}
return result;
}
float Monomer::chiralVolume() const
{
float result = 0;
if (mCompoundID == "LEU")
{
auto centre = atomByID("CG");
auto atom1 = atomByID("CB");
auto atom2 = atomByID("CD1");
auto atom3 = atomByID("CD2");
result = DotProduct(atom1.location() - centre.location(),
CrossProduct(atom2.location() - centre.location(), atom3.location() - centre.location()));
}
else if (mCompoundID == "VAL")
{
auto centre = atomByID("CB");
auto atom1 = atomByID("CA");
auto atom2 = atomByID("CG1");
auto atom3 = atomByID("CG2");
result = DotProduct(atom1.location() - centre.location(),
CrossProduct(atom2.location() - centre.location(), atom3.location() - centre.location()));
}
return result;
}
bool Monomer::areBonded(const Monomer &a, const Monomer &b, float errorMargin)
{
bool result = false;
try
{
Point atoms[4] = {
a.atomByID("CA").location(),
a.atomByID("C").location(),
b.atomByID("N").location(),
b.atomByID("CA").location()};
auto distanceCACA = Distance(atoms[0], atoms[3]);
double omega = DihedralAngle(atoms[0], atoms[1], atoms[2], atoms[3]);
bool cis = std::abs(omega) <= 30.0;
float maxCACADistance = cis ? 3.0f : 3.8f;
result = std::abs(distanceCACA - maxCACADistance) < errorMargin;
}
catch (...)
{
}
return result;
}
// Dihedral angle over a.CA, a.C, b.N and b.CA.
// Returns 360 when the calculation throws.
float Monomer::omega(const mmcif::Monomer &a, const mmcif::Monomer &b)
{
    try
    {
        return static_cast<float>(DihedralAngle(
            a.atomByID("CA").location(),
            a.atomByID("C").location(),
            b.atomByID("N").location(),
            b.atomByID("CA").location()));
    }
    catch (...)
    {
        return 360;
    }
}
// True when the peptide bond between a and b is cis, i.e. the magnitude of
// omega(a, b) is below 30 degrees. omega() returns 360 on failure, which is
// reported as not cis.
bool Monomer::isCis(const mmcif::Monomer &a, const mmcif::Monomer &b)
{
    // Compare the absolute value: a signed comparison would report every
    // negative dihedral (e.g. a trans bond at -170) as cis. This matches the
    // std::abs(omega) test used in areBonded().
    return std::abs(omega(a, b)) < 30.0f;
}
// --------------------------------------------------------------------
// polymer
// Build the polymer for the given entity/asym pair from the rows of
// pdbx_poly_seq_scheme that match both IDs. One Monomer is created per
// distinct seq_id; later rows repeating a seq_id are dropped (and reported
// on std::cerr when cif::VERBOSE > 0).
Polymer::Polymer(const Structure &s, const std::string &entityID, const std::string &asymID)
: mStructure(const_cast<Structure *>(&s))
, mEntityID(entityID)
, mAsymID(asymID)
// NOTE(review): this initializer reads the members mAsymID/mEntityID, so it relies on
// them being declared before mPolySeq in the class — confirm against the header.
, mPolySeq(s.category("pdbx_poly_seq_scheme"), cif::Key("asym_id") == mAsymID and cif::Key("entity_id") == mEntityID)
{
// maps a seq_id to the index of the monomer created for it
std::map<size_t, size_t> ix;
reserve(mPolySeq.size());
for (auto r : mPolySeq)
{
int seqID;
std::string compoundID, authSeqID;
cif::tie(seqID, authSeqID, compoundID) = r.get("seq_id", "auth_seq_num", "mon_id");
// index the new monomer would get: the current number of monomers
size_t index = size();
// store only the first
if (not ix.count(seqID))
{
ix[seqID] = index;
emplace_back(*this, index, seqID, authSeqID, compoundID);
}
else if (cif::VERBOSE > 0)
{
// temporary monomer, only constructed to print a description of what is dropped
Monomer m{*this, index, seqID, authSeqID, compoundID};
std::cerr << "Dropping alternate residue " << m << std::endl;
}
}
}
// The pdb_strand_id value of the first pdbx_poly_seq_scheme row of this polymer.
std::string Polymer::chainID() const
{
    auto firstRow = mPolySeq.front();
    return firstRow["pdb_strand_id"].as<std::string>();
}
// Return the monomer with the given seq ID.
// Throws std::runtime_error when the polymer has no such monomer.
Monomer &Polymer::getBySeqID(int seqID)
{
    Monomer *found = nullptr;

    for (auto &monomer : *this)
    {
        if (monomer.seqID() == seqID)
        {
            found = &monomer;
            break;
        }
    }

    if (found == nullptr)
        throw std::runtime_error("Monomer with seqID " + std::to_string(seqID) + " not found in polymer " + mAsymID);

    return *found;
}
// Return the monomer with the given seq ID (read-only).
// Throws std::runtime_error when the polymer has no such monomer.
const Monomer &Polymer::getBySeqID(int seqID) const
{
    const Monomer *found = nullptr;

    for (auto &monomer : *this)
    {
        if (monomer.seqID() == seqID)
        {
            found = &monomer;
            break;
        }
    }

    if (found == nullptr)
        throw std::runtime_error("Monomer with seqID " + std::to_string(seqID) + " not found in polymer " + mAsymID);

    return *found;
}
// Number of positions between monomers a and b within this polymer.
// Both monomers are located by seq ID; the result is the absolute difference
// of their indices. Returns std::numeric_limits<int>::max() when the asym IDs
// differ or when not both monomers are found.
int Polymer::Distance(const Monomer &a, const Monomer &b) const
{
    int result = std::numeric_limits<int>::max();

    if (a.asymID() == b.asymID())
    {
        int ixa = std::numeric_limits<int>::max(), ixb = std::numeric_limits<int>::max();

        // ix: index of the monomer being visited, f: number of matches so far
        int ix = 0, f = 0;
        for (auto &m : *this)
        {
            if (m.seqID() == a.seqID())
                ixa = ix, ++f;

            if (m.seqID() == b.seqID())
                ixb = ix, ++f;

            if (f == 2)
            {
                result = std::abs(ixa - ixb);
                break;
            }

            // advance the running index; it was never incremented before,
            // which made every distance come out as 0
            ++ix;
        }
    }

    return result;
}
// --------------------------------------------------------------------
// Construct a sugar belonging to 'branch'.
// The Residue base gets the branch's structure, a seq ID of 0 and the
// author seq ID converted to a string; a pointer to the branch is stored.
Sugar::Sugar(const Branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID)
: Residue(branch.structure(), compoundID, asymID, 0, std::to_string(authSeqID))
, mBranch(&branch)
{
}
// Move constructor: moves the Residue part and copies the branch pointer.
Sugar::Sugar(Sugar &&rhs)
    : Residue(std::move(rhs))
    , mBranch(rhs.mBranch)
{
}
// Move assignment: moves the Residue part and copies the branch pointer.
// Self-assignment is a no-op.
Sugar &Sugar::operator=(Sugar &&rhs)
{
    if (this == &rhs)
        return *this;

    Residue::operator=(std::move(rhs));
    mBranch = rhs.mBranch;

    return *this;
}
// bool Sugar::hasLinkedSugarAtLeavingO(int leavingO) const
// {
// return false;
// }
// Sugar &Sugar::operator[](int leavingO)
// {
// throw std::logic_error("not implemented");
// }
// const Sugar &Sugar::operator[](int leavingO) const
// {
// throw std::logic_error("not implemented");
// }
std::string Sugar::name() const
{
std::string result;
if (mCompoundID == "MAN")
result += "alpha-D-mannopyranose";
else if (mCompoundID == "BMA")
result += "beta-D-mannopyranose";
else if (mCompoundID == "NAG")
result += "2-acetamido-2-deoxy-beta-D-glucopyranose";
else if (mCompoundID == "NDG")
result += "2-acetamido-2-deoxy-alpha-D-glucopyranose";
else if (mCompoundID == "FUC")
result += "alpha-L-fucopyranose";
else if (mCompoundID == "FUL")
result += "beta-L-fucopyranose";
else
{
auto compound = CompoundFactory::instance().create(mCompoundID);
if (compound)
result += compound->name();
else
result += mCompoundID;
}
return result;
}
// Build the branch for the given asym ID.
// The entity ID is looked up in struct_asym; one Sugar is created per
// pdbx_branch_scheme row of this asym, and the links listed in
// pdbx_entity_branch_link for the entity are applied to those sugars.
// Only the first struct_asym match is processed.
// Throws std::runtime_error when a link's atom_id_1 is not "c1"
// (case-insensitive); at() throws when a link number is out of range.
Branch::Branch(Structure &structure, const std::string &asymID)
: mStructure(&structure)
, mAsymID(asymID)
{
using namespace cif::literals;
auto &db = structure.datablock();
auto &struct_asym = db["struct_asym"];
auto &branch_scheme = db["pdbx_branch_scheme"];
auto &branch_link = db["pdbx_entity_branch_link"];
for (const auto &[entity_id] : struct_asym.find<std::string>("id"_key == asymID, "entity_id"))
{
for (const auto &[comp_id, num] : branch_scheme.find<std::string, int>(
"asym_id"_key == asymID, "mon_id", "pdb_seq_num"))
{
emplace_back(*this, comp_id, asymID, num);
}
for (const auto &[num1, num2, atom1, atom2] : branch_link.find<size_t, size_t, std::string, std::string>(
"entity_id"_key == entity_id, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
{
if (not cif::iequals(atom1, "c1"))
throw std::runtime_error("invalid pdbx_entity_branch_link");
// the list numbers in the link records are one-based
auto &s1 = at(num1 - 1);
auto &s2 = at(num2 - 1);
// NOTE(review): the sugars were just created above and no addAtom call is visible
// here, so atomByID presumably finds nothing yet; linkAtoms() repeats this later.
s1.setLink(s2.atomByID(atom2));
}
// stop after the first matching struct_asym row
break;
}
}
// Apply the links from pdbx_entity_branch_link for the entity of the first
// sugar: sugar number num1 is linked to atom atom_id_2 of sugar number num2.
// Throws std::runtime_error when a link's atom_id_1 is not "c1"
// (case-insensitive).
void Branch::linkAtoms()
{
    using namespace cif::literals;

    auto &branchLinks = mStructure->datablock()["pdbx_entity_branch_link"];
    auto entityID = front().entityID();

    for (const auto &[nr1, nr2, atomID1, atomID2] : branchLinks.find<size_t, size_t, std::string, std::string>(
             "entity_id"_key == entityID, "entity_branch_list_num_1", "entity_branch_list_num_2", "atom_id_1", "atom_id_2"))
    {
        if (not cif::iequals(atomID1, "c1"))
            throw std::runtime_error("invalid pdbx_entity_branch_link");

        // list numbers are one-based
        auto &from = at(nr1 - 1);
        auto &to = at(nr2 - 1);

        from.setLink(to.atomByID(atomID2));
    }
}
// Name of the whole branch, built starting at the first sugar;
// empty for an empty branch.
std::string Branch::name() const
{
    if (empty())
        return "";

    return name(front());
}
// Build the name for sugar s, prefixed by the (recursively built) names of
// every sugar in this branch whose link points to s (matched by author seq ID).
// The first such sugar is written as "<name>-(1-<n>)"; each further one is
// appended in square brackets.
std::string Branch::name(const Sugar &s) const
{
    std::string prefix;

    for (auto &candidate : *this)
    {
        const bool linkedToS = candidate.getLink() and candidate.getLink().authSeqID() == s.authSeqID();
        if (not linkedToS)
            continue;

        auto part = name(candidate) + "-(1-" + candidate.getLink().labelAtomID().substr(1) + ')';

        if (prefix.empty())
            prefix = part;
        else
            prefix = prefix + "-[" + part + ']';
    }

    if (not prefix.empty() and prefix.back() != ']')
        prefix += '-';

    return prefix + s.name();
}
float Branch::weight() const
{
return std::accumulate(begin(), end(), 0.f, [](float sum, const Sugar &s)
{
auto compound = mmcif::CompoundFactory::instance().create(s.compoundID());
if (compound)
sum += compound->formulaWeight();
return sum; });
}
// --------------------------------------------------------------------
// File
// Load a structure file from 'path'.
// A trailing ".gz" or ".xz" extension is ignored when determining the format;
// ".pdb" and ".ent" files are read with ReadPDBFile, anything else as CIF.
// Afterwards the mmcif_pdbx dictionary is loaded and the file is validated;
// an invalid file only results in a message on std::cerr.
void File::load(const std::filesystem::path &path)
{
    gxrio::ifstream in(path);

    auto ext = path.extension().string();

    // Strip the compression suffix to find the real format extension.
    // (The second test used '=' instead of '=='.)
    if (ext == ".gz" or ext == ".xz")
        ext = path.stem().extension().string();

    if (ext == ".pdb" or ext == ".ent")
        ReadPDBFile(in, *this);
    else
        cif::File::load(in);

    // validate, otherwise lots of functionality won't work
    loadDictionary("mmcif_pdbx");
    if (not isValid() and cif::VERBOSE >= 0)
        std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
}
// Save the file to 'path'.
// A trailing ".gz" or ".xz" extension is ignored when determining the format;
// ".pdb" and ".ent" are written with WritePDBFile, anything else as CIF.
void File::save(const std::filesystem::path &path)
{
    gxrio::ostream outFile(path);

    auto ext = path.extension().string();

    // Strip the compression suffix to find the real format extension.
    // (The second test used '=' instead of '=='.)
    if (ext == ".gz" or ext == ".xz")
        ext = path.stem().extension().string();

    if (ext == ".pdb" or ext == ".ent")
        WritePDBFile(outFile, data());
    else
        cif::File::save(outFile);
}
// --------------------------------------------------------------------
// Structure
// Create a structure for model 'modelNr' of datablock 'db'.
// When no atoms are found for model 1, the model number of the first
// atom_site record is tried instead. When there are still no atoms a
// warning is printed (cif::VERBOSE >= 0); otherwise loadData() is called.
Structure::Structure(cif::Datablock &db, size_t modelNr, StructureOpenOptions options)
    : mDb(db)
    , mModelNr(modelNr)
{
    auto &atomCat = db["atom_site"];

    loadAtomsForModel(options);

    // Check to see if we should actually load another model?
    // Only look at the first record when there is one: calling front() on a
    // category without rows is not safe.
    if (mAtoms.empty() and mModelNr == 1 and atomCat.begin() != atomCat.end())
    {
        std::optional<size_t> model_nr;
        cif::tie(model_nr) = atomCat.front().get("pdbx_PDB_model_num");

        if (model_nr and *model_nr != mModelNr)
        {
            if (cif::VERBOSE > 0)
                std::cerr << "No atoms loaded for model 1, trying model " << *model_nr << std::endl;

            mModelNr = *model_nr;
            loadAtomsForModel(options);
        }
    }

    if (mAtoms.empty())
    {
        if (cif::VERBOSE >= 0)
            std::cerr << "Warning: no atoms loaded" << std::endl;
    }
    else
        loadData();
}
// Create an atom for every atom_site record that belongs to the current
// model (records without a model number always qualify). Hydrogens
// (type_symbol "H") are skipped when the SkipHydrogen option is set.
void Structure::loadAtomsForModel(StructureOpenOptions options)
{
    auto &db = datablock();

    for (auto &row : db["atom_site"])
    {
        std::string id, type_symbol;
        std::optional<size_t> model_nr;

        cif::tie(id, type_symbol, model_nr) = row.get("id", "type_symbol", "pdbx_PDB_model_num");

        const bool otherModel = model_nr and *model_nr != mModelNr;
        if (otherModel)
            continue;

        if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H")
            continue;

        emplace_atom(std::make_shared<Atom::AtomImpl>(db, id, row));
    }
}
// Copy constructor: refers to the same datablock and model number, clones
// every atom of 's' and then rebuilds the derived data with loadData().
Structure::Structure(const Structure &s)
    : mDb(s.mDb)
    , mModelNr(s.mModelNr)
{
    const auto &sourceAtoms = s.mAtoms;

    mAtoms.reserve(sourceAtoms.size());
    for (auto &original : sourceAtoms)
        emplace_atom(original.clone());

    loadData();
}
// Nothing to release explicitly; members clean up after themselves.
Structure::~Structure() = default;
// Build the polymers, branches and non-polymer residues of this structure
// from the scheme categories, then distribute the already loaded atoms over
// those residues and finally link the atoms of the branches.
void Structure::loadData()
{
// one Polymer per run of consecutive rows sharing the same asym_id/entity_id
auto &polySeqScheme = category("pdbx_poly_seq_scheme");
for (const auto &[asymID, entityID] : polySeqScheme.rows<std::string,std::string>("asym_id", "entity_id"))
{
if (mPolymers.empty() or mPolymers.back().asymID() != asymID or mPolymers.back().entityID() != entityID)
mPolymers.emplace_back(*this, entityID, asymID);
}
// one Branch per run of consecutive rows sharing the same asym_id
auto &branchScheme = category("pdbx_branch_scheme");
for (const auto &[asymID] : branchScheme.rows<std::string>("asym_id"))
{
if (mBranches.empty() or mBranches.back().asymID() != asymID)
mBranches.emplace_back(*this, asymID);
}
// one non-polymer residue per pdbx_nonpoly_scheme row, with seq ID 0
auto &nonPolyScheme = category("pdbx_nonpoly_scheme");
for (const auto&[asymID, monID, pdbSeqNum] : nonPolyScheme.rows<std::string,std::string,std::string>("asym_id", "mon_id", "pdb_seq_num"))
mNonPolymers.emplace_back(*this, monID, asymID, 0, pdbSeqNum);
// place atoms in residues
// residues are looked up by (asym ID, seq ID, author seq ID)
using key_type = std::tuple<std::string, int, std::string>;
std::map<key_type, Residue *> resMap;
for (auto &poly : mPolymers)
{
for (auto &res : poly)
resMap[{res.asymID(), res.seqID(), res.authSeqID()}] = &res;
}
for (auto &res : mNonPolymers)
resMap[{res.asymID(), res.seqID(), res.mAuthSeqID}] = &res;
// NOTE(review): 'sugars' is filled below but never read within this function
std::set<std::string> sugars;
for (auto &branch : mBranches)
{
for (auto &sugar : branch)
{
resMap[{sugar.asymID(), sugar.seqID(), sugar.authSeqID()}] = &sugar;
sugars.insert(sugar.compoundID());
}
}
for (auto &atom : mAtoms)
{
key_type k(atom.labelAsymID(), atom.labelSeqID(), atom.authSeqID());
auto ri = resMap.find(k);
if (ri == resMap.end())
{
if (cif::VERBOSE > 0)
std::cerr << "Missing residue for atom " << atom << std::endl;
// see if it might match a non poly
// fallback: the atom goes to the first non-polymer with the same asym ID, if any
for (auto &res : mNonPolymers)
{
if (res.asymID() != atom.labelAsymID())
continue;
res.addAtom(atom);
break;
}
continue;
}
ri->second->addAtom(atom);
}
// the sugars have their atoms now, so the branch links can be resolved
for (auto &branch : mBranches)
branch.linkAtoms();
}
// Translate the 'type' of the entity with the given ID into an EntityType.
// The comparison is case-insensitive; an unrecognised type results in a
// std::runtime_error.
EntityType Structure::getEntityTypeForEntityID(const std::string entityID) const
{
    using namespace cif::literals;

    auto &entity = datablock()["entity"];
    auto typeName = entity.find1<std::string>("id"_key == entityID, "type");

    if (cif::iequals(typeName, "polymer"))
        return EntityType::Polymer;
    if (cif::iequals(typeName, "non-polymer"))
        return EntityType::NonPolymer;
    if (cif::iequals(typeName, "macrolide"))
        return EntityType::Macrolide;
    if (cif::iequals(typeName, "water"))
        return EntityType::Water;
    if (cif::iequals(typeName, "branched"))
        return EntityType::Branched;

    throw std::runtime_error("Unknown entity type " + typeName);
}
// Look up the entity ID for the given asym ID in struct_asym and return
// the entity type of that entity.
EntityType Structure::getEntityTypeForAsymID(const std::string asymID) const
{
    using namespace cif::literals;

    auto &structAsym = datablock()["struct_asym"];
    const auto entityID = structAsym.find1<std::string>("id"_key == asymID, "entity_id");

    return getEntityTypeForEntityID(entityID);
}
// Return all atoms whose label_entity_id equals the ID of the first entity
// of type "water"; empty when the structure has no water entity.
AtomView Structure::waters() const
{
    using namespace cif::literals;

    AtomView waterAtoms;

    // Get the entity id for water. Watch out, structure may not have water at all
    auto &entityCat = datablock()["entity"];

    for (const auto &[waterEntityID] : entityCat.find<std::string>("type"_key == "water", "id"))
    {
        for (auto &atom : mAtoms)
        {
            if (atom.get_property<std::string>("label_entity_id") != waterEntityID)
                continue;
            waterAtoms.push_back(atom);
        }

        // only the first water entity is considered
        break;
    }

    return waterAtoms;
}
// Find the atom with the given ID using a binary search over mAtomIndex,
// which holds indices into mAtoms ordered by atom ID.
// Throws std::out_of_range when no atom has this ID.
Atom Structure::getAtomByID(const std::string &id) const
{
    assert(mAtoms.size() == mAtomIndex.size());

    int low = 0, high = mAtoms.size() - 1;
    while (low <= high)
    {
        const int mid = (low + high) / 2;
        const Atom &candidate = mAtoms[mAtomIndex[mid]];

        const int order = candidate.id().compare(id);
        if (order == 0)
            return candidate;

        if (order < 0)
            low = mid + 1;
        else
            high = mid - 1;
    }

    throw std::out_of_range("Could not find atom with id " + id);
}
// Return the first atom whose label atom, asym, comp, seq and alt IDs all
// match the arguments. Throws std::out_of_range when there is none.
Atom Structure::getAtomByLabel(const std::string &atomID, const std::string &asymID, const std::string &compID, int seqID, const std::string &altID)
{
    for (auto &atom : mAtoms)
    {
        if (atom.labelAtomID() != atomID)
            continue;
        if (atom.labelAsymID() != asymID)
            continue;
        if (atom.labelCompID() != compID)
            continue;
        if (atom.labelSeqID() != seqID)
            continue;
        if (atom.labelAltID() != altID)
            continue;

        return atom;
    }

    throw std::out_of_range("Could not find atom with specified label");
}
// Return the atom whose location is nearest to p (the first one in case of
// a tie), or a default-constructed Atom when there are no atoms.
Atom Structure::getAtomByPosition(Point p) const
{
    double nearestDistance = std::numeric_limits<double>::max();
    const Atom *nearest = nullptr;

    for (auto &atom : mAtoms)
    {
        auto d = Distance(atom.location(), p);
        if (d < nearestDistance)
        {
            nearestDistance = d;
            nearest = &atom;
        }
    }

    if (nearest != nullptr)
        return *nearest;

    return {};
}
// Return the atom nearest to p among those whose label comp ID equals
// res_type and whose label atom ID equals type (the first one in case of a
// tie), or a default-constructed Atom when no atom qualifies.
Atom Structure::getAtomByPositionAndType(Point p, std::string_view type, std::string_view res_type) const
{
    double nearestDistance = std::numeric_limits<double>::max();
    const Atom *nearest = nullptr;

    for (auto &atom : mAtoms)
    {
        if (atom.labelCompID() != res_type)
            continue;

        if (atom.labelAtomID() != type)
            continue;

        auto d = Distance(atom.location(), p);
        if (d < nearestDistance)
        {
            nearestDistance = d;
            nearest = &atom;
        }
    }

    if (nearest != nullptr)
        return *nearest;

    return {};
}
// Return the first polymer with the given asym ID.
// Throws std::runtime_error when there is none.
Polymer &Structure::getPolymerByAsymID(const std::string &asymID)
{
    for (auto &polymer : mPolymers)
    {
        if (polymer.asymID() == asymID)
            return polymer;
    }

    throw std::runtime_error("Polymer with asym id " + asymID + " not found");
}
// Find a residue by asym ID, seq ID and author seq ID.
// Search order: non-polymers (only when seqID is 0; an empty authSeqID
// matches any), then polymer residues by seq ID, then sugars by author
// seq ID. Throws std::out_of_range when nothing matches.
Residue &Structure::getResidue(const std::string &asymID, int seqID, const std::string &authSeqID)
{
    if (seqID == 0)
    {
        for (auto &nonPoly : mNonPolymers)
        {
            if (nonPoly.asymID() != asymID)
                continue;

            if (authSeqID.empty() or nonPoly.authSeqID() == authSeqID)
                return nonPoly;
        }
    }

    for (auto &polymer : mPolymers)
    {
        if (polymer.asymID() != asymID)
            continue;

        for (auto &monomer : polymer)
        {
            if (monomer.seqID() == seqID)
                return monomer;
        }
    }

    for (auto &branch : mBranches)
    {
        if (branch.asymID() != asymID)
            continue;

        for (auto &sugar : branch)
        {
            if (sugar.asymID() == asymID and sugar.authSeqID() == authSeqID)
                return sugar;
        }
    }

    // nothing found, describe what we were looking for
    std::string description = asymID;

    if (seqID != 0)
        description += "/" + std::to_string(seqID);

    if (not authSeqID.empty())
        description += "-" + authSeqID;

    throw std::out_of_range("Could not find residue " + description);
}
// Find a residue by asym ID, compound ID, seq ID and author seq ID.
// Search order: non-polymers (only when seqID is 0), then polymer residues
// by seq ID, then sugars by author seq ID; the compound ID must match in
// every case. Throws std::out_of_range when nothing matches.
Residue &Structure::getResidue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID)
{
    if (seqID == 0)
    {
        for (auto &nonPoly : mNonPolymers)
        {
            const bool match = nonPoly.asymID() == asymID and nonPoly.authSeqID() == authSeqID and nonPoly.compoundID() == compID;
            if (match)
                return nonPoly;
        }
    }

    for (auto &polymer : mPolymers)
    {
        if (polymer.asymID() != asymID)
            continue;

        for (auto &monomer : polymer)
        {
            if (monomer.seqID() == seqID and monomer.compoundID() == compID)
                return monomer;
        }
    }

    for (auto &branch : mBranches)
    {
        if (branch.asymID() != asymID)
            continue;

        for (auto &sugar : branch)
        {
            const bool match = sugar.asymID() == asymID and sugar.authSeqID() == authSeqID and sugar.compoundID() == compID;
            if (match)
                return sugar;
        }
    }

    // nothing found, describe what we were looking for
    std::string description = asymID;

    if (seqID != 0)
        description += "/" + std::to_string(seqID);

    if (not authSeqID.empty())
        description += "-" + authSeqID;

    throw std::out_of_range("Could not find residue " + description + " of type " + compID);
}
// Return the first branch with the given asym ID.
// Throws std::runtime_error when there is none.
Branch &Structure::getBranchByAsymID(const std::string &asymID)
{
    for (auto &candidate : mBranches)
    {
        if (candidate.asymID() != asymID)
            continue;
        return candidate;
    }

    throw std::runtime_error("Branch not found for asym id " + asymID);
}
// Make sure the compound is described in chem_comp and, when isEntity is
// true, that a non-polymer entity exists for it.
// Returns the entity ID (existing or newly created) when isEntity is true
// and an empty string otherwise.
// Throws std::runtime_error when the CompoundFactory does not know the compound.
std::string Structure::insertCompound(const std::string &compoundID, bool isEntity)
{
    using namespace cif::literals;

    auto compound = CompoundFactory::instance().create(compoundID);
    if (compound == nullptr)
        throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");

    cif::Datablock &db = datablock();

    auto &chemComp = db["chem_comp"];
    const bool known = not chemComp.find(cif::Key("id") == compoundID).empty();
    if (not known)
    {
        chemComp.emplace({{"id", compoundID},
            {"name", compound->name()},
            {"formula", compound->formula()},
            {"formula_weight", compound->formulaWeight()},
            {"type", compound->type()}});
    }

    std::string entityID;

    if (not isEntity)
        return entityID;

    auto &pdbxEntityNonpoly = db["pdbx_entity_nonpoly"];
    try
    {
        entityID = pdbxEntityNonpoly.find1<std::string>("comp_id"_key == compoundID, "entity_id");
    }
    catch (const std::exception &)
    {
        // no entity for this compound yet: create one
        auto &entity = db["entity"];
        entityID = entity.getUniqueID("");

        entity.emplace({{"id", entityID},
            {"type", "non-polymer"},
            {"pdbx_description", compound->name()},
            {"formula_weight", compound->formulaWeight()}});

        pdbxEntityNonpoly.emplace({{"entity_id", entityID},
            {"name", compound->name()},
            {"comp_id", compoundID}});
    }

    return entityID;
}
// --------------------------------------------------------------------
// Append 'atom' to mAtoms and register it in mAtomIndex, the list of
// indices into mAtoms that is kept ordered by atom ID.
// Returns a reference to the stored atom.
// Throws std::runtime_error when an atom with the same ID already exists.
Atom &Structure::emplace_atom(Atom &&atom)
{
// binary search over mAtomIndex for the atom's ID
int L = 0, R = mAtomIndex.size() - 1;
while (L <= R)
{
int i = (L + R) / 2;
const Atom &ai = mAtoms[mAtomIndex[i]];
int d = ai.id().compare(atom.id());
if (d == 0)
throw std::runtime_error("Duplicate atom ID " + atom.id());
if (d < 0)
L = i + 1;
else
R = i - 1;
}
// the search ended without a match: R + 1 is the insert position that keeps the
// index ordered; the value stored is the position the atom is about to get in mAtoms
mAtomIndex.insert(mAtomIndex.begin() + R + 1, mAtoms.size());
return mAtoms.emplace_back(std::move(atom));
}
// Remove atom 'a' from the structure: its atom_site record is erased, it is
// optionally removed from the residue that contains it, and it is removed
// from mAtoms and the ordered index mAtomIndex.
// Failure to remove the atom from its residue is only reported (on
// std::cerr when cif::VERBOSE > 0).
void Structure::removeAtom(Atom &a, bool removeFromResidue)
{
    using namespace cif::literals;

    // Work on a private copy of the atom and its ID. The caller may pass a
    // reference to an element of mAtoms or of a residue's atom list; erasing
    // from those containers would then change or invalidate 'a' while it is
    // still being used below (e.g. as the value argument of std::remove).
    Atom atom = a;
    const std::string atomID = atom.id();

    cif::Datablock &db = datablock();

    auto &atomSites = db["atom_site"];
    atomSites.erase("id"_key == atomID);

    if (removeFromResidue)
    {
        try
        {
            auto &res = getResidue(atom);
            res.mAtoms.erase(std::remove(res.mAtoms.begin(), res.mAtoms.end(), atom), res.mAtoms.end());
        }
        catch (const std::exception &ex)
        {
            if (cif::VERBOSE > 0)
                std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
        }
    }

    assert(mAtomIndex.size() == mAtoms.size());

#ifndef NDEBUG
    bool removed = false;
#endif

    // binary search over the ordered index for the atom's ID
    int L = 0, R = mAtomIndex.size() - 1;
    while (L <= R)
    {
        int i = (L + R) / 2;

        const Atom &candidate = mAtoms[mAtomIndex[i]];

        int d = candidate.id().compare(atomID);

        if (d == 0)
        {
            auto ai = mAtomIndex[i];

            mAtoms.erase(mAtoms.begin() + ai);
            mAtomIndex.erase(mAtomIndex.begin() + i);

            // every atom stored after the removed one moved down one position
            for (auto &j : mAtomIndex)
            {
                if (j > ai)
                    --j;
            }

#ifndef NDEBUG
            removed = true;
#endif
            break;
        }

        if (d < 0)
            L = i + 1;
        else
            R = i - 1;
    }

#ifndef NDEBUG
    assert(removed);
#endif
}
// Swap the identity of two atoms: the label_atom_id and auth_atom_id values
// of their atom_site records are exchanged, as are the cached atom IDs of
// the two atom implementations.
// Any std::exception is rethrown nested in
// std::runtime_error("Failed to swap atoms").
void Structure::swapAtoms(Atom a1, Atom a2)
{
    cif::Datablock &db = datablock();
    auto &atomSites = db["atom_site"];

    try
    {
        auto row1 = atomSites.find1(cif::Key("id") == a1.id());
        auto row2 = atomSites.find1(cif::Key("id") == a2.id());

        {
            auto label1 = row1["label_atom_id"];
            auto label2 = row2["label_atom_id"];
            label1.swap(label2);
        }

        std::swap(a1.mImpl->mAtomID, a2.mImpl->mAtomID);

        {
            auto auth1 = row1["auth_atom_id"];
            auto auth2 = row2["auth_atom_id"];
            auth1.swap(auth2);
        }
    }
    catch (const std::exception &)
    {
        std::throw_with_nested(std::runtime_error("Failed to swap atoms"));
    }
}
// Move atom `a` to position `p` by calling a.location(p).
// NOTE(review): `a` is received by value; this only changes the stored atom
// if copies of Atom share their underlying state -- confirm against Atom.
void Structure::moveAtom(Atom a, Point p)
{
a.location(p);
}
// Change residue `res` into compound `newCompound`.
//
// `remappedAtoms` is a list of (old label_atom_id, new label_atom_id) pairs:
//  - an empty new name or "." removes the atom,
//  - a new name that differs from the old one renames the atom in atom_site
//    and updates its type_symbol from the new compound,
//  - identical names leave the atom untouched.
//
// Throws std::runtime_error when CompoundFactory cannot create `newCompound`.
void Structure::changeResidue(Residue &res, const std::string &newCompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
std::string asymID = res.asymID();
const auto compound = CompoundFactory::instance().create(newCompound);
if (not compound)
throw std::runtime_error("Unknown compound " + newCompound);
// First make sure the compound is already known or insert it.
// And if the residue is an entity, we must make sure it exists
std::string entityID;
if (res.isEntity())
{
// create a copy of the entity first
// Look for an existing non-polymer entity with the compound's name as
// description; find1 throws when there is no single match, in which case a
// new entity row is created.
auto &entity = db["entity"];
try
{
entityID = entity.find1<std::string>("type"_key == "non-polymer" and "pdbx_description"_key == compound->name(), "id");
}
catch (const std::exception &ex)
{
entityID = entity.getUniqueID("");
entity.emplace({{"id", entityID},
{"type", "non-polymer"},
{"pdbx_description", compound->name()},
{"formula_weight", compound->formulaWeight()}});
}
// NOTE(review): this row is emplaced even when the entity already existed;
// whether that can produce a duplicate depends on how emplace treats
// existing keys -- confirm.
auto &pdbxEntityNonpoly = db["pdbx_entity_nonpoly"];
pdbxEntityNonpoly.emplace({{"entity_id", entityID},
{"name", compound->name()},
{"comp_id", newCompound}});
// Point every pdbx_nonpoly_scheme row of this asym at the new compound/entity.
auto &pdbxNonPolyScheme = db["pdbx_nonpoly_scheme"];
for (auto &nps : pdbxNonPolyScheme.find("asym_id"_key == asymID))
{
nps.assign("mon_id", newCompound, true);
nps.assign("auth_mon_id", newCompound, true);
nps.assign("entity_id", entityID, true);
}
// create rest
// Add a chem_comp row for the new compound unless one is already present.
auto &chemComp = db["chem_comp"];
if (not chemComp.exists(cif::Key("id") == newCompound))
{
chemComp.emplace({{"id", newCompound},
{"name", compound->name()},
{"formula", compound->formula()},
{"formula_weight", compound->formulaWeight()},
{"type", compound->type()}});
}
// update the struct_asym for the new entity
db["struct_asym"].update_value("id"_key == asymID, "entity_id", entityID);
}
else
insertCompound(newCompound, false);
res.setCompoundID(newCompound);
auto &atomSites = db["atom_site"];
// `atoms` is a local copy, so removeAtom() below can modify the residue's
// own atom list without invalidating the iterators used here.
auto atoms = res.atoms();
for (const auto &[a1, a2] : remappedAtoms)
{
auto i = find_if(atoms.begin(), atoms.end(), [id = a1](const Atom &a)
{ return a.labelAtomID() == id; });
if (i == atoms.end())
{
// Reported unless verbosity is negative.
if (cif::VERBOSE >= 0)
std::cerr << "Missing atom for atom ID " << a1 << std::endl;
continue;
}
// Skip silently unless exactly one atom_site row carries this atom id.
auto r = atomSites.find(cif::Key("id") == i->id());
if (r.size() != 1)
continue;
if (a2.empty() or a2 == ".")
removeAtom(*i);
else if (a1 != a2)
{
auto ra = r.front();
ra["label_atom_id"] = a2;
ra["auth_atom_id"] = a2;
ra["type_symbol"] = AtomTypeTraits(compound->getAtomByID(a2).typeSymbol).symbol();
}
}
// Finally set the compound id on the atom_site rows of all atoms that were in
// the residue at the start (this includes ids of atoms removed above).
for (auto a : atoms)
{
atomSites.update_value(cif::Key("id") == a.id(), "label_comp_id", newCompound);
atomSites.update_value(cif::Key("id") == a.id(), "auth_comp_id", newCompound);
}
}
// Remove residue `res` and its atoms from the structure and the datablock.
//
// What is erased depends on res.entityType():
//  - Polymer:    the pdbx_poly_seq_scheme row(s) for this asym/seq id and the
//                monomer in every polymer of mPolymers.
//  - NonPolymer: pdbx_nonpoly_scheme and struct_asym rows for the asym, and the
//                residue in mNonPolymers.
//  - Water:      pdbx_nonpoly_scheme rows for the asym and the residue in
//                mNonPolymers (struct_asym is left alone).
//  - Branched:   delegated to removeSugar(), which removes the atoms itself.
//  - Macrolide:  not supported, throws std::runtime_error.
void Structure::removeResidue(Residue &res)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
// Copy the atoms first: after the erase calls below `res` may no longer
// refer to the residue that was passed in.
auto atoms = res.atoms();
switch (res.entityType())
{
case EntityType::Polymer:
{
// dynamic_cast on a reference throws std::bad_cast if res is not a Monomer.
Monomer &monomer = dynamic_cast<Monomer &>(res);
db["pdbx_poly_seq_scheme"].erase(
"asym_id"_key == res.asymID() and
"seq_id"_key == res.seqID());
// NOTE(review): `monomer` is probably an element of one of these ranges,
// and std::remove takes its value by reference -- confirm this aliasing is
// harmless for Monomer's operator==.
for (auto &poly : mPolymers)
poly.erase(std::remove(poly.begin(), poly.end(), monomer), poly.end());
break;
}
case EntityType::NonPolymer:
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
db["struct_asym"].erase("id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Water:
// NOTE(review): this erases every pdbx_nonpoly_scheme row of the asym, not
// only the row of this one water -- confirm that is intended.
db["pdbx_nonpoly_scheme"].erase("asym_id"_key == res.asymID());
mNonPolymers.erase(std::remove(mNonPolymers.begin(), mNonPolymers.end(), res), mNonPolymers.end());
break;
case EntityType::Branched:
{
Sugar &sugar = dynamic_cast<Sugar&>(res);
removeSugar(sugar);
// removeSugar already removed the atoms, so skip the loop below.
atoms.clear();
break;
}
case EntityType::Macrolide:
// TODO: Fix this?
throw std::runtime_error("no support for macrolides yet");
}
// The residue itself is gone already, so do not try to remove the atoms
// from it again (second argument false).
for (auto atom : atoms)
removeAtom(atom, false);
}
// Remove `sugar`, and every sugar linked (directly or indirectly) to it, from
// its branch.
//
// If the sugar is the first one in the branch the whole branch is removed.
// Otherwise the affected sugars and their atoms are removed, an entity for the
// remaining branch is looked up or created, and struct_asym, the atoms'
// label_entity_id and pdbx_branch_scheme are rewritten to match.
//
// Throws std::runtime_error when the sugar is not found in the branch for its
// asym id.
void Structure::removeSugar(Sugar &sugar)
{
using namespace cif::literals;
// Copied up front: `sugar` is erased from the branch further down.
std::string asym_id = sugar.asymID();
Branch &branch = getBranchByAsymID(asym_id);
auto si = std::find(branch.begin(), branch.end(), sugar);
if (si == branch.end())
throw std::runtime_error("Sugar not part of branch");
size_t six = si - branch.begin();
if (six == 0) // first sugar, means the death of this branch
removeBranch(branch);
else
{
// dix collects the numbers of all sugars to delete; test is the work list
// for the depth-first walk over sugars linked to an already collected one.
std::set<size_t> dix;
std::stack<size_t> test;
test.push(sugar.num());
while (not test.empty())
{
auto tix = test.top();
test.pop();
if (dix.count(tix))
continue;
dix.insert(tix);
// Sugars are addressed as branch[num - 1] here.
for (auto atom : branch[tix - 1].atoms())
removeAtom(atom, false);
// Queue every sugar whose link number refers to the one just handled.
for (auto &s : branch)
{
if (s.getLinkNr() == tix)
test.push(s.num());
}
}
// From here on the `sugar` parameter must not be used any more; the loops
// below declare their own `sugar`.
branch.erase(remove_if(branch.begin(), branch.end(), [dix](const Sugar &s) { return dix.count(s.num()); }), branch.end());
cif::Datablock &db = datablock();
auto entity_id = createEntityForBranch(branch);
// Update the entity id of the asym
auto &struct_asym = db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
// Rebuild the pdbx_branch_scheme rows for this asym from scratch.
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", sugar.num()},
{"mon_id", sugar.compoundID()},
{"pdb_asym_id", asym_id},
{"pdb_seq_num", sugar.num()},
{"pdb_mon_id", sugar.compoundID()},
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", sugar.compoundID()},
{"auth_seq_num", sugar.authSeqID()},
{"hetero", "n"}
});
}
}
}
/// Remove \a branch completely: its pdbx_branch_scheme and struct_asym rows,
/// all atoms of all its sugars, and finally the branch itself from mBranches.
void Structure::removeBranch(Branch &branch)
{
    using namespace cif::literals;

    auto &data = datablock();

    data["pdbx_branch_scheme"].erase("asym_id"_key == branch.asymID());
    data["struct_asym"].erase("id"_key == branch.asymID());

    for (auto &sugar : branch)
    {
        // Work on a copy of the atom list, removeAtom() is called on each
        // of its elements.
        auto sugarAtoms = sugar.atoms();

        for (auto sugarAtom : sugarAtoms)
            removeAtom(sugarAtom);
    }

    auto obsolete = remove(mBranches.begin(), mBranches.end(), branch);
    mBranches.erase(obsolete, mBranches.end());
}
// Return the result of insertCompound(comp_id, true) for compound `comp_id`.
// NOTE(review): presumably the returned string is the entity id and `true`
// asks insertCompound to register the compound as an entity -- confirm
// against insertCompound.
std::string Structure::createNonPolyEntity(const std::string &comp_id)
{
return insertCompound(comp_id, true);
}
// Create a new non-polymer residue for entity `entity_id`, using copies of the
// properties of `atoms`.
//
// A new struct_asym row with a fresh asym id is created, one atom_site row is
// emplaced per input atom (with new atom ids), the atoms are added to the
// structure and to the new residue, and a pdbx_nonpoly_scheme row is added.
//
// Returns the new asym id. find1 on pdbx_entity_nonpoly is used to look up the
// compound id, so an unknown entity_id results in whatever find1 throws.
std::string Structure::createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}
});
std::string comp_id = db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
auto &atom_site = db["atom_site"];
// NOTE(review): the trailing 0 and "1" are presumably the seq id and the
// author seq id of the new residue -- confirm against the Residue constructor.
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id, 0, "1");
for (auto &atom : atoms)
{
auto atom_id = atom_site.getUniqueID("");
// Position, occupancy, B-factor, charge and naming are taken from the source
// atom; ids, comp/asym/entity and sequence information describe the new
// residue. auth_atom_id is filled from the source atom's label_atom_id.
auto &&[row, inserted] = atom_site.emplace({
{"group_PDB", atom.get_property<std::string>("group_PDB")},
{"id", atom_id},
{"type_symbol", atom.get_property<std::string>("type_symbol")},
{"label_atom_id", atom.get_property<std::string>("label_atom_id")},
{"label_alt_id", atom.get_property<std::string>("label_alt_id")},
{"label_comp_id", comp_id},
{"label_asym_id", asym_id},
{"label_entity_id", entity_id},
{"label_seq_id", "."},
{"pdbx_PDB_ins_code", ""},
{"Cartn_x", atom.get_property<std::string>("Cartn_x")},
{"Cartn_y", atom.get_property<std::string>("Cartn_y")},
{"Cartn_z", atom.get_property<std::string>("Cartn_z")},
{"occupancy", atom.get_property<std::string>("occupancy")},
{"B_iso_or_equiv", atom.get_property<std::string>("B_iso_or_equiv")},
{"pdbx_formal_charge", atom.get_property<std::string>("pdbx_formal_charge")},
{"auth_seq_id", 1},
{"auth_comp_id", comp_id},
{"auth_asym_id", asym_id},
{"auth_atom_id", atom.get_property<std::string>("label_atom_id")},
{"pdbx_PDB_model_num", 1}
});
// Register the atom with the structure and hand it to the new residue.
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
res.addAtom(newAtom);
}
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
// ndb_seq_num is one more than the number of rows that already exist for this
// asym/entity combination.
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
}
// Create a new non-polymer residue for entity `entity_id` from raw item lists.
//
// Each element of `atom_info` is the list of atom_site items for one atom.
// Items that are missing from a list are appended with default values, so the
// vectors in `atom_info` are modified by this call.
//
// Returns the asym id of the newly created struct_asym row.
std::string Structure::createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::Item>> &atom_info)
{
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}
});
std::string comp_id = db["pdbx_entity_nonpoly"].find1<std::string>("entity_id"_key == entity_id, "comp_id");
auto &atom_site = db["atom_site"];
// NOTE(review): the trailing 0 and "1" are presumably the seq id and the
// author seq id of the new residue -- confirm against the Residue constructor.
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id, 0, "1");
// Append item `i` to `ai` unless `ai` already contains an item of that name.
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
for (auto &atom : atom_info)
{
auto atom_id = atom_site.getUniqueID("");
// Defaults for everything the caller did not specify.
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", comp_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", ""});
appendUnlessSet(atom, {"label_entity_id", entity_id});
appendUnlessSet(atom, {"auth_comp_id", comp_id});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
// NOTE(review): the Atom is created with the freshly generated atom_id even
// when the caller supplied its own "id" item -- confirm callers never do.
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
res.addAtom(newAtom);
}
auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
// ndb_seq_num is one more than the number of rows that already exist for this
// asym/entity combination.
int ndb_nr = pdbx_nonpoly_scheme.find("asym_id"_key == asym_id and "entity_id"_key == entity_id).size() + 1;
pdbx_nonpoly_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"mon_id", comp_id},
{"ndb_seq_num", ndb_nr},
{"pdb_seq_num", res.authSeqID()},
{"auth_seq_num", res.authSeqID()},
{"pdb_mon_id", comp_id},
{"auth_mon_id", comp_id},
{"pdb_strand_id", asym_id},
{"pdb_ins_code", "."},
});
return asym_id;
}
// Create a new branch (sugar tree) whose first sugar is a NAG built from the
// item lists in `nag_atoms`.
//
// Each element of `nag_atoms` is the list of atom_site items for one atom;
// missing items are appended with defaults, so the argument is modified.
// Atoms are first written with a temporary entity id, then the real entity is
// looked up or created and the atoms are updated.
//
// Throws std::logic_error when any atom specifies a label_comp_id other than
// "NAG". Returns a reference to the new branch stored in mBranches.
Branch &Structure::createBranch(std::vector<std::vector<cif::Item>> &nag_atoms)
{
// sanity check
for (auto &nag_atom : nag_atoms)
{
for (auto info : nag_atom)
{
if (info.name() == "label_comp_id" and info.value() != "NAG")
throw std::logic_error("The first sugar in a branch should be a NAG");
}
}
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID();
auto &branch = mBranches.emplace_back(*this, asym_id);
auto &sugar = branch.emplace_back(branch, "NAG", asym_id, 1);
// Placeholder entity id for the atom_site rows; replaced further down once
// the entity for the branch is known.
auto tmp_entity_id = db["entity"].getUniqueID("");
auto &atom_site = db["atom_site"];
// Append item `i` to `ai` unless `ai` already contains an item of that name.
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
for (auto &atom : nag_atoms)
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_comp_id", "NAG"});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_seq_id", "."});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", "NAG"});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"auth_seq_id", 1});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom);
}
// now we can create the entity and get the real ID
auto entity_id = createEntityForBranch(branch);
struct_asym.emplace({
{"id", asym_id},
{"pdbx_blank_PDB_chainid_flag", "N"},
{"pdbx_modified", "N"},
{"entity_id", entity_id},
{"details", "?"}
});
// Replace the placeholder entity id on the atoms just created.
for (auto &a : sugar.atoms())
a.set_property("label_entity_id", entity_id);
db["pdbx_branch_scheme"].emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", 1},
{"mon_id", "NAG"},
{"pdb_asym_id", asym_id},
{"pdb_seq_num", 1},
{"pdb_mon_id", "NAG"},
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", "NAG"},
{"auth_seq_num", 1},
{"hetero", "n"}
});
return branch;
}
// Add a sugar to the existing branch with asym id `asym_id`.
//
// `atom_info`  item lists for the atoms of the new sugar; all label_comp_id
//              items found must agree, and that value becomes the compound id
//              of the sugar. Missing items are appended with defaults, so the
//              argument is modified.
// `link_sugar` 1-based number of the sugar in the branch the new sugar links to.
// `link_atom`  id of the atom in that sugar the new sugar links to.
//
// Throws std::logic_error when the atoms disagree on label_comp_id or when no
// branch with this asym id exists. branch.at() is used to locate the linked
// sugar, so an invalid `link_sugar` results in whatever at() throws.
// Returns a reference to the extended branch.
Branch &Structure::extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info,
int link_sugar, const std::string &link_atom)
{
// sanity check
std::string compoundID;
for (auto &atom : atom_info)
{
for (auto info : atom)
{
if (info.name() != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = info.value();
else if (info.value() != compoundID)
throw std::logic_error("All atoms should be of the same type");
}
}
// NOTE(review): compoundID stays empty when no atom carries a label_comp_id;
// the sugar is then created with an empty compound id -- confirm callers
// always provide it.
using namespace cif::literals;
cif::Datablock &db = datablock();
// auto &branch = mBranches.emplace_back(*this, asym_id);
// Placeholder entity id for the atom_site rows; replaced further down once
// the entity for the extended branch is known.
auto tmp_entity_id = db["entity"].getUniqueID("");
auto &atom_site = db["atom_site"];
// Append item `i` to `ai` unless `ai` already contains an item of that name.
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci)
{ return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
auto bi = std::find_if(mBranches.begin(), mBranches.end(), [asym_id](Branch &b)
{ return b.asymID() == asym_id; });
if (bi == mBranches.end())
throw std::logic_error("Create a branch first!");
Branch &branch = *bi;
// The new sugar gets the next free number in the branch.
int sugarNum = branch.size() + 1;
auto &sugar = branch.emplace_back(branch, compoundID, asym_id, sugarNum);
for (auto &atom : atom_info)
{
auto atom_id = atom_site.getUniqueID("");
appendUnlessSet(atom, {"group_PDB", "HETATM"});
appendUnlessSet(atom, {"id", atom_id});
appendUnlessSet(atom, {"label_asym_id", asym_id});
appendUnlessSet(atom, {"label_comp_id", compoundID});
appendUnlessSet(atom, {"label_entity_id", tmp_entity_id});
appendUnlessSet(atom, {"auth_comp_id", compoundID});
appendUnlessSet(atom, {"auth_asym_id", asym_id});
appendUnlessSet(atom, {"pdbx_PDB_model_num", 1});
appendUnlessSet(atom, {"label_alt_id", ""});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
auto &newAtom = emplace_atom(std::make_shared<Atom::AtomImpl>(db, atom_id, row));
sugar.addAtom(newAtom);
}
sugar.setLink(branch.at(link_sugar - 1).atomByID(link_atom));
auto entity_id = createEntityForBranch(branch);
// Update the entity id of the asym
auto &struct_asym = db["struct_asym"];
auto r = struct_asym.find1("id"_key == asym_id);
r["entity_id"] = entity_id;
for (auto &sugar : branch)
{
for (auto atom : sugar.atoms())
atom.set_property("label_entity_id", entity_id);
}
// Rebuild the pdbx_branch_scheme rows for this asym from scratch.
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
pdbx_branch_scheme.erase("asym_id"_key == asym_id);
for (auto &sugar : branch)
{
pdbx_branch_scheme.emplace({
{"asym_id", asym_id},
{"entity_id", entity_id},
{"num", sugar.num()},
{"mon_id", sugar.compoundID()},
{"pdb_asym_id", asym_id},
{"pdb_seq_num", sugar.num()},
{"pdb_mon_id", sugar.compoundID()},
// TODO: need fix, collect from nag_atoms?
{"auth_asym_id", asym_id},
{"auth_mon_id", sugar.compoundID()},
{"auth_seq_num", sugar.authSeqID()},
{"hetero", "n"}
});
}
return branch;
}
/// Return the id of the "branched" entity describing \a branch, creating the
/// entity when it does not exist yet.
///
/// An existing entity is recognised by type "branched" and a pdbx_description
/// equal to branch.name(). When none is found, a new entity row is added
/// together with one pdbx_entity_branch_list row per sugar and one
/// pdbx_entity_branch_link row per sugar that has a link.
///
/// \return the id of the found or newly created entity.
std::string Structure::createEntityForBranch(Branch &branch)
{
    using namespace cif::literals;

    std::string entityName = branch.name(), entityID;

    auto &entity = mDb["entity"];

    try
    {
        entityID = entity.find1<std::string>("type"_key == "branched" and "pdbx_description"_key == entityName, "id");
    }
    catch (const std::exception &)
    {
        // find1 did not deliver a single match: register a new entity.
        entityID = entity.getUniqueID("");

        // Informational message: only when verbose output was requested.
        // (A bare `if (cif::VERBOSE)` would also fire for negative values.)
        if (cif::VERBOSE > 0)
            std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;

        entity.emplace({{"id", entityID},
            {"type", "branched"},
            {"src_method", "man"},
            {"pdbx_description", entityName},
            {"formula_weight", branch.weight()}});

        // One list entry per sugar in the branch.
        auto &pdbx_entity_branch_list = mDb["pdbx_entity_branch_list"];
        for (auto &sugar : branch)
        {
            pdbx_entity_branch_list.emplace({
                {"entity_id", entityID},
                {"comp_id", sugar.compoundID()},
                {"num", sugar.num()},
                {"hetero", "n"}
            });
        }

        // One link entry per sugar that is linked to another sugar.
        auto &pdbx_entity_branch_link = mDb["pdbx_entity_branch_link"];
        for (auto &s1 : branch)
        {
            auto l2 = s1.getLink();

            if (not l2)
                continue;

            // s2 is the sugar that owns the atom s1 links to.
            auto &s2 = branch.at(std::stoi(l2.authSeqID()) - 1);
            auto l1 = s2.atomByID("C1");

            pdbx_entity_branch_link.emplace({
                {"link_id", pdbx_entity_branch_link.getUniqueID("")},
                {"entity_id", entityID},
                {"entity_branch_list_num_1", s1.authSeqID()},
                {"comp_id_1", s1.compoundID()},
                {"atom_id_1", l1.labelAtomID()},
                {"leaving_atom_id_1", "O1"},
                {"entity_branch_list_num_2", s2.authSeqID()},
                {"comp_id_2", s2.compoundID()},
                {"atom_id_2", l2.labelAtomID()},
                {"leaving_atom_id_2", "H" + l2.labelAtomID()},
                {"value_order", "sing"}
            });
        }
    }

    return entityID;
}
// Remove rows that are no longer referenced and refresh the molecule counts.
//
// In order:
//  1. chem_comp rows whose id is not used as label_comp_id or auth_comp_id by
//     any atom_site row are erased;
//  2. entity rows whose id is not used as label_entity_id by any atom_site row
//     are erased;
//  3. pdbx_entity_nonpoly rows that have neither children nor parents are
//     erased;
//  4. entity.pdbx_number_of_molecules is recomputed for the remaining entities.
void Structure::cleanupEmptyCategories()
{
using namespace cif::literals;
cif::Datablock &db = datablock();
auto &atomSite = db["atom_site"];
// Remove chem_comp's for which there are no atoms at all
// Rows are collected first and erased afterwards, so the category is not
// modified while it is being iterated.
auto &chem_comp = db["chem_comp"];
cif::RowSet obsoleteChemComps(chem_comp);
for (auto chemComp : chem_comp)
{
std::string compID = chemComp["id"].as<std::string>();
if (atomSite.exists("label_comp_id"_key == compID or "auth_comp_id"_key == compID))
continue;
obsoleteChemComps.push_back(chemComp);
}
for (auto chemComp : obsoleteChemComps)
chem_comp.erase(chemComp);
// similarly, remove entities not referenced by any atom
auto &entities = db["entity"];
cif::RowSet obsoleteEntities(entities);
for (auto entity : entities)
{
std::string entityID = entity["id"].as<std::string>();
if (atomSite.exists("label_entity_id"_key == entityID))
continue;
obsoleteEntities.push_back(entity);
}
for (auto entity : obsoleteEntities)
entities.erase(entity);
// the rest?
// The list of categories handled this way currently has a single entry.
for (const char *cat : {"pdbx_entity_nonpoly"})
{
auto &category = db[cat];
cif::RowSet empty(category);
for (auto row : category)
{
if (not category.hasChildren(row) and not category.hasParents(row))
empty.push_back(row);
}
for (auto row : empty)
category.erase(row);
}
// count molecules
for (auto entity : entities)
{
std::string type, id;
cif::tie(type, id) = entity.get("type", "id");
// Stays empty for entity types not handled below.
std::optional<size_t> count;
// NOTE(review): this counts entity_poly rows for the entity, which looks
// like it would normally be one per entity rather than one per chain --
// confirm this is the intended count.
if (type == "polymer")
count = db["entity_poly"].find("entity_id"_key == id).size();
else if (type == "non-polymer" or type == "water")
count = db["pdbx_nonpoly_scheme"].find("entity_id"_key == id).size();
else if (type == "branched")
{
// is this correct?
// Number of distinct asym ids in pdbx_branch_scheme for this entity.
std::set<std::string> asym_ids;
for (const auto &[asym_id] : db["pdbx_branch_scheme"].find<std::string>("entity_id"_key == id, "asym_id"))
asym_ids.insert(asym_id);
count = asym_ids.size();
}
entity["pdbx_number_of_molecules"] = count;
}
}
/// Apply translation \a t to every atom in mAtoms.
void Structure::translate(Point t)
{
    std::for_each(mAtoms.begin(), mAtoms.end(),
        [&t](auto &atom) { atom.translate(t); });
}
/// Apply rotation \a q to every atom in mAtoms.
void Structure::rotate(Quaternion q)
{
    std::for_each(mAtoms.begin(), mAtoms.end(),
        [&q](auto &atom) { atom.rotate(q); });
}
/// Call translateAndRotate(t, q) on every atom in mAtoms.
void Structure::translateAndRotate(Point t, Quaternion q)
{
    std::for_each(mAtoms.begin(), mAtoms.end(),
        [&t, &q](auto &atom) { atom.translateAndRotate(t, q); });
}
/// Call translateRotateAndTranslate(t1, q, t2) on every atom in mAtoms.
void Structure::translateRotateAndTranslate(Point t1, Quaternion q, Point t2)
{
    std::for_each(mAtoms.begin(), mAtoms.end(),
        [&t1, &q, &t2](auto &atom) { atom.translateRotateAndTranslate(t1, q, t2); });
}
/// Debug helper that checks the internal bookkeeping of the structure.
///
/// Verified with assert(), so the checks are only active when NDEBUG is not
/// defined:
///  - mAtomIndex has one entry per atom and lists the atoms in strictly
///    increasing id order;
///  - every atom referenced by a monomer, sugar or non-polymer residue is
///    present in mAtoms, and every atom in mAtoms is referenced by one of them.
void Structure::validateAtoms() const
{
    // validate order
    assert(mAtoms.size() == mAtomIndex.size());

    // Compare each indexed atom with its successor. The bound must be
    // `i + 1 < size` so that the last pair examined is (size - 2, size - 1).
    for (size_t i = 0; i + 1 < mAtoms.size(); ++i)
        assert(mAtoms[mAtomIndex[i]].id().compare(mAtoms[mAtomIndex[i + 1]].id()) < 0);

    // Tick off every atom that is owned by some residue; what remains at the
    // end is not referenced by anything.
    std::vector<Atom> atoms = mAtoms;

    auto removeAtomFromList = [&atoms](const Atom &a)
    {
        auto i = std::find(atoms.begin(), atoms.end(), a);
        assert(i != atoms.end());

        // Erasing end() is undefined; matters when asserts are compiled out.
        if (i != atoms.end())
            atoms.erase(i);
    };

    for (auto &poly : mPolymers)
    {
        for (auto &monomer : poly)
        {
            for (auto &atom : monomer.atoms())
                removeAtomFromList(atom);
        }
    }

    for (auto &branch : mBranches)
    {
        for (auto &sugar : branch)
        {
            for (auto &atom : sugar.atoms())
                removeAtomFromList(atom);
        }
    }

    for (auto &res : mNonPolymers)
    {
        for (auto &atom : res.atoms())
            removeAtomFromList(atom);
    }

    assert(atoms.empty());
}
} // namespace mmcif
This source diff could not be displayed because it is too large. You can view the blob instead.
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <atomic>
#include <mutex>
#include <cif++/structure/Symmetry.hpp>
#include <cif++/utilities.hpp>
#include "./SymOpTable_data.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Unfortunately, clipper has a different numbering scheme than PDB
// for rotation numbers. So we created a table to map those.
// Perhaps a bit over the top, but hey....
// --------------------------------------------------------------------
/// Look up the number of the space group called \a spacegroup.
///
/// The name is first searched by bisection in the `name` column of
/// kSpaceGroups; when that yields nothing the `xHM` column is scanned
/// linearly. The alias "P 21 21 2 A" is mapped to "P 21 21 2 (a)" first.
///
/// \throws std::runtime_error when \a spacegroup is empty or unknown.
int GetSpacegroupNumber(std::string spacegroup)
{
    if (spacegroup == "P 21 21 2 A")
        spacegroup = "P 21 21 2 (a)";
    else if (spacegroup.empty())
        throw std::runtime_error("No spacegroup, cannot continue");

    int number = 0;

    const size_t tableSize = kNrOfSpaceGroups;
    int32_t lo = 0;
    int32_t hi = static_cast<int32_t>(tableSize - 1);

    // bisection on the full name
    while (lo <= hi)
    {
        const int32_t mid = (lo + hi) / 2;
        const int order = spacegroup.compare(kSpaceGroups[mid].name);

        if (order == 0)
        {
            number = kSpaceGroups[mid].nr;
            break;
        }

        if (order < 0)
            hi = mid - 1;
        else
            lo = mid + 1;
    }

    // not found, see if we can find a match based on xHM name
    if (number == 0)
    {
        for (size_t ix = 0; ix < kNrOfSpaceGroups; ++ix)
        {
            auto &candidate = kSpaceGroups[ix];

            if (not (candidate.xHM == spacegroup))
                continue;

            number = candidate.nr;
            break;
        }
    }

    if (number == 0)
        throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

    return number;
}
// --------------------------------------------------------------------
/// Look up the number of the space group called \a spacegroup, where \a type
/// tells which kind of name is given.
///
/// For SpacegroupName::full the `name` column of kSpaceGroups is searched by
/// bisection, for SpacegroupName::xHM the `xHM` column is scanned, and for any
/// other value the `Hall` column is scanned. The alias "P 21 21 2 A" is mapped
/// to "P 21 21 2 (a)" first.
///
/// \throws std::runtime_error when \a spacegroup is empty or unknown.
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type)
{
    if (spacegroup == "P 21 21 2 A")
        spacegroup = "P 21 21 2 (a)";
    else if (spacegroup.empty())
        throw std::runtime_error("No spacegroup, cannot continue");

    int number = 0;

    if (type == SpacegroupName::full)
    {
        const size_t tableSize = kNrOfSpaceGroups;
        int32_t lo = 0;
        int32_t hi = static_cast<int32_t>(tableSize - 1);

        while (lo <= hi)
        {
            const int32_t mid = (lo + hi) / 2;
            const int order = spacegroup.compare(kSpaceGroups[mid].name);

            if (order == 0)
            {
                number = kSpaceGroups[mid].nr;
                break;
            }

            if (order < 0)
                hi = mid - 1;
            else
                lo = mid + 1;
        }
    }
    else
    {
        // Linear scan; the column to compare depends on the requested type.
        const bool useXHM = (type == SpacegroupName::xHM);

        for (auto &sg : kSpaceGroups)
        {
            const bool hit = useXHM ? (sg.xHM == spacegroup) : (sg.Hall == spacegroup);

            if (hit)
            {
                number = sg.nr;
                break;
            }
        }
    }

    // No fallback here: an unknown name of the requested kind is an error.
    if (number == 0)
        throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

    return number;
}
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/structure/TlsParser.hpp>
namespace cif
{
// Sentinel meaning "no bound" for the first/last residue number of a range.
const int kResidueNrWildcard = std::numeric_limits<int>::min();

// Sentinel stored as seqID for residues that have no sequence number.
const int kNoSeqNum = std::numeric_limits<int>::max() - 1;
// --------------------------------------------------------------------
// We parse selection statements and create a selection expression tree
// which is then interpreted by setting the selected flag for the
// residues. After that, the selected ranges are collected and printed.
// One residue as seen by the TLS selection code, with its identifiers in
// both PDB terms (chainID, seqNr, iCode) and mmCIF terms (asymID, seqID),
// plus the flag the selection expressions toggle.
struct TLSResidue
{
    std::string chainID;
    int seqNr;
    char iCode;
    std::string name;
    bool selected;

    std::string asymID;
    int seqID;

    // Equality looks at the PDB identifiers, the (case-insensitive) name and
    // the selected flag; asymID and seqID do not take part.
    bool operator==(const TLSResidue& rhs) const
    {
        if (chainID != rhs.chainID)
            return false;

        if (seqNr != rhs.seqNr)
            return false;

        if (iCode != rhs.iCode)
            return false;

        if (not iequals(name, rhs.name))
            return false;

        return selected == rhs.selected;
    }
};
// Print the currently selected residues in `selected` to std::cout, first as
// ranges in PDB terms (chain id and sequence numbers), then as ranges in mmCIF
// terms (asym id and seq ids). Prints "Empty selection" when nothing is
// selected.
//
// `selected`    residues, expected to be ordered so that selected residues of
//               one chain/asym are adjacent.
// `indentLevel` nesting depth; every line is indented by two spaces per level.
void DumpSelection(const std::vector<TLSResidue>& selected, std::size_t indentLevel)
{
    std::string indent(indentLevel * 2, ' ');

    auto i = selected.begin();
    bool first = true;

    // First print in PDB space
    while (i != selected.end())
    {
        // b: first selected residue, e: end of the run within the same chain
        auto b = std::find_if(i, selected.end(), [](const auto &s) -> bool { return s.selected; });
        if (b == selected.end())
            break;

        if (first)
            std::cout << indent << "PDB:" << std::endl;
        first = false;

        auto e = std::find_if(b, selected.end(), [b](const auto &s) -> bool { return s.chainID != b->chainID or not s.selected; });

        std::cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << std::endl;
        i = e;
    }

    // Then in mmCIF space
    if (not first)
        std::cout << indent << "mmCIF:" << std::endl;

    i = selected.begin();
    while (i != selected.end())
    {
        auto b = std::find_if(i, selected.end(), [](const auto &s) -> bool { return s.selected; });
        if (b == selected.end())
            break;

        auto e = std::find_if(b, selected.end(), [b](const auto &s) -> bool { return s.asymID != b->asymID or not s.selected; });

        std::string asymID = b->asymID;
        int from = b->seqID, to = from;

        for (auto j = b + 1; j != e; ++j)
        {
            if (j->seqID == to + 1)
                to = j->seqID;
            else if (j->seqID != to) // probably an insertion code
            {
                // Gap in the numbering: flush the range collected so far ...
                if (from == kNoSeqNum or to == kNoSeqNum)
                    std::cout << indent << " >> " << asymID << std::endl;
                else
                    std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;

                // ... and start a new one at the current residue.
                asymID = b->asymID;
                from = to = j->seqID;
            }
        }

        if (from == kNoSeqNum or to == kNoSeqNum)
            std::cout << indent << " >> " << asymID << std::endl;
        else
            std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;

        i = e;
    }

    // The tty and non-tty cases printed exactly the same, so no need to
    // distinguish them.
    if (first)
        std::cout << indent << cif::coloured("Empty selection") << std::endl;
}
// Evaluate this selection against the residues in `db` and return the selected
// residues as a list of (asym id, first seq id, last seq id) ranges.
//
// Residues are collected from pdbx_poly_seq_scheme and pdbx_nonpoly_scheme
// (waters, i.e. HOH/H2O, are skipped). When `pdbNamespace` is true the PDB
// identifiers (strand id, PDB sequence number, insertion code) are used for
// matching, otherwise the mmCIF identifiers (asym id, seq id) are used.
// Rows without a sequence number are skipped.
//
// Within one asym a new range is started whenever the seq id does not
// continue the previous one.
//
// Throws std::runtime_error when an insertion code is longer than one
// character.
std::vector<std::tuple<std::string,int,int>> TLSSelection::GetRanges(datablock& db, bool pdbNamespace) const
{
    std::vector<TLSResidue> selected;

    // Collect the residues from poly seq scheme...
    for (auto r: db["pdbx_poly_seq_scheme"])
    {
        std::string chain, seqNr, iCode, name;
        std::string asymID;
        int seqID = 0;

        if (pdbNamespace)
            cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_comp_id", "asym_id", "seq_id");
        else
        {
            cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");
            asymID = chain;
        }

        // Skip rows without a number before trying to convert it.
        if (seqNr.empty())
            continue;

        if (iCode.length() > 1)
            throw std::runtime_error("invalid iCode");

        const int nr = stoi(seqNr);

        // In mmCIF terms the sequence number is the seq id.
        if (not pdbNamespace)
            seqID = nr;

        selected.push_back({chain, nr, iCode[0], name, false, asymID, seqID});
    }

    // ... those from the nonpoly scheme
    for (auto r: db["pdbx_nonpoly_scheme"])
    {
        std::string chain, seqNr, iCode, name, asymID;

        if (pdbNamespace)
        {
            cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
            if (seqNr.empty())
                continue;
        }
        else
        {
            cif::tie(chain, name) = r.get("asym_id", "mon_id");
            asymID = chain;
            seqNr = "0";
        }

        if (iequals(name, "HOH") or iequals(name, "H2O"))
            continue;

        if (iCode.length() > 1)
            throw std::runtime_error("invalid iCode");

        selected.push_back({chain, stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum});
    }

    // selected might consist of multiple ranges
    // output per chain
    stable_sort(selected.begin(), selected.end(), [](auto& a, auto& b) -> bool
        {
            int d = a.chainID.compare(b.chainID);
            if (d == 0)
                d = a.seqNr - b.seqNr;
            return d < 0;
        });

    // Let the selection expression set the `selected` flags.
    CollectResidues(db, selected);

    std::vector<std::tuple<std::string,int,int>> result;

    auto i = selected.begin();

    while (i != selected.end())
    {
        // b: first selected residue, e: end of the run within the same asym
        auto b = std::find_if(i, selected.end(), [](const auto &s) -> bool { return s.selected; });
        if (b == selected.end())
            break;

        auto e = std::find_if(b, selected.end(), [b](const auto &s) -> bool { return s.asymID != b->asymID or not s.selected; });

        // return ranges with strict increasing sequence numbers.
        // So when there's a gap in the sequence we split the range.
        // Beware of iCodes though
        result.push_back(make_tuple(b->asymID, b->seqID, b->seqID));
        for (auto j = b + 1; j != e; ++j)
        {
            if (j->seqID == std::get<2>(result.back()) + 1)
                std::get<2>(result.back()) = j->seqID;
            else if (j->seqID != std::get<2>(result.back())) // probably an insertion code
                result.push_back(make_tuple(b->asymID, j->seqID, j->seqID));
        }

        i = e;
    }

    return result;
}
struct TLSSelectionNot : public TLSSelection
{
TLSSelectionNot(TLSSelectionPtr selection)
: selection(selection.release()) {}
virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
{
selection->CollectResidues(db, residues, indentLevel + 1);
for (auto& r: residues)
r.selected = not r.selected;
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "NOT" << std::endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr selection;
};
struct TLSSelectionAll : public TLSSelection
{
TLSSelectionAll() {}
virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
{
for (auto& r: residues)
r.selected = true;
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "ALL" << std::endl;
DumpSelection(residues, indentLevel);
}
}
};
// Selection node that selects residues by chain ID. The chain ID "*" acts
// as a wildcard that matches every chain.
struct TLSSelectionChain : public TLSSelectionAll
{
	TLSSelectionChain(const std::string& chainID)
		: m_chain(chainID)
	{
	}

	// Overwrites the selected flag of every residue: true when the residue
	// is in the requested chain (or the wildcard was given), false otherwise.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		const bool wildcard = (m_chain == "*");

		for (auto& residue : residues)
		{
			if (wildcard)
				residue.selected = true;
			else
				residue.selected = (residue.chainID == m_chain);
		}

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "CHAIN " << m_chain << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	std::string m_chain;
};
// Selection node that selects residues with one specific sequence number
// and insertion code.
struct TLSSelectionResID : public TLSSelectionAll
{
	TLSSelectionResID(int seqNr, char iCode)
		: m_seq_nr(seqNr)
		, m_icode(iCode)
	{
	}

	// Overwrites the selected flag of every residue with the result of
	// comparing both its sequence number and its insertion code.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		for (auto& residue : residues)
		{
			const bool sameNumber = residue.seqNr == m_seq_nr;
			const bool sameICode = residue.iCode == m_icode;
			residue.selected = sameNumber and sameICode;
		}

		if (cif::VERBOSE > 0)
		{
			// only print the insertion code when there is one
			std::string icode;
			if (m_icode)
				icode = std::string(1, m_icode);

			std::cout << std::string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << icode << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	int m_seq_nr;
	char m_icode;
};
// Selection node that selects residues whose sequence number lies in the
// inclusive range [first, last]. Either bound may be kResidueNrWildcard,
// in which case that side of the range is unbounded.
struct TLSSelectionRangeSeq : public TLSSelectionAll
{
	TLSSelectionRangeSeq(int first, int last)
		: m_first(first)
		, m_last(last)
	{
	}

	// Overwrites the selected flag of every residue with the range test.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		const bool openStart = (m_first == kResidueNrWildcard);
		const bool openEnd = (m_last == kResidueNrWildcard);

		for (auto& residue : residues)
		{
			const bool afterStart = openStart or residue.seqNr >= m_first;
			const bool beforeEnd = openEnd or residue.seqNr <= m_last;
			residue.selected = afterStart and beforeEnd;
		}

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Range " << m_first << ':' << m_last << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	int m_first, m_last;
};
// Selection node for a "through" range: per chain, everything positioned
// between the residue matching (first, icodeFirst) and the residue matching
// (last, icodeLast) in the residue list, both ends included.
struct TLSSelectionRangeID : public TLSSelectionAll
{
	TLSSelectionRangeID(int first, int last, char icodeFirst = 0, char icodeLast = 0)
		: m_first(first)
		, m_last(last)
		, m_icode_first(icodeFirst)
		, m_icode_last(icodeLast)
	{
	}

	// Marks the residues in the range as selected. Flags of residues outside
	// the range are left as they were; this method only ever sets flags.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		// the range has to be resolved for each chain separately
		std::set<std::string> chainIDs;
		for (const auto& residue : residues)
			chainIDs.insert(residue.chainID);

		for (const std::string& chainID : chainIDs)
		{
			// first residue in this chain with the given number and insertion code
			auto locate = [&residues, &chainID](int seqNr, char iCode)
			{
				return std::find_if(residues.begin(), residues.end(),
					[&](const auto& r) -> bool
					{
						return r.chainID == chainID and r.seqNr == seqNr and r.iCode == iCode;
					});
			};

			auto first = locate(m_first, m_icode_first);
			auto last = locate(m_last, m_icode_last);

			// both ends must exist and be in list order
			if (first == residues.end() or last == residues.end() or last < first)
				continue;

			for (auto it = first, stop = last + 1; it != stop; ++it)
				it->selected = true;
		}

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Through " << m_first << ':' << m_last << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	int m_first, m_last;
	char m_icode_first, m_icode_last;
};
struct TLSSelectionUnion : public TLSSelection
{
TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
TLSSelectionUnion(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
lhs->CollectResidues(db, a, indentLevel + 1);
rhs->CollectResidues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected or bi->selected;
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "Union" << std::endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr lhs;
TLSSelectionPtr rhs;
};
struct TLSSelectionIntersection : public TLSSelection
{
TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
TLSSelectionIntersection(TLSSelectionPtr& lhs, TLSSelectionPtr&& rhs)
: lhs(lhs.release()), rhs(rhs.release()) {}
virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto& r) { r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto& r) { r.selected = false; });
lhs->CollectResidues(db, a, indentLevel + 1);
rhs->CollectResidues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected and bi->selected;
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "Intersection" << std::endl;
DumpSelection(residues, indentLevel);
}
}
TLSSelectionPtr lhs;
TLSSelectionPtr rhs;
};
// Selection node that selects residues by exact (case sensitive) residue name.
struct TLSSelectionByName : public TLSSelectionAll
{
	TLSSelectionByName(const std::string& resname)
		: m_name(resname)
	{
	}

	// Overwrites the selected flag of every residue with the name comparison.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		for (auto& residue : residues)
		{
			const bool sameName = (residue.name == m_name);
			residue.selected = sameName;
		}

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Name " << m_name << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	std::string m_name;
};
// Selection node that selects residues by element symbol.
struct TLSSelectionByElement : public TLSSelectionAll
{
	TLSSelectionByElement(const std::string& element)
		: m_element(element)
	{
	}

	// Overwrites the selected flag of every residue.
	//
	// Only whole residues can be selected, so the element is matched against
	// the residue name (case insensitive). The assumption is that a residue
	// made of a single atom of this element carries the element symbol as
	// its residue name.
	virtual void CollectResidues(datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel) const
	{
		for (auto& residue : residues)
		{
			const bool matches = iequals(residue.name, m_element);
			residue.selected = matches;
		}

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Element " << m_element << std::endl;
			DumpSelection(residues, indentLevel);
		}
	}

	std::string m_element;
};
// --------------------------------------------------------------------
// Common base for the recursive descent parsers of TLS selection strings.
//
// The parser keeps its own copy of the selection text and scans it with a
// pair of iterators. Concrete parsers supply the tokenizer (GetNextToken),
// the token naming used in error messages (ToString) and the grammar (Parse).
class TLSSelectionParserImpl
{
  public:
	// Stores a private copy of `selection` and positions the scanner at its start.
	// m_selection is declared before m_p/m_end, so the iterators are taken
	// from the already constructed copy.
	TLSSelectionParserImpl(const std::string& selection)
		: m_selection(selection)
		, m_p(m_selection.begin())
		, m_end(m_selection.end())
	{
	}

	// This is a polymorphic base class, so give it a virtual destructor.
	virtual ~TLSSelectionParserImpl() = default;

	// m_p and m_end point into this object's own m_selection. A memberwise
	// copy would leave the copy's iterators referring to the source object's
	// string, so copying is disabled.
	TLSSelectionParserImpl(const TLSSelectionParserImpl&) = delete;
	TLSSelectionParserImpl& operator=(const TLSSelectionParserImpl&) = delete;

	// Parses the complete selection and returns the resulting selection tree.
	virtual TLSSelectionPtr Parse() = 0;

  protected:
	// Scans and returns the next token, either a character code or one of
	// the parser specific token constants (values of 256 and up).
	virtual int GetNextToken() = 0;

	// Consumes the lookahead token when it equals `token`, otherwise throws
	// a std::runtime_error describing what was expected and what was found.
	virtual void Match(int token);

	// Returns a human readable description of a token constant.
	virtual std::string ToString(int token) = 0;

	std::string m_selection;         // private copy of the text being parsed
	std::string::iterator m_p, m_end; // scan position and end of m_selection
	int m_lookahead = 0;             // the token that will be matched next
	std::string m_token;             // text of the current token, for tokenizers that track it; used in error messages
};
void TLSSelectionParserImpl::Match(int token)
{
if (m_lookahead == token)
m_lookahead = GetNextToken();
else
{
std::string expected;
if (token >= 256)
expected = ToString(token);
else
expected = { char(token) };
std::string found;
if (m_lookahead >= 256)
found = ToString(m_lookahead) + " (" + m_token + ')';
else
found = { char(m_lookahead) };
throw std::runtime_error("Expected " + expected + " but found " + found);
}
}
// --------------------------------------------------------------------
class TLSSelectionParserImplPhenix : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplPhenix(const std::string& selection)
: TLSSelectionParserImpl(selection)
{
m_lookahead = GetNextToken();
}
virtual TLSSelectionPtr Parse();
private:
TLSSelectionPtr ParseAtomSelection();
TLSSelectionPtr ParseTerm();
TLSSelectionPtr ParseFactor();
enum TOKEN {
pt_NONE = 0,
pt_IDENT = 256,
pt_STRING,
pt_NUMBER,
pt_RESID,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_ICODE,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int GetNextToken();
virtual std::string ToString(int token);
int m_value_i;
std::string m_value_s;
char m_icode;
};
// Tokenizer for the PHENIX selection syntax.
//
// The scanner tries a series of sub-automata in a fixed order (resid,
// number, identifier, single quoted string, double quoted string, any other
// character). When one of them fails, restart() rewinds to the start of the
// token and moves on to the next one.
//
// On return m_value_i, m_value_s and m_icode hold the value of the token and
// m_token holds its text. Returns a TOKEN constant or, for anything that is
// not recognised otherwise, the character itself. Throws std::runtime_error
// on an unterminated quoted string.
//
// End of input is presented to the automata as a character with value 0.
// That character is synthetic: it is never read from the string, the scan
// position never moves beyond m_end and it is never added to m_token. This
// keeps repeated calls at the end of the input (as made by Match(pt_EOLN))
// within the bounds of the string.
int TLSSelectionParserImplPhenix::GetNextToken()
{
	int result = pt_NONE;

	enum STATE
	{
		st_START,
		st_RESID = 200,
		st_NUM = 300,
		st_IDENT = 400,
		st_QUOTED = 500,
		st_DQUOTED = 550,
		st_OTHER = 600
	};

	int state = st_START;

	m_value_i = 0;
	m_icode = 0;
	m_value_s.clear();

	auto s = m_p; // start of the token, restart() rewinds to here
	auto start = state;

	m_token.clear();

	// true when the character under inspection was really read from the input
	bool consumed = false;

	// give up on the current sub-automaton and try the next one from the token start
	auto restart = [&]()
	{
		switch (start)
		{
			case st_START: state = start = st_RESID; break;
			case st_RESID: state = start = st_NUM; break;
			case st_NUM: state = start = st_IDENT; break;
			case st_IDENT: state = start = st_QUOTED; break;
			case st_QUOTED: state = start = st_DQUOTED; break;
			case st_DQUOTED: state = start = st_OTHER; break;
		}
		m_token.clear();
		m_p = s;
	};

	// push the character under inspection back; a no-op for the synthetic
	// end of input marker since that was never consumed
	auto retract = [&]()
	{
		if (consumed)
		{
			--m_p;
			m_token.pop_back();
		}
	};

	while (result == pt_NONE)
	{
		char ch = 0;

		consumed = m_p < m_end;
		if (consumed)
		{
			ch = *m_p++;
			m_token += ch;
		}

		switch (state)
		{
			// start block
			case st_START:
				if (ch == 0)
					result = pt_EOLN;
				else if (isspace(ch))
				{
					// skip leading white space, the token starts after it
					m_token.clear();
					++s;
				}
				else
					restart();
				break;

			// RESID block: optional minus, digits, one letter (insertion code)
			case st_RESID:
				if (ch == '-')
					state = st_RESID + 1;
				else if (isdigit(ch))
				{
					m_value_i = (ch - '0');
					state = st_RESID + 2;
				}
				else
					restart();
				break;

			case st_RESID + 1:
				if (isdigit(ch))
				{
					m_value_i = -(ch - '0');
					state = st_RESID + 2;
				}
				else
					restart();
				break;

			case st_RESID + 2:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
				else if (isalpha(ch))
				{
					m_icode = ch;
					state = st_RESID + 3;
				}
				else
					restart();
				break;

			case st_RESID + 3:
				if (isalnum(ch))
					restart();
				else
				{
					retract();
					result = pt_RESID;
				}
				break;

			// NUM block: optional minus followed by digits
			case st_NUM:
				if (ch == '-')
					state = st_NUM + 1;
				else if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM + 2;
				}
				else
					restart();
				break;

			case st_NUM + 1:
				if (isdigit(ch))
				{
					m_value_i = -(ch - '0');
					state = st_NUM + 2;
				}
				else
					restart();
				break;

			case st_NUM + 2:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
				else if (not isalpha(ch))
				{
					result = pt_NUMBER;
					retract();
				}
				else
					restart();
				break;

			// IDENT block: alphanumerics, a single quote is allowed after the first character
			case st_IDENT:
				if (isalnum(ch))
				{
					m_value_s = { ch };
					state = st_IDENT + 1;
				}
				else
					restart();
				break;

			case st_IDENT + 1:
				if (isalnum(ch) or ch == '\'')
					m_value_s += ch;
				else
				{
					// the delimiter belongs to the next token
					retract();
					result = pt_IDENT;
				}
				break;

			// QUOTED block: single quoted string
			case st_QUOTED:
				if (ch == '\'')
				{
					m_value_s.clear();
					state = st_QUOTED + 1;
				}
				else
					restart();
				break;

			case st_QUOTED + 1:
				if (ch == '\'')
					result = pt_STRING;
				else if (ch == 0)
					throw std::runtime_error("Unexpected end of selection, missing quote character?");
				else
					m_value_s += ch;
				break;

			// DQUOTED block: double quoted string
			case st_DQUOTED:
				if (ch == '\"')
				{
					m_value_s.clear();
					state = st_DQUOTED + 1;
				}
				else
					restart();
				break;

			case st_DQUOTED + 1:
				if (ch == '\"')
					result = pt_STRING;
				else if (ch == 0)
					throw std::runtime_error("Unexpected end of selection, missing quote character?");
				else
					m_value_s += ch;
				break;

			// OTHER block: any remaining character is a token by itself
			case st_OTHER:
				result = ch;
				break;
		}
	}

	// promote identifiers that are keywords (case insensitive)
	if (result == pt_IDENT)
	{
		if (iequals(m_value_s, "CHAIN"))
			result = pt_KW_CHAIN;
		else if (iequals(m_value_s, "ALL"))
			result = pt_KW_ALL;
		else if (iequals(m_value_s, "AND"))
			result = pt_KW_AND;
		else if (iequals(m_value_s, "OR"))
			result = pt_KW_OR;
		else if (iequals(m_value_s, "NOT"))
			result = pt_KW_NOT;
		else if (iequals(m_value_s, "RESSEQ"))
			result = pt_KW_RESSEQ;
		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
			result = pt_KW_RESID;
		else if (iequals(m_value_s, "RESNAME"))
			result = pt_KW_RESNAME;
		else if (iequals(m_value_s, "ELEMENT"))
			result = pt_KW_ELEMENT;
		else if (iequals(m_value_s, "PDB"))
			result = pt_KW_PDB;
		else if (iequals(m_value_s, "ENTRY"))
			result = pt_KW_ENTRY;
		else if (iequals(m_value_s, "THROUGH"))
			result = pt_KW_THROUGH;
	}

	return result;
}
std::string TLSSelectionParserImplPhenix::ToString(int token)
{
switch (token)
{
case pt_IDENT: return "identifier";
case pt_STRING: return "string";
case pt_NUMBER: return "number";
case pt_RESID: return "resid";
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default: return "character";
}
}
// Parses a complete PHENIX selection.
//
// A selection starting with the PDB keyword is rejected as unimplemented
// (the ENTRY keyword that would follow is not consumed). One superfluous
// closing parenthesis at the end is accepted; a warning is printed for it
// when running verbose. Throws std::runtime_error on any other syntax error.
TLSSelectionPtr TLSSelectionParserImplPhenix::Parse()
{
	if (m_lookahead == pt_KW_PDB)
	{
		Match(pt_KW_PDB);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	TLSSelectionPtr result = ParseAtomSelection();

	// swallow a single stray ')' without checking it against an opening one
	const bool extraParenthesis = (m_lookahead == ')');
	if (extraParenthesis)
		m_lookahead = GetNextToken();

	Match(pt_EOLN);

	if (cif::VERBOSE > 0 and extraParenthesis)
		std::cerr << "WARNING: too many closing parenthesis in TLS selection statement" << std::endl;

	return result;
}
// selection := term { OR term }
// Builds a left-associative chain of unions.
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseAtomSelection()
{
	TLSSelectionPtr result = ParseTerm();

	for (; m_lookahead == pt_KW_OR;)
	{
		Match(pt_KW_OR);

		TLSSelectionPtr rhs = ParseTerm();
		// the union takes over ownership of both operands
		result.reset(new TLSSelectionUnion(result, rhs));
	}

	return result;
}
// term := factor { AND factor }
// Builds a left-associative chain of intersections.
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseTerm()
{
	TLSSelectionPtr result = ParseFactor();

	for (; m_lookahead == pt_KW_AND;)
	{
		Match(pt_KW_AND);

		TLSSelectionPtr rhs = ParseFactor();
		// the intersection takes over ownership of both operands
		result.reset(new TLSSelectionIntersection(result, rhs));
	}

	return result;
}
// factor := '(' selection ')'
//         | NOT selection
//         | CHAIN (number | string | ident)
//         | RESNAME ident
//         | ELEMENT ident
//         | RESSEQ number [ ':' number ]
//         | RESID (number | resid) [ (':' | '-' | THROUGH) (number | resid) ]
//         | ALL
//
// Returns the selection node for the factor. Throws std::runtime_error on
// an unexpected token.
//
// Note that the value of a token (m_value_i, m_value_s, m_icode) belongs to
// the lookahead token, so values are picked up before the token is matched.
TLSSelectionPtr TLSSelectionParserImplPhenix::ParseFactor()
{
	TLSSelectionPtr result;

	switch (m_lookahead)
	{
		case '(':
			Match('(');
			result = ParseAtomSelection();
			if (m_lookahead == pt_EOLN)
			{
				// A missing closing parenthesis at the end of the selection
				// is tolerated. The verbosity level only decides whether a
				// warning is printed, not whether parsing succeeds.
				if (cif::VERBOSE > 0)
					std::cerr << "WARNING: missing closing parenthesis in TLS selection statement" << std::endl;
			}
			else
				Match(')');
			break;

		case pt_KW_NOT:
			Match(pt_KW_NOT);
			result.reset(new TLSSelectionNot(ParseAtomSelection()));
			break;

		case pt_KW_CHAIN:
		{
			Match(pt_KW_CHAIN);

			std::string chainID = m_value_s;
			if (m_lookahead == pt_NUMBER) // sigh, chain IDs can be numbers
			{
				chainID = std::to_string(m_value_i);
				Match(pt_NUMBER);
			}
			else
				Match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);

			result.reset(new TLSSelectionChain(chainID));
			break;
		}

		case pt_KW_RESNAME:
		{
			Match(pt_KW_RESNAME);
			std::string name = m_value_s;
			Match(pt_IDENT);
			result.reset(new TLSSelectionByName(name));
			break;
		}

		case pt_KW_ELEMENT:
		{
			Match(pt_KW_ELEMENT);
			std::string element = m_value_s;
			Match(pt_IDENT);
			result.reset(new TLSSelectionByElement(element));
			break;
		}

		case pt_KW_RESSEQ:
		{
			Match(pt_KW_RESSEQ);

			int from = m_value_i;
			Match(pt_NUMBER);

			int to = from;
			if (m_lookahead == ':')
			{
				Match(':');
				to = m_value_i;
				Match(pt_NUMBER);
			}

			result.reset(new TLSSelectionRangeSeq(from, to));
			break;
		}

		case pt_KW_RESID:
		{
			Match(pt_KW_RESID);

			int from, to;
			char icode_from = 0, icode_to = 0;
			bool through = false;

			from = to = m_value_i;

			if (m_lookahead == pt_NUMBER)
				Match(pt_NUMBER);
			else
			{
				icode_from = m_icode;
				Match(pt_RESID);
			}

			if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
			{
				through = m_lookahead == pt_KW_THROUGH;

				Match(m_lookahead);

				to = m_value_i;
				if (m_lookahead == pt_NUMBER)
					Match(pt_NUMBER);
				else
				{
					icode_to = m_icode;
					Match(pt_RESID);
				}

				if (through)
					result.reset(new TLSSelectionRangeID(from, to, icode_from, icode_to));
				else
				{
					// ':' and '-' ranges work on sequence numbers only
					if (cif::VERBOSE > 0 and (icode_from or icode_to))
						std::cerr << "Warning, ignoring insertion codes" << std::endl;

					result.reset(new TLSSelectionRangeSeq(from, to));
				}
			}
			else
				result.reset(new TLSSelectionResID(from, icode_from));

			break;
		}

		case pt_KW_ALL:
			Match(pt_KW_ALL);
			result.reset(new TLSSelectionAll());
			break;

		default:
			throw std::runtime_error("Unexpected token " + ToString(m_lookahead) + " (" + m_token + ')');
	}

	return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBuster : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplBuster(const std::string& selection);
virtual TLSSelectionPtr Parse();
protected:
enum TOKEN {
bt_NONE = 0,
bt_IDENT = 256,
bt_NUMBER,
bt_EOLN,
};
virtual int GetNextToken();
virtual std::string ToString(int token);
TLSSelectionPtr ParseGroup();
std::tuple<std::string,int> ParseAtom();
TLSSelectionPtr ParseOldGroup();
int m_value_i;
std::string m_value_s;
bool m_parsing_old_style = false;
};
// Sets up the scanner on a copy of `selection` and reads the first token
// into the lookahead, so the grammar functions can start right away.
TLSSelectionParserImplBuster::TLSSelectionParserImplBuster(const std::string& selection)
	: TLSSelectionParserImpl(selection)
{
	this->m_lookahead = this->GetNextToken();
}
// Tokenizer for the BUSTER selection syntax.
//
// Recognises (optionally negative) integers, identifiers and the end of the
// input; every other character is returned as a token by itself. On return
// m_value_i or m_value_s holds the value of the token.
//
// End of input is presented to the automaton as a character with value 0.
// That character is synthetic: it is never read from the string and the scan
// position never moves beyond m_end, so calling this function again at the
// end of the input (as Match(bt_EOLN) does) stays within bounds.
int TLSSelectionParserImplBuster::GetNextToken()
{
	int result = bt_NONE;

	enum STATE
	{
		st_START,
		st_NEGATE,
		st_NUM,
		st_IDENT
	} state = st_START;

	m_value_i = 0;
	m_value_s.clear();
	bool negative = false;

	// true when the character under inspection was really read from the input
	bool consumed = false;

	// push the character under inspection back; a no-op for the synthetic
	// end of input marker since that was never consumed
	auto retract = [&]()
	{
		if (consumed)
			--m_p;
	};

	while (result == bt_NONE)
	{
		char ch = 0;

		consumed = m_p < m_end;
		if (consumed)
			ch = *m_p++;

		switch (state)
		{
			case st_START:
				if (ch == 0)
					result = bt_EOLN;
				else if (isspace(ch))
					continue;
				else if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
				}
				else if (isalpha(ch))
				{
					m_value_s = { ch };
					state = st_IDENT;
				}
				else if (ch == '-')
				{
					state = st_NEGATE;
				}
				else
					result = ch;
				break;

			case st_NEGATE:
				if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
					negative = true;
				}
				else
				{
					// a lone minus sign, used as range separator
					retract();
					result = '-';
				}
				break;

			case st_NUM:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else
				{
					if (negative)
						m_value_i = -m_value_i;

					result = bt_NUMBER;
					retract();
				}
				break;

			case st_IDENT:
				if (isalnum(ch))
					m_value_s += ch;
				else
				{
					retract();
					result = bt_IDENT;
				}
				break;
		}
	}

	return result;
}
// Returns the description of a BUSTER token for use in error messages,
// including the current token value where there is one. Being called with
// anything but one of the three named tokens is considered a programming
// error (assert).
std::string TLSSelectionParserImplBuster::ToString(int token)
{
	if (token == bt_IDENT)
		return "identifier (" + m_value_s + ')';

	if (token == bt_NUMBER)
		return "number (" + std::to_string(m_value_i) + ')';

	if (token == bt_EOLN)
		return "end of line";

	assert(false);
	return "unknown token";
}
// group := '{' range { range } '}'
// range := atom [ '-' ( number | atom ) ]
//
// Every range becomes the intersection of a chain selection and a sequence
// number range; the ranges of a group are combined into a union. A range
// that runs from one chain into another is split in two open ended ranges,
// one per chain.
TLSSelectionPtr TLSSelectionParserImplBuster::ParseGroup()
{
	TLSSelectionPtr result;

	// add a selection to the result, as a union with what is there already
	auto merge = [&result](TLSSelectionPtr& s)
	{
		if (result == nullptr)
			result.reset(s.release());
		else
			result.reset(new TLSSelectionUnion(result, s));
	};

	// build the selection for residues from..to in one chain
	auto chainRange = [](const std::string& chainID, int from, int to)
	{
		TLSSelectionPtr sc(new TLSSelectionChain(chainID));
		TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
		return TLSSelectionPtr(new TLSSelectionIntersection(sc, sr));
	};

	Match('{');

	do
	{
		std::string chain1;
		int seqNr1;
		std::tie(chain1, seqNr1) = ParseAtom();

		if (m_lookahead != '-')
		{
			// a single residue
			TLSSelectionPtr single = chainRange(chain1, seqNr1, seqNr1);
			merge(single);
			continue;
		}

		Match('-');

		std::string chain2;
		int seqNr2 = seqNr1;

		if (m_lookahead == bt_NUMBER)
		{
			// short form, the end of the range is in the same chain
			seqNr2 = m_value_i;
			Match(bt_NUMBER);
		}
		else
		{
			std::tie(chain2, seqNr2) = ParseAtom();

			if (chain1 != chain2)
			{
				if (cif::VERBOSE > 0)
					std::cerr << "Warning, ranges over multiple chains detected" << std::endl;

				TLSSelectionPtr head = chainRange(chain1, seqNr1, kResidueNrWildcard);
				TLSSelectionPtr tail = chainRange(chain2, kResidueNrWildcard, seqNr2);
				TLSSelectionPtr both(new TLSSelectionUnion(head, tail));
				merge(both);

				// this range has been dealt with
				chain1.clear();
			}
		}

		// an empty chain ID means there is nothing (left) to add
		if (not chain1.empty())
		{
			TLSSelectionPtr range = chainRange(chain1, seqNr1, seqNr2);
			merge(range);
		}
	}
	while (m_lookahead != '}');

	Match('}');

	return result;
}
// atom := ( '*' | ident ) '|' ( '*' | number [ ':' ident ] )
//
// Returns the chain ID and the sequence number of the atom specification.
// A wildcard chain is returned as "*", the value TLSSelectionChain treats as
// "all chains"; a wildcard residue number as kResidueNrWildcard. An atom ID
// following the residue number is parsed but ignored, since only whole
// residues can be selected.
std::tuple<std::string, int> TLSSelectionParserImplBuster::ParseAtom()
{
	std::string chain;
	int seqNr = kResidueNrWildcard;

	if (m_lookahead == '*')
	{
		// m_value_s is empty for a '*' token, so the wildcard has to be
		// stored explicitly. An empty chain ID matches no chain at all.
		chain = "*";
		Match('*');
	}
	else
	{
		chain = m_value_s;
		Match(bt_IDENT);
	}

	Match('|');

	if (m_lookahead == '*')
		Match('*');
	else
	{
		seqNr = m_value_i;
		Match(bt_NUMBER);

		if (m_lookahead == ':')
		{
			Match(':');
			std::string atom = m_value_s;

			if (cif::VERBOSE > 0)
				std::cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << std::endl;

			Match(bt_IDENT);
		}
	}

	return make_tuple(chain, seqNr);
}
// Parses a complete BUSTER selection: exactly one group followed by the end
// of the input. Throws std::runtime_error on a syntax error.
TLSSelectionPtr TLSSelectionParserImplBuster::Parse()
{
	auto group = ParseGroup();

	// nothing may follow the group
	Match(bt_EOLN);

	return group;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBusterOld : public TLSSelectionParserImpl
{
public:
TLSSelectionParserImplBusterOld(const std::string& selection)
: TLSSelectionParserImpl(selection)
{
m_lookahead = GetNextToken();
}
virtual TLSSelectionPtr Parse();
private:
TLSSelectionPtr ParseAtomSelection();
TLSSelectionPtr ParseTerm();
TLSSelectionPtr ParseFactor();
TLSSelectionPtr ParseResid();
TLSSelectionPtr ParseChainResid();
enum TOKEN {
pt_NONE = 0,
pt_IDENT = 256,
pt_CHAINRESID,
pt_STRING,
pt_NUMBER,
pt_RANGE,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int GetNextToken();
virtual std::string ToString(int token);
int m_value_i;
std::string m_value_s;
int m_value_r[2];
};
// Tokenizer for the older BUSTER selection syntax.
//
// Recognises numbers, number ranges (n-m or n:m), identifiers and keywords,
// a letter directly followed by digits (chain + residue number), single
// quoted strings and the end of the input. Every other character is
// returned as a token by itself. Throws std::runtime_error on an
// unterminated quoted string.
//
// End of input is presented to the automaton as a character with value 0.
// That character is synthetic: it is never read from the string and the scan
// position never moves beyond m_end, so calling this function again at the
// end of the input (as Match(pt_EOLN) does) stays within bounds.
int TLSSelectionParserImplBusterOld::GetNextToken()
{
	int result = pt_NONE;

	enum STATE
	{
		st_START,
		st_NEGATE,
		st_NUM,
		st_RANGE,
		st_IDENT_1,
		st_IDENT,
		st_CHAINRESID,
		st_QUOTED_1,
		st_QUOTED_2
	} state = st_START;

	m_value_i = 0;
	m_value_s.clear();

	bool negative = false;

	// true when the character under inspection was really read from the input
	bool consumed = false;

	// push the character under inspection back; a no-op for the synthetic
	// end of input marker since that was never consumed
	auto retract = [&]()
	{
		if (consumed)
			--m_p;
	};

	while (result == pt_NONE)
	{
		char ch = 0;

		consumed = m_p < m_end;
		if (consumed)
			ch = *m_p++;

		switch (state)
		{
			case st_START:
				if (ch == 0)
					result = pt_EOLN;
				else if (isspace(ch))
					continue;
				else if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
				}
				else if (isalpha(ch))
				{
					m_value_s = { ch };
					state = st_IDENT_1;
				}
				else if (ch == '-')
				{
					state = st_NEGATE;
				}
				else if (ch == '\'')
				{
					state = st_QUOTED_1;
				}
				else
					result = ch;
				break;

			case st_NEGATE:
				if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
					negative = true;
				}
				else
				{
					// a lone minus sign
					retract();
					result = '-';
				}
				break;

			case st_NUM:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else if (ch == '-' or ch == ':')
				{
					// possibly the start of a range
					if (negative)
						m_value_i = -m_value_i;

					m_value_r[0] = m_value_i;
					m_value_r[1] = 0;
					state = st_RANGE;
				}
				else
				{
					if (negative)
						m_value_i = -m_value_i;

					result = pt_NUMBER;
					retract();
				}
				break;

			case st_RANGE: // TODO: question, is "-2--1" a valid range? We do not support that, yet
				if (isdigit(ch))
					m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
				else if (m_value_r[1] != 0)
				{
					result = pt_RANGE;
					retract();
				}
				else
				{
					// Not a range after all. Give back the current character
					// and then the separator, which was always read from the
					// input, so the number is returned by itself.
					retract();
					--m_p;
					result = pt_NUMBER;
				}
				break;

			case st_IDENT_1:
				if (isalpha(ch))
				{
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
				{
					// one letter followed by a digit: chain + residue number
					m_value_i = (ch - '0');
					state = st_CHAINRESID;
				}
				else
				{
					retract();
					result = pt_IDENT;
				}
				break;

			case st_CHAINRESID:
				if (isalpha(ch))
				{
					// a letter after the digits, it is an identifier after all
					m_value_s += std::to_string(m_value_i);
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else
				{
					retract();
					result = pt_CHAINRESID;
				}
				break;

			case st_IDENT:
				if (isalnum(ch))
					m_value_s += ch;
				else
				{
					retract();
					result = pt_IDENT;
				}
				break;

			case st_QUOTED_1:
				if (ch == '\'')
				{
					// two quotes in a row: return the first as a character
					retract();
					result = '\'';
				}
				else
				{
					m_value_s = { ch };
					state = st_QUOTED_2;
				}
				break;

			case st_QUOTED_2:
				if (ch == '\'')
					result = pt_STRING;
				else if (ch == 0)
					throw std::runtime_error("Unexpected end of selection, missing quote character?");
				else
					m_value_s += ch;
				break;
		}
	}

	// promote identifiers that are keywords (case insensitive)
	if (result == pt_IDENT)
	{
		if (iequals(m_value_s, "CHAIN"))
			result = pt_KW_CHAIN;
		else if (iequals(m_value_s, "ALL"))
			result = pt_KW_ALL;
		else if (iequals(m_value_s, "AND"))
			result = pt_KW_AND;
		else if (iequals(m_value_s, "OR"))
			result = pt_KW_OR;
		else if (iequals(m_value_s, "NOT"))
			result = pt_KW_NOT;
		else if (iequals(m_value_s, "RESSEQ"))
			result = pt_KW_RESSEQ;
		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
			result = pt_KW_RESID;
		else if (iequals(m_value_s, "RESNAME"))
			result = pt_KW_RESNAME;
		else if (iequals(m_value_s, "PDB"))
			result = pt_KW_PDB;
		else if (iequals(m_value_s, "ENTRY"))
			result = pt_KW_ENTRY;
		else if (iequals(m_value_s, "THROUGH"))
			result = pt_KW_THROUGH;
	}

	return result;
}
std::string TLSSelectionParserImplBusterOld::ToString(int token)
{
switch (token)
{
case pt_IDENT: return "identifier (" + m_value_s + ')';
case pt_STRING: return "string (" + m_value_s + ')';
case pt_NUMBER: return "number (" + std::to_string(m_value_i) + ')';
case pt_RANGE: return "range (" + std::to_string(m_value_r[0]) + ':' + std::to_string(m_value_r[1]) + ')';
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default:
assert(false);
return "unknown token";
}
}
// Parses a complete old-style BUSTER selection.
//
// A selection starting with the PDB keyword is rejected as unimplemented
// (the ENTRY keyword that would follow is not consumed). Throws
// std::runtime_error on a syntax error.
TLSSelectionPtr TLSSelectionParserImplBusterOld::Parse()
{
	const bool pdbEntry = (m_lookahead == pt_KW_PDB);
	if (pdbEntry)
	{
		Match(pt_KW_PDB);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	auto selection = ParseAtomSelection();

	// nothing may follow the selection
	Match(pt_EOLN);

	return selection;
}
// selection := term { OR term }
// Builds a left-associative chain of unions.
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseAtomSelection()
{
	TLSSelectionPtr result = ParseTerm();

	for (; m_lookahead == pt_KW_OR;)
	{
		Match(pt_KW_OR);

		TLSSelectionPtr rhs = ParseTerm();
		// the union takes over ownership of both operands
		result.reset(new TLSSelectionUnion(result, rhs));
	}

	return result;
}
// term := factor { AND factor }
// Builds a left-associative chain of intersections.
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseTerm()
{
	TLSSelectionPtr result = ParseFactor();

	for (; m_lookahead == pt_KW_AND;)
	{
		Match(pt_KW_AND);

		TLSSelectionPtr rhs = ParseFactor();
		// the intersection takes over ownership of both operands
		result.reset(new TLSSelectionIntersection(result, rhs));
	}

	return result;
}
// factor := '(' selection ')'
//         | NOT selection
//         | CHAIN (number | string | ident)
//         | RESNAME ident
//         | (RESSEQ | RESID) resid-list
//         | ALL
//         | chain-resid-list
//
// Returns the selection node for the factor. Throws std::runtime_error on
// an unexpected token.
//
// Note that the value of a token (m_value_i, m_value_s) belongs to the
// lookahead token, so values are picked up before the token is matched.
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseFactor()
{
	TLSSelectionPtr result;

	switch (m_lookahead)
	{
		case '(':
			Match('(');
			result = ParseAtomSelection();
			Match(')');
			break;

		case pt_KW_NOT:
		{
			Match(pt_KW_NOT);
			TLSSelectionPtr negated = ParseAtomSelection();
			result.reset(new TLSSelectionNot(std::move(negated)));
			break;
		}

		case pt_KW_CHAIN:
		{
			Match(pt_KW_CHAIN);

			std::string chainID = m_value_s;

			switch (m_lookahead)
			{
				case pt_NUMBER: // sigh, chain IDs can be numbers
					chainID = std::to_string(m_value_i);
					Match(pt_NUMBER);
					break;

				case pt_STRING:
					Match(pt_STRING);
					break;

				default:
					Match(pt_IDENT);
					break;
			}

			result.reset(new TLSSelectionChain(chainID));
			break;
		}

		case pt_KW_RESNAME:
		{
			Match(pt_KW_RESNAME);
			const std::string name = m_value_s;
			Match(pt_IDENT);
			result.reset(new TLSSelectionByName(name));
			break;
		}

		// both keywords are followed by the same list of numbers and ranges
		case pt_KW_RESSEQ:
		case pt_KW_RESID:
			Match(m_lookahead);
			result = ParseResid();
			break;

		case pt_KW_ALL:
			Match(pt_KW_ALL);
			result.reset(new TLSSelectionAll());
			break;

		case pt_CHAINRESID:
			result = ParseChainResid();
			break;

		default:
			throw std::runtime_error("Unexpected token " + ToString(m_lookahead));
	}

	return result;
}
// resid-list := resid-range { ',' resid-range }
// resid-range := range | number [ (':' | '-' | THROUGH) number ]
//
// Returns the union of the sequence number ranges in the list.
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseResid()
{
	TLSSelectionPtr result;

	bool more = true;
	while (more)
	{
		int from, to;

		if (m_lookahead == pt_RANGE)
		{
			// the tokenizer already recognised the complete range
			from = m_value_r[0];
			to = m_value_r[1];
			Match(pt_RANGE);
		}
		else
		{
			from = m_value_i;
			Match(pt_NUMBER);

			to = from;
			const bool separator =
				m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH;
			if (separator)
			{
				Match(m_lookahead);
				to = m_value_i;
				Match(pt_NUMBER);
			}
		}

		TLSSelectionPtr range(new TLSSelectionRangeSeq(from, to));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new TLSSelectionUnion(result, range));

		// a comma announces another range
		more = (m_lookahead == ',');
		if (more)
			Match(',');
	}

	return result;
}
// chain-resid-list := chain-resid-range { ',' chain-resid-range }
// chain-resid-range := chainresid [ '-' chainresid ]
//
// Returns the union of the ranges in the list, each range being restricted
// to its chain. Throws std::runtime_error when the two ends of a range are
// in different chains.
TLSSelectionPtr TLSSelectionParserImplBusterOld::ParseChainResid()
{
	TLSSelectionPtr result;

	bool more = true;
	while (more)
	{
		// the values of the lookahead token, pick them up before matching it
		const std::string chainID = m_value_s;
		const int from = m_value_i;
		int to = from;

		Match(pt_CHAINRESID);

		if (m_lookahead == '-')
		{
			Match(m_lookahead);

			to = m_value_i;
			if (m_value_s != chainID)
				throw std::runtime_error("Cannot have two different chainIDs in a range selection");

			Match(pt_CHAINRESID);
		}

		TLSSelectionPtr sc(new TLSSelectionChain(chainID));
		TLSSelectionPtr sr(new TLSSelectionRangeSeq(from, to));
		TLSSelectionPtr range(new TLSSelectionIntersection(sc, sr));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new TLSSelectionUnion(result, range));

		// a comma announces another range
		more = (m_lookahead == ',');
		if (more)
			Match(',');
	}

	return result;
}
// --------------------------------------------------------------------
// Interface for objects that turn a selection string into a selection tree.
class TLSSelectionParserBase
{
  public:
	virtual ~TLSSelectionParserBase() = default;

	// Parses `selection` and returns the resulting selection tree.
	virtual TLSSelectionPtr Parse(const std::string& selection) const = 0;
};
template<typename IMPL>
class TLSSelectionParser
{
public:
virtual TLSSelectionPtr Parse(const std::string& selection) const
{
TLSSelectionPtr result;
try
{
IMPL p(selection);
result = p.Parse();
}
catch (const std::exception& ex)
{
if (cif::VERBOSE >= 0)
std::cerr << "ParseError: " << ex.what() << std::endl;
}
return result;
}
};
// --------------------------------------------------------------------
// Parses a TLS selection using the syntax of the program that wrote it.
//
// `program` is searched (case insensitive) for "buster" and "phenix" to pick
// the parser to start with. When that parser fails the other parsers are
// tried in a fixed order, and when the program is not recognised the PHENIX
// order is used. Returns an empty pointer when none of the parsers accepts
// the selection.
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection)
{
	TLSSelectionParser<TLSSelectionParserImplPhenix> phenix;
	TLSSelectionParser<TLSSelectionParserImplBuster> buster;
	TLSSelectionParser<TLSSelectionParserImplBusterOld> busterOld;

	TLSSelectionPtr result;

	// try another parser, but only when there is no result yet
	auto retryWith = [&](const auto& parser, const char* notice)
	{
		if (result)
			return;

		if (cif::VERBOSE > 0)
			std::cerr << notice << std::endl;

		result = parser.Parse(selection);
	};

	if (cif::icontains(program, "buster"))
	{
		result = buster.Parse(selection);

		retryWith(busterOld, "Falling back to old BUSTER");
		retryWith(phenix, "Falling back to PHENIX");
	}
	else
	{
		// PHENIX, or an unknown program which is treated the same way
		if (not cif::icontains(program, "phenix") and cif::VERBOSE > 0)
			std::cerr << "No known program specified, trying PHENIX" << std::endl;

		result = phenix.Parse(selection);

		retryWith(buster, "Falling back to BUSTER");
		retryWith(busterOld, "Falling back to old BUSTER");
	}

	return result;
}
}
...@@ -43,8 +43,8 @@ using std::regex; ...@@ -43,8 +43,8 @@ using std::regex;
#include <gxrio.hpp> #include <gxrio.hpp>
#include <cif++/cif/dictionary_parser.hpp> #include <cif++/dictionary_parser.hpp>
#include <cif++/cif/validate.hpp> #include <cif++/validate.hpp>
#include <cif++/utilities.hpp> #include <cif++/utilities.hpp>
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <stdexcept> #include <stdexcept>
#include <cif++/cif.hpp> #include <cif++.hpp>
#include <cif++/structure/Structure.hpp> #include <cif++/structure/Structure.hpp>
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <stdexcept> #include <stdexcept>
#include <cif++/cif.hpp> #include <cif++.hpp>
#include <cif++/structure/Structure.hpp> #include <cif++/structure/Structure.hpp>
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -32,9 +32,9 @@ ...@@ -32,9 +32,9 @@
// #include <cif++/DistanceMap.hpp> // #include <cif++/DistanceMap.hpp>
#include <cif++/BondMap.hpp> #include <cif++/BondMap.hpp>
#include <cif++/Cif++.hpp> #include <cif++++.hpp>
#include <cif++/CifValidator.hpp> #include <cif++Validator.hpp>
#include <cif++/CifParser.hpp> #include <cif++Parser.hpp>
namespace tt = boost::test_tools; namespace tt = boost::test_tools;
......
...@@ -31,12 +31,12 @@ ...@@ -31,12 +31,12 @@
// #include <cif++/DistanceMap.hpp> // #include <cif++/DistanceMap.hpp>
// #include <cif++/BondMap.hpp> // #include <cif++/BondMap.hpp>
#include <cif++/cif.hpp> #include <cif++.hpp>
// #include <cif++/CifValidator.hpp> // #include <cif++Validator.hpp>
// #include <cif++/CifParser.hpp> // #include <cif++Parser.hpp>
#include <cif++/cif/parser.hpp> #include <cif++/parser.hpp>
#include <cif++/cif/dictionary_parser.hpp> #include <cif++/dictionary_parser.hpp>
namespace tt = boost::test_tools; namespace tt = boost::test_tools;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment