Commit 1769f986 by Maarten L. Hekkelman

Fixed TLS parser, and more

parent 75ffd978
...@@ -198,6 +198,7 @@ set(project_sources ...@@ -198,6 +198,7 @@ set(project_sources
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp ${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp ${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp ${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${PROJECT_SOURCE_DIR}/src/pdb/tls.cpp
) )
set(project_headers set(project_headers
...@@ -227,6 +228,7 @@ set(project_headers ...@@ -227,6 +228,7 @@ set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp ${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp ${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp ${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
) )
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp) add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
......
...@@ -278,7 +278,7 @@ class category ...@@ -278,7 +278,7 @@ class category
{ {
auto h = find<T>(pos, std::forward<condition>(cond), column); auto h = find<T>(pos, std::forward<condition>(cond), column);
return h.size() == 1 ? std::get<0>(*h.begin()) : T{}; return h.size() == 1 ? *h.begin() : T{};
} }
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>> template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
...@@ -474,7 +474,8 @@ class category ...@@ -474,7 +474,8 @@ class category
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
void sort(std::function<int(row_handle,row_handle)> f);
void reorder_by_index(); void reorder_by_index();
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -107,7 +107,7 @@ class item ...@@ -107,7 +107,7 @@ class item
/// \brief constructor for an item with name \a name and as /// \brief constructor for an item with name \a name and as
/// content a the formatted integral value \a value /// content a the formatted integral value \a value
template <typename T, std::enable_if_t<std::is_integral_v<T>, int> = 0> template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T,bool>, int> = 0>
item(const std::string_view name, const T &value) item(const std::string_view name, const T &value)
: m_name(name) : m_name(name)
{ {
...@@ -123,6 +123,15 @@ class item ...@@ -123,6 +123,15 @@ class item
} }
/// \brief constructor for an item with name \a name and as /// \brief constructor for an item with name \a name and as
/// content the formatted boolean value \a value
template <typename T, std::enable_if_t<std::is_same_v<T,bool>, int> = 0>
item(const std::string_view name, const T &value)
	: m_name(name)
{
	// A boolean is stored as a one-letter flag: "y" for true, "n" for false.
	if (value)
		m_value.assign("y");
	else
		m_value.assign("n");
}
/// \brief constructor for an item with name \a name and as
/// content value \a value /// content value \a value
item(const std::string_view name, const std::string_view value) item(const std::string_view name, const std::string_view value)
: m_name(name) : m_name(name)
......
...@@ -47,11 +47,13 @@ class iterator_impl ...@@ -47,11 +47,13 @@ class iterator_impl
using category_type = std::remove_cv_t<Category>; using category_type = std::remove_cv_t<Category>;
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>; using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
using tuple_type = std::tuple<Ts...>;
using iterator_category = std::forward_iterator_tag; using iterator_category = std::forward_iterator_tag;
using value_type = std::conditional_t<N == 0, row_handle, std::tuple<Ts...>>; using value_type = tuple_type;
using difference_type = std::ptrdiff_t; using difference_type = std::ptrdiff_t;
using pointer = std::conditional_t<N == 0, row_handle, value_type *>; using pointer = value_type *;
using reference = std::conditional_t<N == 0, row_handle, value_type &>; using reference = value_type &;
iterator_impl() = default; iterator_impl() = default;
...@@ -66,56 +68,269 @@ class iterator_impl ...@@ -66,56 +68,269 @@ class iterator_impl
{ {
} }
template <typename IRowType>
iterator_impl(iterator_impl<IRowType, Ts...> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current))
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
{
m_value = get(std::make_index_sequence<N>());
}
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, N> &cix)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_column_ix(cix)
{
m_value = get(std::make_index_sequence<N>());
}
iterator_impl &operator=(const iterator_impl &i)
{
m_category = i.m_category;
m_current = i.m_current;
m_column_ix = i.m_column_ix;
m_value = i.m_value;
return *this;
}
virtual ~iterator_impl() = default;
reference operator*()
{
return m_value;
}
pointer operator->()
{
return &m_value;
}
operator const row_handle() const
{
return { *m_category, *m_current };
}
operator row_handle()
{
return { *m_category, *m_current };
}
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
m_value = get(std::make_index_sequence<N>());
return *this;
}
iterator_impl operator++(int)
{
iterator_impl result(*this);
this->operator++();
return result;
}
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
private:
/// \brief Build the tuple of column values for the current row.
///
/// For each index in \a Is the column number stored in m_column_ix is
/// looked up in the current row and converted to the matching type in Ts.
/// When there is no current row a value-initialised tuple is returned.
template <std::size_t... Is>
tuple_type get(std::index_sequence<Is...>) const
{
	// No row to read from
	if (m_current == nullptr)
		return {};

	row_handle handle{*m_category, *m_current};
	return tuple_type{handle[m_column_ix[Is]].template as<Ts>()...};
}
category_type *m_category = nullptr;
row_type *m_current = nullptr;
value_type m_value;
std::array<size_t, N> m_column_ix;
};
template<typename Category>
class iterator_impl<Category>
{
public:
template <typename, typename...>
friend class iterator_impl;
friend class category;
using category_type = std::remove_cv_t<Category>;
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
using iterator_category = std::forward_iterator_tag;
using value_type = row_handle;
using difference_type = std::ptrdiff_t;
using pointer = row_handle;
using reference = row_handle;
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
template <typename C2>
iterator_impl(const iterator_impl<C2> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type*>(rhs.m_current))
{
}
iterator_impl(Category &cat, row *current) iterator_impl(Category &cat, row *current)
: m_category(const_cast<category_type *>(&cat)) : m_category(const_cast<category_type *>(&cat))
, m_current(current) , m_current(current)
, m_value(*m_category, *current)
{ {
static_assert(N == 0, "Only valid if this is a row iterator, not a row<xxx> iterator");
} }
// iterator_impl(ItemRow *data) template <typename IRowType>
// : m_current(data) iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 0> &cix)
// { : m_category(rhs.m_category)
// static_assert(N == 0, "Only valid if this is a row iterator, not a row<xxx> iterator"); , m_current(rhs.m_current)
// } {
}
// iterator_impl(ItemRow *data, const std::array<size_t, N> &cix) iterator_impl &operator=(const iterator_impl &i)
// : m_current(data) {
// , m_column_ix(cix) m_category = i.m_category;
// { m_current = i.m_current;
// } return *this;
}
virtual ~iterator_impl() = default;
reference operator*()
{
return {*m_category, *m_current};
}
pointer operator->()
{
return &m_current;
}
operator const row_handle() const
{
return { *m_category, *m_current };
}
operator row_handle()
{
return { *m_category, *m_current };
}
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
return *this;
}
iterator_impl operator++(int)
{
iterator_impl result(*this);
this->operator++();
return result;
}
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
private:
category_type *m_category = nullptr;
row_type *m_current = nullptr;
};
template<typename Category, typename T>
class iterator_impl<Category, T>
{
public:
template <typename, typename...>
friend class iterator_impl;
friend class category;
using category_type = std::remove_cv_t<Category>;
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
using iterator_category = std::forward_iterator_tag;
using value_type = T;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
template <typename C2, typename T2>
iterator_impl(const iterator_impl<C2, T2> &rhs)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
{
}
template <typename IRowType> template <typename IRowType>
iterator_impl(iterator_impl<IRowType, Ts...> &rhs) iterator_impl(iterator_impl<IRowType, T> &rhs)
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current)) , m_current(const_cast<row_type *>(rhs.m_current))
, m_value(rhs.m_value) , m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix) , m_column_ix(rhs.m_column_ix)
{ {
if constexpr (N > 0) m_value = get(m_current);
m_value = get(m_current, std::make_index_sequence<N>());
} }
template <typename IRowType> template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, N> &cix) iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 1> &cix)
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(rhs.m_current) , m_current(rhs.m_current)
, m_column_ix(cix) , m_column_ix(cix[0])
{ {
if constexpr (N > 0) m_value = get();
m_value = get(std::make_index_sequence<N>());
} }
iterator_impl &operator=(const iterator_impl &i) iterator_impl &operator=(const iterator_impl &i)
{ {
m_category = i.m_category; m_category = i.m_category;
m_current = i.m_current; m_current = i.m_current;
if constexpr (N != 0) m_column_ix = i.m_column_ix;
{ m_value = i.m_value;
m_column_ix = i.m_column_ix;
m_value = i.m_value;
}
return *this; return *this;
} }
...@@ -123,18 +338,12 @@ class iterator_impl ...@@ -123,18 +338,12 @@ class iterator_impl
reference operator*() reference operator*()
{ {
if constexpr (N == 0) return m_value;
return {*m_category, *m_current};
else
return m_value;
} }
pointer operator->() pointer operator->()
{ {
if constexpr (N == 0) return &m_value;
return &m_current;
else
return &m_value;
} }
operator const row_handle() const operator const row_handle() const
...@@ -152,8 +361,7 @@ class iterator_impl ...@@ -152,8 +361,7 @@ class iterator_impl
if (m_current != nullptr) if (m_current != nullptr)
m_current = m_current->m_next; m_current = m_current->m_next;
if constexpr (N != 0) m_value = get();
m_value = get(std::make_index_sequence<N>());
return *this; return *this;
} }
...@@ -181,13 +389,12 @@ class iterator_impl ...@@ -181,13 +389,12 @@ class iterator_impl
} }
private: private:
template <std::size_t... Is> value_type get() const
std::tuple<Ts...> get(std::index_sequence<Is...>) const
{ {
if (m_current != nullptr) if (m_current != nullptr)
{ {
row_handle rh{*m_category, *m_current}; row_handle rh{*m_category, *m_current};
return std::tuple<Ts...>{rh[m_column_ix[Is]].template as<Ts>()...}; return rh[m_column_ix].template as<T>();
} }
return {}; return {};
...@@ -196,7 +403,7 @@ class iterator_impl ...@@ -196,7 +403,7 @@ class iterator_impl
category_type *m_category = nullptr; category_type *m_category = nullptr;
row_type *m_current = nullptr; row_type *m_current = nullptr;
value_type m_value; value_type m_value;
std::array<size_t, N> m_column_ix; size_t m_column_ix;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -270,6 +270,7 @@ class atom ...@@ -270,6 +270,7 @@ class atom
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); } std::string get_auth_seq_id() const { return get_property("auth_seq_id"); }
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); } std::string get_auth_atom_id() const { return get_property("auth_atom_id"); }
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); } std::string get_auth_alt_id() const { return get_property("auth_alt_id"); }
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); }
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); } std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); }
bool is_alternate() const { return not get_label_alt_id().empty(); } bool is_alternate() const { return not get_label_alt_id().empty(); }
......
/*- /*-
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* 1. Redistributions of source code must retain the above copyright notice, this * 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer * list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice, * 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation * this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution. * and/or other materials provided with the distribution.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
...@@ -26,32 +26,30 @@ ...@@ -26,32 +26,30 @@
#pragma once #pragma once
#include <vector>
#include <string> #include <string>
#include <tuple> #include <tuple>
#include <vector>
#include <cif++.hpp> #include <cif++.hpp>
namespace cif namespace cif
{ {
extern const int extern const int
kResidueNrWildcard, kResidueNrWildcard,
kNoSeqNum; kNoSeqNum;
struct TLSSelection; struct tls_selection;
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr; struct tls_residue;
struct TLSResidue;
struct TLSSelection struct tls_selection
{ {
virtual ~TLSSelection() {} virtual ~tls_selection() {}
virtual void CollectResidues(cif::datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0; virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::datablock& db, bool pdbNamespace) const; std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
}; };
// Low level: get the selections // Low level: get the selections
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection); std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
} } // namespace cif
...@@ -1679,6 +1679,33 @@ void category::swap_item(size_t column_ix, row_handle &a, row_handle &b) ...@@ -1679,6 +1679,33 @@ void category::swap_item(size_t column_ix, row_handle &a, row_handle &b)
std::swap(ra.at(column_ix), rb.at(column_ix)); std::swap(ra.at(column_ix), rb.at(column_ix));
} }
void category::sort(std::function<int(row_handle,row_handle)> f)
{
if (m_head == nullptr)
return;
std::vector<row_handle> rows;
for (auto itemRow = m_head; itemRow != nullptr; itemRow = itemRow->m_next)
rows.emplace_back(*this, *itemRow);
std::stable_sort(rows.begin(), rows.end(),
[&f](row_handle ia, row_handle ib)
{
return f(ia, ib) < 0;
});
m_head = rows.front().get_row();
m_tail = rows.back().get_row();
auto r = m_head;
for (size_t i = 1; i < rows.size(); ++i)
r = r->m_next = rows[i].get_row();
r->m_next = nullptr;
assert(r == m_tail);
assert(size() == rows.size());
}
void category::reorder_by_index() void category::reorder_by_index()
{ {
if (m_index) if (m_index)
......
...@@ -1107,7 +1107,7 @@ branch::branch(structure &structure, const std::string &asym_id) ...@@ -1107,7 +1107,7 @@ branch::branch(structure &structure, const std::string &asym_id)
auto &branch_scheme = db["pdbx_branch_scheme"]; auto &branch_scheme = db["pdbx_branch_scheme"];
auto &branch_link = db["pdbx_entity_branch_link"]; auto &branch_link = db["pdbx_entity_branch_link"];
for (const auto &[entity_id] : struct_asym.find<std::string>("id"_key == asym_id, "entity_id")) for (const auto &entity_id : struct_asym.find<std::string>("id"_key == asym_id, "entity_id"))
{ {
for (const auto &[comp_id, num] : branch_scheme.find<std::string, int>( for (const auto &[comp_id, num] : branch_scheme.find<std::string, int>(
"asym_id"_key == asym_id, "mon_id", "pdb_seq_num")) "asym_id"_key == asym_id, "mon_id", "pdb_seq_num"))
...@@ -1313,7 +1313,7 @@ void structure::load_data() ...@@ -1313,7 +1313,7 @@ void structure::load_data()
auto &branchScheme = m_db["pdbx_branch_scheme"]; auto &branchScheme = m_db["pdbx_branch_scheme"];
for (const auto &[asym_id] : branchScheme.rows<std::string>("asym_id")) for (const auto &asym_id : branchScheme.rows<std::string>("asym_id"))
{ {
if (m_branches.empty() or m_branches.back().get_asym_id() != asym_id) if (m_branches.empty() or m_branches.back().get_asym_id() != asym_id)
m_branches.emplace_back(*this, asym_id); m_branches.emplace_back(*this, asym_id);
...@@ -2523,14 +2523,14 @@ void structure::cleanup_empty_categories() ...@@ -2523,14 +2523,14 @@ void structure::cleanup_empty_categories()
std::optional<size_t> count; std::optional<size_t> count;
if (type == "polymer") if (type == "polymer")
count = m_db["entity_poly"].find("entity_id"_key == id).size(); count = m_db["struct_asym"].find("entity_id"_key == id).size();
else if (type == "non-polymer" or type == "water") else if (type == "non-polymer" or type == "water")
count = m_db["pdbx_nonpoly_scheme"].find("entity_id"_key == id).size(); count = m_db["pdbx_nonpoly_scheme"].find("entity_id"_key == id).size();
else if (type == "branched") else if (type == "branched")
{ {
// is this correct? // is this correct?
std::set<std::string> asym_ids; std::set<std::string> asym_ids;
for (const auto &[asym_id] : m_db["pdbx_branch_scheme"].find<std::string>("entity_id"_key == id, "asym_id")) for (const auto &asym_id : m_db["pdbx_branch_scheme"].find<std::string>("entity_id"_key == id, "asym_id"))
asym_ids.insert(asym_id); asym_ids.insert(asym_id);
count = asym_ids.size(); count = asym_ids.size();
} }
......
...@@ -3006,21 +3006,20 @@ void PDBFileParser::ParseRemark200() ...@@ -3006,21 +3006,20 @@ void PDBFileParser::ParseRemark200()
if (inRM200({ "REJECTION CRITERIA (SIGMA(I))", "RESOLUTION RANGE HIGH (A)", "RESOLUTION RANGE LOW (A)", "NUMBER OF UNIQUE REFLECTIONS", "COMPLETENESS FOR RANGE (%)", "<I/SIGMA(I)> FOR THE DATA SET", "R MERGE (I)", "R SYM (I)", "DATA REDUNDANCY" })) if (inRM200({ "REJECTION CRITERIA (SIGMA(I))", "RESOLUTION RANGE HIGH (A)", "RESOLUTION RANGE LOW (A)", "NUMBER OF UNIQUE REFLECTIONS", "COMPLETENESS FOR RANGE (%)", "<I/SIGMA(I)> FOR THE DATA SET", "R MERGE (I)", "R SYM (I)", "DATA REDUNDANCY" }))
{ {
auto cat = getCategory("reflns"); auto cat = getCategory("reflns");
if (cat->empty()) cat->emplace({
cat->emplace({}); { "entry_id", mStructureID },
auto r = cat->back(); { "observed_criterion_sigma_I", mRemark200["REJECTION CRITERIA (SIGMA(I))"] },
r["entry_id"] = mStructureID; { "d_resolution_high", mRemark200["RESOLUTION RANGE HIGH (A)"] },
r["observed_criterion_sigma_I"] = mRemark200["REJECTION CRITERIA (SIGMA(I))"]; { "d_resolution_low", mRemark200["RESOLUTION RANGE LOW (A)"] },
r["d_resolution_high"] = mRemark200["RESOLUTION RANGE HIGH (A)"]; { "number_obs", mRemark200["NUMBER OF UNIQUE REFLECTIONS"] },
r["d_resolution_low"] = mRemark200["RESOLUTION RANGE LOW (A)"]; { "percent_possible_obs", mRemark200["COMPLETENESS FOR RANGE (%)"] },
r["number_obs"] = mRemark200["NUMBER OF UNIQUE REFLECTIONS"]; { "pdbx_netI_over_sigmaI", mRemark200["<I/SIGMA(I)> FOR THE DATA SET"] },
r["percent_possible_obs"] = mRemark200["COMPLETENESS FOR RANGE (%)"]; { "pdbx_Rmerge_I_obs", mRemark200["R MERGE (I)"] },
r["pdbx_netI_over_sigmaI"] = mRemark200["<I/SIGMA(I)> FOR THE DATA SET"]; { "pdbx_Rsym_value", mRemark200["R SYM (I)"] },
r["pdbx_Rmerge_I_obs"] = mRemark200["R MERGE (I)"]; { "pdbx_redundancy", mRemark200["DATA REDUNDANCY"] },
r["pdbx_Rsym_value"] = mRemark200["R SYM (I)"]; { "pdbx_ordinal", 1 },
r["pdbx_redundancy"] = mRemark200["DATA REDUNDANCY"]; { "pdbx_diffrn_id", 1 }
r["pdbx_ordinal"] = 1; });
r["pdbx_diffrn_id"] = 1;
} }
if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)" })) // that one field is mandatory... if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)" })) // that one field is mandatory...
...@@ -3987,12 +3986,12 @@ void PDBFileParser::ConstructEntities() ...@@ -3987,12 +3986,12 @@ void PDBFileParser::ConstructEntities()
{ {
seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and
key("pdb_seq_num") == dbref.seqBegin and key("pdb_seq_num") == dbref.seqBegin and
key("pdb_ins_code") == insToStr(dbref.insertBegin), (key("pdb_ins_code") == insToStr(dbref.insertBegin) or key("pdb_ins_code") == cif::null),
"seq_id"); "seq_id");
seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and
key("pdb_seq_num") == dbref.seqEnd and key("pdb_seq_num") == dbref.seqEnd and
key("pdb_ins_code") == insToStr(dbref.insertEnd), (key("pdb_ins_code") == insToStr(dbref.insertEnd) or key("pdb_ins_code") == cif::null),
"seq_id"); "seq_id");
} }
catch (...) catch (...)
...@@ -4001,20 +4000,20 @@ void PDBFileParser::ConstructEntities() ...@@ -4001,20 +4000,20 @@ void PDBFileParser::ConstructEntities()
getCategory("struct_ref_seq")->emplace({ getCategory("struct_ref_seq")->emplace({
{ "align_id", structRefSeqAlignID }, { "align_id", structRefSeqAlignID },
{ "ref_id", structRefID }, { "ref_id", structRefID },
{ "pdbx_PDB_id_code", dbref.PDBIDCode }, { "pdbx_PDB_id_code", dbref.PDBIDCode },
{ "pdbx_strand_id", std::string{ chain.mDbref.chainID } }, { "pdbx_strand_id", std::string{ chain.mDbref.chainID } },
{ "seq_align_beg", seqAlignBeg }, { "seq_align_beg", seqAlignBeg },
{ "pdbx_seq_align_beg_ins_code", insToStr(dbref.insertBegin) }, { "pdbx_seq_align_beg_ins_code", insToStr(dbref.insertBegin) },
{ "seq_align_end", seqAlignEnd }, { "seq_align_end", seqAlignEnd },
{ "pdbx_seq_align_end_ins_code", insToStr(dbref.insertEnd) }, { "pdbx_seq_align_end_ins_code", insToStr(dbref.insertEnd) },
{ "pdbx_db_accession", dbref.dbAccession }, { "pdbx_db_accession", dbref.dbAccession },
{ "db_align_beg", dbref.dbSeqBegin }, { "db_align_beg", dbref.dbSeqBegin },
{ "pdbx_db_align_beg_ins_code", insToStr(dbref.dbinsBeg) }, { "pdbx_db_align_beg_ins_code", insToStr(dbref.dbinsBeg) },
{ "db_align_end", dbref.dbSeqEnd }, { "db_align_end", dbref.dbSeqEnd },
{ "pdbx_db_align_end_ins_code", insToStr(dbref.dbinsEnd) }, { "pdbx_db_align_end_ins_code", insToStr(dbref.dbinsEnd) },
{ "pdbx_auth_seq_align_beg", dbref.seqBegin }, { "pdbx_auth_seq_align_beg", dbref.seqBegin },
{ "pdbx_auth_seq_align_end", dbref.seqEnd } }); { "pdbx_auth_seq_align_end", dbref.seqEnd } });
// write the struct_ref_seq_dif // write the struct_ref_seq_dif
for (auto &seqadv : mSeqadvs) for (auto &seqadv : mSeqadvs)
...@@ -5696,6 +5695,8 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result) ...@@ -5696,6 +5695,8 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
{ {
try try
{ {
mDatablock.set_validator(result.get_validator());
PreParseInput(is); PreParseInput(is);
mRec = mData; mRec = mData;
...@@ -6164,10 +6165,10 @@ void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile) ...@@ -6164,10 +6165,10 @@ void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile)
{ {
PDBFileParser p; PDBFileParser p;
p.Parse(pdbFile, cifFile);
cifFile.load_dictionary("mmcif_pdbx"); cifFile.load_dictionary("mmcif_pdbx");
p.Parse(pdbFile, cifFile);
if (not cifFile.is_valid() and cif::VERBOSE >= 0) if (not cifFile.is_valid() and cif::VERBOSE >= 0)
std::cerr << "Resulting mmCIF file is not valid!" << std::endl; std::cerr << "Resulting mmCIF file is not valid!" << std::endl;
} }
......
...@@ -971,6 +971,7 @@ Remark3Parser::Remark3Parser(const std::string &name, const std::string &expMeth ...@@ -971,6 +971,7 @@ Remark3Parser::Remark3Parser(const std::string &name, const std::string &expMeth
, mTemplateCount(templateLineCount) , mTemplateCount(templateLineCount)
, mProgramVersion(programversion) , mProgramVersion(programversion)
{ {
mDb.set_validator(db.get_validator());
} }
std::string Remark3Parser::nextLine() std::string Remark3Parser::nextLine()
......
/*
Created by: Maarten L. Hekkelman
Date: dinsdag 07 november, 2017
Copyright 2017 NKI AVL
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// #include <sys/ioctl.h>
// #include <termios.h>
#include <iomanip>
#include <iostream>
#include <cif++.hpp>
#include <cif++/pdb/tls.hpp>
namespace cif
{
const int
kResidueNrWildcard = std::numeric_limits<int>::min(),
kNoSeqNum = std::numeric_limits<int>::max() - 1;
// --------------------------------------------------------------------
// We parse selection statements and create a selection expression tree
// which is then interpreted by setting the selected flag for the
// residues. After that, the selected ranges are collected and printed.
/// \brief A single residue record that a TLS selection can flag as selected.
///
/// Every member has a default initializer so that a default-constructed
/// instance can safely be compared or inspected. The struct remains an
/// aggregate, so brace initialisation with all seven members still works.
struct tls_residue
{
	std::string chainID;   ///< chain identifier, used to group the PDB-style ranges
	int seqNr = 0;         ///< sequence number paired with chainID
	char iCode = 0;        ///< insertion code, 0 when there is none
	std::string name;      ///< residue name, compared case-insensitively
	bool selected = false; ///< whether the residue is part of the selection
	std::string asymID;    ///< asym identifier, used to group the mmCIF-style ranges
	int seqID = 0;         ///< sequence id paired with asymID

	/// \brief Compare chainID, seqNr, iCode, name (ignoring case) and the
	/// selected flag. asymID and seqID are not part of the comparison.
	bool operator==(const tls_residue &rhs) const
	{
		return chainID == rhs.chainID and
		       seqNr == rhs.seqNr and
		       iCode == rhs.iCode and
		       iequals(name, rhs.name) and
		       selected == rhs.selected;
	}
};
void dump_selection(const std::vector<tls_residue> &selected, int indentLevel)
{
std::string indent(indentLevel * 2, ' ');
auto i = selected.begin();
bool first = true;
// First print in PDB space
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool
{ return s.selected; });
if (b == selected.end())
break;
if (first)
std::cout << indent << "PDB:" << std::endl;
first = false;
auto e = find_if(b, selected.end(), [b](auto s) -> bool
{ return s.chainID != b->chainID or not s.selected; });
std::cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << std::endl;
i = e;
}
// Then in mmCIF space
if (not first)
std::cout << indent << "mmCIF:" << std::endl;
i = selected.begin();
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool
{ return s.selected; });
if (b == selected.end())
break;
auto e = find_if(b, selected.end(), [b](auto s) -> bool
{ return s.asymID != b->asymID or not s.selected; });
std::string asymID = b->asymID;
int from = b->seqID, to = from;
for (auto j = b + 1; j != e; ++j)
{
if (j->seqID == to + 1)
to = j->seqID;
else if (j->seqID != to) // probably an insertion code
{
if (from == kNoSeqNum or to == kNoSeqNum)
std::cout << indent << " >> " << asymID << std::endl;
else
std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
asymID = b->asymID;
from = to = b->seqID;
}
}
if (from == kNoSeqNum or to == kNoSeqNum)
std::cout << indent << " >> " << asymID << std::endl;
else
std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
i = e;
}
if (first)
{
if (isatty(STDOUT_FILENO))
std::cout << indent << cif::coloured("Empty selection") << std::endl;
else
std::cout << indent << "Empty selection" << std::endl;
}
}
/// Evaluate this selection against the residues listed in the scheme
/// categories of \a db and return the selected residues as ranges.
///
/// Residues are gathered from pdbx_poly_seq_scheme, pdbx_nonpoly_scheme
/// (waters are skipped) and pdbx_branch_scheme, ordered per chain and by
/// sequence number, and then passed to collect_residues().
///
/// \param db           The datablock containing the scheme categories
/// \param pdbNamespace When true the ranges are expressed in author (PDB)
///                     chain IDs and sequence numbers, otherwise in
///                     asym_id / seq_id values.
/// \return A list of (chain or asym ID, first, last) tuples. Each range holds
///         consecutive sequence numbers only, a gap starts a new range.
///         Numbers equal to kNoSeqNum are reported as 0.
/// \throws std::runtime_error when an insertion code is longer than one character
std::vector<std::tuple<std::string, int, int>> tls_selection::get_ranges(cif::datablock &db, bool pdbNamespace) const
{
	std::vector<tls_residue> selected;

	// Collect the residues from poly seq scheme...
	for (auto r : db["pdbx_poly_seq_scheme"])
	{
		std::string chain, seqNr, iCode, name;
		std::string asymID;
		int seqID = 0;

		if (pdbNamespace)
			cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id", "seq_id");
		else
		{
			cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");
			asymID = chain;
		}

		// Skip rows without a sequence number *before* trying to convert it,
		// std::stoi throws on an empty string.
		if (seqNr.empty())
			continue;

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		const int nr = std::stoi(seqNr);
		if (not pdbNamespace)
			seqID = nr;

		selected.push_back({ chain, nr, iCode[0], name, false, asymID, seqID });
	}

	// ... those from the nonpoly scheme
	for (auto r : db["pdbx_nonpoly_scheme"])
	{
		std::string chain, seqNr, iCode, name, asymID;

		if (pdbNamespace)
		{
			cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
			if (seqNr.empty())
				continue;
		}
		else
		{
			cif::tie(chain, name) = r.get("asym_id", "mon_id");
			asymID = chain;
			seqNr = "0";
		}

		// Waters are never part of a TLS group range
		if (iequals(name, "HOH") or iequals(name, "H2O"))
			continue;

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		selected.push_back({ chain, std::stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
	}

	// ... and those from the branch scheme
	for (auto r : db["pdbx_branch_scheme"])
	{
		std::string chain, seqNr, iCode, name, asymID;

		if (pdbNamespace)
		{
			cif::tie(chain, seqNr, iCode, name, asymID) = r.get("auth_asym_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
			if (seqNr.empty())
				continue;
		}
		else
		{
			cif::tie(chain, name) = r.get("asym_id", "mon_id");
			asymID = chain;
			seqNr = "0";
		}

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		selected.push_back({ chain, std::stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
	}

	// selected might consist of multiple ranges
	// output per chain
	std::stable_sort(selected.begin(), selected.end(), [](const tls_residue &a, const tls_residue &b) -> bool
		{
			const int d = a.chainID.compare(b.chainID);
			// compare instead of subtract, a difference of two ints may overflow
			return d != 0 ? d < 0 : a.seqNr < b.seqNr;
		});

	collect_residues(db, selected);

	std::vector<std::tuple<std::string, int, int>> result;

	// Turn each run of selected residues sharing the same ID into ranges of
	// strictly increasing sequence numbers. get_id / get_nr pick the
	// namespace (author or label) the ranges are expressed in.
	auto build_ranges = [&selected, &result](auto get_id, auto get_nr)
	{
		auto i = selected.begin();
		while (i != selected.end())
		{
			auto b = std::find_if(i, selected.end(), [](const tls_residue &s) -> bool
				{ return s.selected; });

			if (b == selected.end())
				break;

			const std::string id = get_id(*b);

			auto e = std::find_if(b, selected.end(), [&id, &get_id](const tls_residue &s) -> bool
				{ return get_id(s) != id or not s.selected; });

			// return ranges with strict increasing sequence numbers.
			// So when there's a gap in the sequence we split the range.
			// Beware of iCodes though
			result.emplace_back(id, get_nr(*b), get_nr(*b));

			for (auto j = b + 1; j != e; ++j)
			{
				const int nr = get_nr(*j);
				const int last = std::get<2>(result.back());

				if (nr == last + 1)
					std::get<2>(result.back()) = nr;
				else if (nr != last) // equal numbers: probably an insertion code
					result.emplace_back(id, nr, nr);
			}

			i = e;
		}
	};

	if (pdbNamespace)
		build_ranges(
			[](const tls_residue &r) -> const std::string & { return r.chainID; },
			[](const tls_residue &r) { return r.seqNr; });
	else
		build_ranges(
			[](const tls_residue &r) -> const std::string & { return r.asymID; },
			[](const tls_residue &r) { return r.seqID; });

	// Residues without a sequence number are reported with number zero
	for (auto &range : result)
	{
		if (std::get<1>(range) == kNoSeqNum)
			std::get<1>(range) = 0;
		if (std::get<2>(range) == kNoSeqNum)
			std::get<2>(range) = 0;
	}

	return result;
}
/// Selection node that inverts the outcome of the selection it wraps.
struct tls_selection_not : public tls_selection
{
	/// Takes over ownership of \a selection
	tls_selection_not(std::unique_ptr<tls_selection> selection)
		: selection(selection.release())
	{
	}

	/// Let the wrapped selection mark the residues, then flip every flag.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		selection->collect_residues(db, residues, indentLevel + 1);

		for (auto &residue : residues)
			residue.selected = not residue.selected;

		if (not cif::VERBOSE)
			return;

		const std::string indent(indentLevel * 2, ' ');
		std::cout << indent << "NOT" << std::endl;
		dump_selection(residues, indentLevel);
	}

	std::unique_ptr<tls_selection> selection;
};
struct tls_selection_all : public tls_selection
{
tls_selection_all() {}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
for (auto &r : residues)
r.selected = true;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "ALL" << std::endl;
dump_selection(residues, indentLevel);
}
}
};
struct tls_selection_chain : public tls_selection_all
{
tls_selection_chain(const std::string &chainID)
: m_chain(chainID)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
bool allChains = m_chain == "*";
for (auto &r : residues)
r.selected = allChains or r.chainID == m_chain;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "CHAIN " << m_chain << std::endl;
dump_selection(residues, indentLevel);
}
}
std::string m_chain;
};
struct tls_selection_res_id : public tls_selection_all
{
tls_selection_res_id(int seqNr, char iCode)
: m_seq_nr(seqNr)
, m_icode(iCode)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
for (auto &r : residues)
r.selected = r.seqNr == m_seq_nr and r.iCode == m_icode;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << (m_icode ? std::string{ m_icode } : "") << std::endl;
dump_selection(residues, indentLevel);
}
}
int m_seq_nr;
char m_icode;
};
struct tls_selection_range_seq : public tls_selection_all
{
tls_selection_range_seq(int first, int last)
: m_first(first)
, m_last(last)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
for (auto &r : residues)
{
r.selected = ((r.seqNr >= m_first or m_first == kResidueNrWildcard) and
(r.seqNr <= m_last or m_last == kResidueNrWildcard));
}
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Range " << m_first << ':' << m_last << std::endl;
dump_selection(residues, indentLevel);
}
}
int m_first, m_last;
};
struct tls_selection_range_id : public tls_selection_all
{
tls_selection_range_id(int first, int last, char icodeFirst = 0, char icodeLast = 0)
: m_first(first)
, m_last(last)
, m_icode_first(icodeFirst)
, m_icode_last(icodeLast)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
// need to do this per chain
std::set<std::string> chains;
for (auto &r : residues)
chains.insert(r.chainID);
for (std::string chain : chains)
{
auto f = find_if(residues.begin(), residues.end(),
[this,chain](auto r) -> bool
{
return r.chainID == chain and r.seqNr == m_first and r.iCode == m_icode_first;
});
auto l = find_if(residues.begin(), residues.end(),
[this,chain](auto r) -> bool
{
return r.chainID == chain and r.seqNr == m_last and r.iCode == m_icode_last;
});
if (f != residues.end() and l != residues.end() and f <= l)
{
++l;
for (; f != l; ++f)
f->selected = true;
}
}
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Through " << m_first << ':' << m_last << std::endl;
dump_selection(residues, indentLevel);
}
}
int m_first, m_last;
char m_icode_first, m_icode_last;
};
struct tls_selection_union : public tls_selection
{
tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
: lhs(lhs.release())
, rhs(rhs.release())
{
}
tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
: lhs(lhs.release())
, rhs(rhs.release())
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto &r)
{ r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto &r)
{ r.selected = false; });
lhs->collect_residues(db, a, indentLevel + 1);
rhs->collect_residues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected or bi->selected;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Union" << std::endl;
dump_selection(residues, indentLevel);
}
}
std::unique_ptr<tls_selection> lhs;
std::unique_ptr<tls_selection> rhs;
};
struct tls_selection_intersection : public tls_selection
{
tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
: lhs(lhs.release())
, rhs(rhs.release())
{
}
tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
: lhs(lhs.release())
, rhs(rhs.release())
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
auto a = residues;
for_each(a.begin(), a.end(), [](auto &r)
{ r.selected = false; });
auto b = residues;
for_each(b.begin(), b.end(), [](auto &r)
{ r.selected = false; });
lhs->collect_residues(db, a, indentLevel + 1);
rhs->collect_residues(db, b, indentLevel + 1);
for (auto ai = a.begin(), bi = b.begin(), ri = residues.begin(); ri != residues.end(); ++ai, ++bi, ++ri)
ri->selected = ai->selected and bi->selected;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Intersection" << std::endl;
dump_selection(residues, indentLevel);
}
}
std::unique_ptr<tls_selection> lhs;
std::unique_ptr<tls_selection> rhs;
};
struct tls_selection_by_name : public tls_selection_all
{
public:
tls_selection_by_name(const std::string &resname)
: m_name(resname)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
for (auto &r : residues)
r.selected = r.name == m_name;
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Name " << m_name << std::endl;
dump_selection(residues, indentLevel);
}
}
std::string m_name;
};
struct tls_selection_by_element : public tls_selection_all
{
public:
tls_selection_by_element(const std::string &element)
: m_element(element)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
// rationale... We want to select residues only. So we select
// residues that have just a single atom of type m_element.
// And we assume these have as residue name... m_element.
// ... Right?
for (auto &r : residues)
r.selected = iequals(r.name, m_element);
if (cif::VERBOSE)
{
std::cout << std::string(indentLevel * 2, ' ') << "Element " << m_element << std::endl;
dump_selection(residues, indentLevel);
}
}
std::string m_element;
};
// --------------------------------------------------------------------
/// Common base class for the TLS selection parsers.
///
/// Keeps a private copy of the selection text together with the scan
/// position in it, and the single token of lookahead used by match().
/// Derived classes supply the tokenizer (get_next_token), a description
/// of their tokens (to_string) and the grammar (Parse).
class tls_selection_parser_impl
{
  public:
	tls_selection_parser_impl(const std::string &selection)
		: m_selection(selection)
		, m_p(m_selection.begin())
		, m_end(m_selection.end())
	{
	}

	// This is a polymorphic base class, so give it a virtual destructor
	virtual ~tls_selection_parser_impl() = default;

	// m_p and m_end are iterators into m_selection, a copied parser would
	// end up with iterators into the string of the object it was copied from.
	tls_selection_parser_impl(const tls_selection_parser_impl &) = delete;
	tls_selection_parser_impl &operator=(const tls_selection_parser_impl &) = delete;

	/// Parse the complete selection text and return the resulting selection
	virtual std::unique_ptr<tls_selection> Parse() = 0;

  protected:
	/// Scan the next token, starting at m_p
	virtual int get_next_token() = 0;

	/// Consume the lookahead token if it equals \a token, throws a
	/// std::runtime_error otherwise
	virtual void match(int token);

	/// Return a description of \a token for use in error messages
	virtual std::string to_string(int token) = 0;

	std::string m_selection;           ///< The text being parsed
	std::string::iterator m_p, m_end;  ///< Scan position and end of m_selection
	int m_lookahead = 0;               ///< Current token, derived constructors load the first one
	std::string m_token;               ///< Text of a token, reported in error messages
};
/// Consume the lookahead token when it equals \a token and fetch the next
/// one. Throws a std::runtime_error naming both the expected and the
/// found token otherwise.
void tls_selection_parser_impl::match(int token)
{
	if (m_lookahead != token)
	{
		// Values below 256 are plain characters, anything else is a
		// token code the derived parser knows how to describe.
		const std::string expected = token >= 256
			? to_string(token)
			: std::string(1, char(token));

		const std::string found = m_lookahead >= 256
			? to_string(m_lookahead) + " (" + m_token + ')'
			: std::string(1, char(m_lookahead));

		throw std::runtime_error("Expected " + expected + " but found " + found);
	}

	m_lookahead = get_next_token();
}
// --------------------------------------------------------------------
class TLSSelectionParserImplPhenix : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplPhenix(const std::string &selection)
: tls_selection_parser_impl(selection)
{
m_lookahead = get_next_token();
}
virtual std::unique_ptr<tls_selection> Parse();
private:
std::unique_ptr<tls_selection> ParseAtomSelection();
std::unique_ptr<tls_selection> ParseTerm();
std::unique_ptr<tls_selection> ParseFactor();
enum TOKEN
{
pt_NONE = 0,
pt_IDENT = 256,
pt_STRING,
pt_NUMBER,
pt_RESID,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_ICODE,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int get_next_token();
virtual std::string to_string(int token);
int m_value_i;
std::string m_value_s;
char m_icode;
};
// Scan the next token from the selection text, starting at m_p.
//
// Returns one of the TOKEN codes or, for anything not recognised, the
// character itself. As a side effect m_value_i, m_value_s and m_icode
// are reset and then filled with the value of the token, m_token
// collects the characters consumed.
//
// The scanner is a backtracking state machine: after skipping white
// space it tries the recognisers RESID, NUM, IDENT, QUOTED, DQUOTED and
// OTHER in that order. Whenever a recogniser fails, restart() rewinds
// to the start of the token and switches to the next recogniser.
//
// Throws a std::runtime_error when a quoted string is not terminated.
int TLSSelectionParserImplPhenix::get_next_token()
{
int result = pt_NONE;
// The start state of each recogniser, the states following it are
// numbered relative to it (e.g. st_RESID + 1)
enum STATE
{
st_START,
st_RESID = 200,
st_NUM = 300,
st_IDENT = 400,
st_QUOTED = 500,
st_DQUOTED = 550,
st_OTHER = 600
};
int state = st_START;
m_value_i = 0;
m_icode = 0;
m_value_s.clear();
// s is the position where the current token starts
auto s = m_p;
// start is the start state of the recogniser currently being tried
auto start = state;
m_token.clear();
// Give up on the current recogniser: rewind to the start of the token
// and continue with the next recogniser in line
auto restart = [&]()
{
switch (start)
{
case st_START: state = start = st_RESID; break;
case st_RESID: state = start = st_NUM; break;
case st_NUM: state = start = st_IDENT; break;
case st_IDENT: state = start = st_QUOTED; break;
case st_QUOTED: state = start = st_DQUOTED; break;
case st_DQUOTED: state = start = st_OTHER; break;
}
m_token.clear();
m_p = s;
};
// Give back the character that was read last
auto retract = [&]()
{
--m_p;
m_token.pop_back();
};
while (result == pt_NONE)
{
char ch = *m_p++;
// Reading beyond the end of the text yields a zero character, which
// is what the states below use to detect the end of the input
if (m_p > m_end)
ch = 0;
else
m_token += ch;
switch (state)
{
// start block
case st_START:
if (ch == 0)
result = pt_EOLN;
else if (isspace(ch))
{
// skip white space, the token starts after it
m_token.clear();
++s;
}
else
restart();
break;
// RESID block
// A resid is an optionally negative number followed by exactly one
// letter, the insertion code
case st_RESID:
if (ch == '-')
state = st_RESID + 1;
else if (isdigit(ch))
{
m_value_i = (ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 2:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (isalpha(ch))
{
m_icode = ch;
state = st_RESID + 3;
}
else
// no insertion code, so this is not a resid. Try NUM next
restart();
break;
case st_RESID + 3:
// more alphanumerics after the insertion code, not a resid after all
if (isalnum(ch))
restart();
else
{
retract();
result = pt_RESID;
}
break;
// NUM block
// A number is an optionally negative sequence of digits that is not
// directly followed by a letter
case st_NUM:
if (ch == '-')
state = st_NUM + 1;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 2:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (not isalpha(ch))
{
result = pt_NUMBER;
retract();
}
else
restart();
break;
// IDENT block
// An identifier starts with an alphanumeric character, single quotes
// are accepted in the remainder
case st_IDENT:
if (isalnum(ch))
{
m_value_s = { ch };
state = st_IDENT + 1;
}
else
restart();
break;
case st_IDENT + 1:
if (isalnum(ch) or ch == '\'')
m_value_s += ch;
else
{
// NOTE: unlike retract() this leaves the character in m_token
--m_p;
result = pt_IDENT;
}
break;
// QUOTED block
// A string in single quotes, the quotes are not part of m_value_s
case st_QUOTED:
if (ch == '\'')
{
m_value_s.clear();
state = st_QUOTED + 1;
}
else
restart();
break;
case st_QUOTED + 1:
if (ch == '\'')
result = pt_STRING;
else if (ch == 0)
throw std::runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// DQUOTED block
// Same as the QUOTED block, but for double quotes
case st_DQUOTED:
if (ch == '\"')
{
m_value_s.clear();
state = st_DQUOTED + 1;
}
else
restart();
break;
case st_DQUOTED + 1:
if (ch == '\"')
result = pt_STRING;
else if (ch == 0)
throw std::runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// OTHER block
// Anything else is returned as the character itself
case st_OTHER:
result = ch;
break;
}
}
// Identifiers that compare equal to a keyword (using iequals) are
// returned as that keyword
if (result == pt_IDENT)
{
if (iequals(m_value_s, "CHAIN"))
result = pt_KW_CHAIN;
else if (iequals(m_value_s, "ALL"))
result = pt_KW_ALL;
else if (iequals(m_value_s, "AND"))
result = pt_KW_AND;
else if (iequals(m_value_s, "OR"))
result = pt_KW_OR;
else if (iequals(m_value_s, "NOT"))
result = pt_KW_NOT;
else if (iequals(m_value_s, "RESSEQ"))
result = pt_KW_RESSEQ;
else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
result = pt_KW_RESID;
else if (iequals(m_value_s, "RESNAME"))
result = pt_KW_RESNAME;
else if (iequals(m_value_s, "ELEMENT"))
result = pt_KW_ELEMENT;
else if (iequals(m_value_s, "PDB"))
result = pt_KW_PDB;
else if (iequals(m_value_s, "ENTRY"))
result = pt_KW_ENTRY;
else if (iequals(m_value_s, "THROUGH"))
result = pt_KW_THROUGH;
}
return result;
}
std::string TLSSelectionParserImplPhenix::to_string(int token)
{
switch (token)
{
case pt_IDENT: return "identifier";
case pt_STRING: return "std::string";
case pt_NUMBER: return "number";
case pt_RESID: return "resid";
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default: return "character";
}
}
/// Parse the complete selection. A single superfluous closing
/// parenthesis at the end is accepted, but results in a warning.
/// Throws a std::runtime_error for a PDB ENTRY specification and for
/// text that does not follow the grammar.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::Parse()
{
	if (m_lookahead == pt_KW_PDB)
	{
		match(pt_KW_PDB);
		// Match(pt_KW_ENTRY);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	auto selection = ParseAtomSelection();

	// Be lenient about one closing parenthesis too many
	const bool extraParenthesis = (m_lookahead == ')');
	if (extraParenthesis)
		m_lookahead = get_next_token();

	match(pt_EOLN);

	// Only warn once we know the rest of the statement was fine
	if (extraParenthesis)
		std::cerr << "WARNING: too many closing parenthesis in TLS selection statement" << std::endl;

	return selection;
}
/// atom-selection := term { OR term }
/// The terms are combined, left to right, into unions.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseAtomSelection()
{
	auto selection = ParseTerm();

	for (;;)
	{
		if (m_lookahead != pt_KW_OR)
			break;

		match(pt_KW_OR);

		auto rhs = ParseTerm();
		selection.reset(new tls_selection_union(selection, rhs));
	}

	return selection;
}
/// term := factor { AND factor }
/// The factors are combined, left to right, into intersections.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseTerm()
{
	auto selection = ParseFactor();

	for (;;)
	{
		if (m_lookahead != pt_KW_AND)
			break;

		match(pt_KW_AND);

		auto rhs = ParseFactor();
		selection.reset(new tls_selection_intersection(selection, rhs));
	}

	return selection;
}
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseFactor()
{
std::unique_ptr<tls_selection> result;
switch (m_lookahead)
{
case '(':
match('(');
result = ParseAtomSelection();
if (m_lookahead == pt_EOLN)
std::cerr << "WARNING: missing closing parenthesis in TLS selection statement" << std::endl;
else
match(')');
break;
case pt_KW_NOT:
match(pt_KW_NOT);
result.reset(new tls_selection_not(ParseAtomSelection()));
break;
case pt_KW_CHAIN:
{
match(pt_KW_CHAIN);
std::string chainID = m_value_s;
if (m_lookahead == pt_NUMBER) // sigh
{
chainID = to_string(m_value_i);
match(pt_NUMBER);
}
else
match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
result.reset(new tls_selection_chain(chainID));
break;
}
case pt_KW_RESNAME:
{
match(pt_KW_RESNAME);
std::string name = m_value_s;
match(pt_IDENT);
result.reset(new tls_selection_by_name(name));
break;
}
case pt_KW_ELEMENT:
{
match(pt_KW_ELEMENT);
std::string element = m_value_s;
match(pt_IDENT);
result.reset(new tls_selection_by_element(element));
break;
}
case pt_KW_RESSEQ:
{
match(pt_KW_RESSEQ);
int from = m_value_i;
match(pt_NUMBER);
int to = from;
if (m_lookahead == ':')
{
match(':');
to = m_value_i;
match(pt_NUMBER);
}
result.reset(new tls_selection_range_seq(from, to));
break;
}
case pt_KW_RESID:
{
match(pt_KW_RESID);
int from, to;
char icode_from = 0, icode_to = 0;
bool through = false;
from = to = m_value_i;
if (m_lookahead == pt_NUMBER)
match(pt_NUMBER);
else
{
icode_from = m_icode;
match(pt_RESID);
}
if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
{
through = m_lookahead == pt_KW_THROUGH;
match(m_lookahead);
to = m_value_i;
if (m_lookahead == pt_NUMBER)
match(pt_NUMBER);
else
{
icode_to = m_icode;
match(pt_RESID);
}
if (through)
result.reset(new tls_selection_range_id(from, to, icode_from, icode_to));
else
{
if (cif::VERBOSE and (icode_from or icode_to))
std::cerr << "Warning, ignoring insertion codes" << std::endl;
result.reset(new tls_selection_range_seq(from, to));
}
}
else
result.reset(new tls_selection_res_id(from, icode_from));
break;
}
case pt_KW_ALL:
match(pt_KW_ALL);
result.reset(new tls_selection_all());
break;
default:
throw std::runtime_error("Unexpected token " + to_string(m_lookahead) + " (" + m_token + ')');
}
return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBuster : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplBuster(const std::string &selection);
virtual std::unique_ptr<tls_selection> Parse();
protected:
enum TOKEN
{
bt_NONE = 0,
bt_IDENT = 256,
bt_NUMBER,
bt_EOLN,
};
virtual int get_next_token();
virtual std::string to_string(int token);
std::unique_ptr<tls_selection> ParseGroup();
std::tuple<std::string, int> ParseAtom();
std::unique_ptr<tls_selection> ParseOldGroup();
int m_value_i;
std::string m_value_s;
bool m_parsing_old_style = false;
};
TLSSelectionParserImplBuster::TLSSelectionParserImplBuster(const std::string &selection)
: tls_selection_parser_impl(selection)
{
m_lookahead = get_next_token();
}
// Scan the next token from the selection text, starting at m_p.
//
// Returns bt_NUMBER (value in m_value_i), bt_IDENT (text in m_value_s),
// bt_EOLN at the end of the text or, for anything else, the character
// itself. White space between tokens is skipped.
int TLSSelectionParserImplBuster::get_next_token()
{
int result = bt_NONE;
enum STATE
{
st_START,
st_NEGATE,
st_NUM,
st_IDENT
} state = st_START;
m_value_i = 0;
m_value_s.clear();
bool negative = false;
while (result == bt_NONE)
{
char ch = *m_p++;
// Reading beyond the end of the text yields a zero character, which
// is what the states below use to detect the end of the input
if (m_p > m_end)
ch = 0;
switch (state)
{
case st_START:
if (ch == 0)
result = bt_EOLN;
else if (isspace(ch))
continue;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
}
else if (isalpha(ch))
{
m_value_s = { ch };
state = st_IDENT;
}
else if (ch == '-')
{
// either the sign of a number or a dash, the next
// character decides
state = st_NEGATE;
}
else
result = ch;
break;
case st_NEGATE:
if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
negative = true;
}
else
{
// not a number, give back the character and return the dash
--m_p;
result = '-';
}
break;
case st_NUM:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (ch - '0');
else
{
// the sign is applied once the number is complete
if (negative)
m_value_i = -m_value_i;
result = bt_NUMBER;
--m_p;
}
break;
case st_IDENT:
// an identifier starts with a letter, followed by letters and digits
if (isalnum(ch))
m_value_s += ch;
else
{
--m_p;
result = bt_IDENT;
}
break;
}
}
return result;
}
/// Return a description of \a token for use in error messages. For
/// identifiers and numbers the value of the most recently scanned token
/// is included.
///
/// \param token One of the TOKEN codes, anything else is a programming
///              error (asserted)
std::string TLSSelectionParserImplBuster::to_string(int token)
{
	switch (token)
	{
		case bt_IDENT: return "identifier (" + m_value_s + ')';
		// The qualification is required here: a plain to_string() would
		// be a recursive call with the number used as a token code.
		case bt_NUMBER: return "number (" + std::to_string(m_value_i) + ')';
		case bt_EOLN: return "end of line";
		default:
			assert(false);
			return "unknown token";
	}
}
std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::ParseGroup()
{
std::unique_ptr<tls_selection> result;
auto add = [&result](const std::string &chainID, int from, int to)
{
std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(from, to));
std::unique_ptr<tls_selection> s(new tls_selection_intersection(sc, sr));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new tls_selection_union{ result, s });
};
match('{');
do
{
std::string chain1;
int seqNr1;
std::tie(chain1, seqNr1) = ParseAtom();
if (m_lookahead == '-')
{
std::string chain2;
int seqNr2 = seqNr1;
match('-');
if (m_lookahead == bt_NUMBER)
{
seqNr2 = m_value_i;
match(bt_NUMBER);
}
else
{
std::tie(chain2, seqNr2) = ParseAtom();
if (chain1 != chain2)
{
std::cerr << "Warning, ranges over multiple chains detected" << std::endl;
std::unique_ptr<tls_selection> sc1(new tls_selection_chain(chain1));
std::unique_ptr<tls_selection> sr1(new tls_selection_range_seq(seqNr1, kResidueNrWildcard));
std::unique_ptr<tls_selection> s1(new tls_selection_intersection(sc1, sr1));
std::unique_ptr<tls_selection> sc2(new tls_selection_chain(chain2));
std::unique_ptr<tls_selection> sr2(new tls_selection_range_seq(kResidueNrWildcard, seqNr2));
std::unique_ptr<tls_selection> s2(new tls_selection_intersection(sc2, sr2));
std::unique_ptr<tls_selection> s(new tls_selection_union(s1, s2));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new tls_selection_union{ result, s });
chain1.clear();
}
}
if (not chain1.empty())
add(chain1, seqNr1, seqNr2);
}
else
add(chain1, seqNr1, seqNr1);
} while (m_lookahead != '}');
match('}');
return result;
}
/// atom := ( '*' | chain-id ) '|' ( '*' | number [ ':' atom-id ] )
///
/// \return The chain ID and the sequence number. A '*' for the chain is
///         returned as the chain ID "*", a '*' for the number as
///         kResidueNrWildcard. An atom ID, if present, is ignored.
/// \throws std::runtime_error when the text does not match
std::tuple<std::string, int> TLSSelectionParserImplBuster::ParseAtom()
{
	std::string chain;
	int seqNr = kResidueNrWildcard;

	if (m_lookahead == '*')
	{
		// The scanner leaves m_value_s empty for punctuation, so store
		// the wildcard explicitly.
		chain = "*";
		match('*');
	}
	else
	{
		chain = m_value_s;
		match(bt_IDENT);
	}

	match('|');

	if (m_lookahead == '*')
		match('*');
	else
	{
		seqNr = m_value_i;
		match(bt_NUMBER);

		if (m_lookahead == ':')
		{
			match(':');

			// Selections are made at the level of residues
			std::string atom = m_value_s;
			if (cif::VERBOSE)
				std::cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << std::endl;

			match(bt_IDENT);
		}
	}

	return std::make_tuple(chain, seqNr);
}
/// Parse the complete selection: one group, followed by the end of the text.
std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::Parse()
{
	auto group = ParseGroup();

	// nothing may follow the group
	match(bt_EOLN);

	return group;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBusterOld : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplBusterOld(const std::string &selection)
: tls_selection_parser_impl(selection)
{
m_lookahead = get_next_token();
}
virtual std::unique_ptr<tls_selection> Parse();
private:
std::unique_ptr<tls_selection> ParseAtomSelection();
std::unique_ptr<tls_selection> ParseTerm();
std::unique_ptr<tls_selection> ParseFactor();
std::unique_ptr<tls_selection> ParseResid();
std::unique_ptr<tls_selection> ParseChainResid();
enum TOKEN
{
pt_NONE = 0,
pt_IDENT = 256,
pt_CHAINRESID,
pt_STRING,
pt_NUMBER,
pt_RANGE,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int get_next_token();
virtual std::string to_string(int token);
int m_value_i;
std::string m_value_s;
int m_value_r[2];
};
/// Scan the next token from the selection text, starting at m_p.
///
/// Recognised are numbers (value in m_value_i), ranges like 1-10 or 1:10
/// (values in m_value_r), identifiers and keywords, strings in single
/// quotes (text in m_value_s) and a single letter followed by digits,
/// which is a combined chain ID and residue number (in m_value_s and
/// m_value_i). White space between tokens is skipped.
///
/// \return One of the TOKEN codes or, for anything else, the character itself
/// \throws std::runtime_error when a quoted string is not terminated
int TLSSelectionParserImplBusterOld::get_next_token()
{
	int result = pt_NONE;

	enum STATE
	{
		st_START,
		st_NEGATE,
		st_NUM,
		st_RANGE,
		st_IDENT_1,
		st_IDENT,
		st_CHAINRESID,
		st_QUOTED_1,
		st_QUOTED_2
	} state = st_START;

	m_value_i = 0;
	m_value_s.clear();
	bool negative = false;

	while (result == pt_NONE)
	{
		char ch = *m_p++;

		// Reading beyond the end of the text yields a zero character,
		// which is what the states below use to detect the end of input
		if (m_p > m_end)
			ch = 0;

		switch (state)
		{
			case st_START:
				if (ch == 0)
					result = pt_EOLN;
				else if (isspace(ch))
					continue;
				else if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
				}
				else if (isalpha(ch))
				{
					m_value_s = { ch };
					state = st_IDENT_1;
				}
				else if (ch == '-')
				{
					// sign of a number or a dash, the next character decides
					state = st_NEGATE;
				}
				else if (ch == '\'')
				{
					state = st_QUOTED_1;
				}
				else
					result = ch;
				break;

			case st_NEGATE:
				if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
					negative = true;
				}
				else
				{
					--m_p;
					result = '-';
				}
				break;

			case st_NUM:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else if (ch == '-' or ch == ':')
				{
					// this might be the start of a range
					if (negative)
						m_value_i = -m_value_i;

					m_value_r[0] = m_value_i;
					m_value_r[1] = 0;
					state = st_RANGE;
				}
				else
				{
					if (negative)
						m_value_i = -m_value_i;
					result = pt_NUMBER;
					--m_p;
				}
				break;

			case st_RANGE: // TODO: question, is "-2--1" a valid range? We do not support that, yet
				if (isdigit(ch))
					m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
				else if (m_value_r[1] != 0)
				{
					result = pt_RANGE;
					--m_p;
				}
				else
				{
					// No second number, so this is not a range. Give back
					// both this character and the separator.
					--m_p;
					--m_p;
					result = pt_NUMBER;
				}
				break;

			case st_IDENT_1:
				// Second character: a digit here means this is a chain ID
				// followed by a residue number
				if (isalpha(ch))
				{
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
				{
					m_value_i = (ch - '0');
					state = st_CHAINRESID;
				}
				else
				{
					--m_p;
					result = pt_IDENT;
				}
				break;

			case st_CHAINRESID:
				if (isalpha(ch))
				{
					// A letter after the digits, so this is an ordinary
					// identifier after all: put the digits scanned so far
					// back into its text. The qualification is required,
					// a plain to_string() is the member function that
					// describes a token.
					m_value_s += std::to_string(m_value_i);
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else
				{
					--m_p;
					result = pt_CHAINRESID;
				}
				break;

			case st_IDENT:
				if (isalnum(ch))
					m_value_s += ch;
				else
				{
					--m_p;
					result = pt_IDENT;
				}
				break;

			case st_QUOTED_1:
				if (ch == '\'')
				{
					// two quotes in a row, return the first as a character
					--m_p;
					result = '\'';
				}
				else
				{
					m_value_s = { ch };
					state = st_QUOTED_2;
				}
				break;

			case st_QUOTED_2:
				if (ch == '\'')
					result = pt_STRING;
				else if (ch == 0)
					throw std::runtime_error("Unexpected end of selection, missing quote character?");
				else
					m_value_s += ch;
				break;
		}
	}

	// Identifiers that compare equal to a keyword (using iequals) are
	// returned as that keyword
	if (result == pt_IDENT)
	{
		if (iequals(m_value_s, "CHAIN"))
			result = pt_KW_CHAIN;
		else if (iequals(m_value_s, "ALL"))
			result = pt_KW_ALL;
		else if (iequals(m_value_s, "AND"))
			result = pt_KW_AND;
		else if (iequals(m_value_s, "OR"))
			result = pt_KW_OR;
		else if (iequals(m_value_s, "NOT"))
			result = pt_KW_NOT;
		else if (iequals(m_value_s, "RESSEQ"))
			result = pt_KW_RESSEQ;
		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
			result = pt_KW_RESID;
		else if (iequals(m_value_s, "RESNAME"))
			result = pt_KW_RESNAME;
		else if (iequals(m_value_s, "PDB"))
			result = pt_KW_PDB;
		else if (iequals(m_value_s, "ENTRY"))
			result = pt_KW_ENTRY;
		else if (iequals(m_value_s, "THROUGH"))
			result = pt_KW_THROUGH;
	}

	return result;
}
std::string TLSSelectionParserImplBusterOld::to_string(int token)
{
switch (token)
{
case pt_IDENT: return "identifier (" + m_value_s + ')';
case pt_STRING: return "std::string (" + m_value_s + ')';
case pt_NUMBER: return "number (" + to_string(m_value_i) + ')';
case pt_RANGE: return "range (" + to_string(m_value_r[0]) + ':' + to_string(m_value_r[1]) + ')';
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default:
assert(false);
return "unknown token";
}
}
/// Parse the complete selection. Throws a std::runtime_error for a PDB
/// ENTRY specification and for text that does not follow the grammar.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::Parse()
{
	const bool pdbEntry = (m_lookahead == pt_KW_PDB);
	if (pdbEntry)
	{
		match(pt_KW_PDB);
		// Match(pt_KW_ENTRY);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	auto selection = ParseAtomSelection();

	// nothing may follow the selection
	match(pt_EOLN);

	return selection;
}
/// atom-selection := term { OR term }
/// The terms are combined, left to right, into unions.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseAtomSelection()
{
	auto selection = ParseTerm();

	for (;;)
	{
		if (m_lookahead != pt_KW_OR)
			break;

		match(pt_KW_OR);

		auto rhs = ParseTerm();
		selection.reset(new tls_selection_union(selection, rhs));
	}

	return selection;
}
/// term := factor { AND factor }
/// The factors are combined, left to right, into intersections.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseTerm()
{
	auto selection = ParseFactor();

	for (;;)
	{
		if (m_lookahead != pt_KW_AND)
			break;

		match(pt_KW_AND);

		auto rhs = ParseFactor();
		selection.reset(new tls_selection_intersection(selection, rhs));
	}

	return selection;
}
/// Parse a single factor of an old style BUSTER selection:
///
///   '(' atom-selection ')'  |  NOT atom-selection  |  CHAIN id  |
///   RESNAME name  |  RESSEQ resid-list  |  RESID resid-list  |  ALL  |
///   chain-resid-list
///
/// \return The selection for this factor
/// \throws std::runtime_error on an unexpected token
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseFactor()
{
	std::unique_ptr<tls_selection> result;

	switch (m_lookahead)
	{
		case '(':
			match('(');
			result = ParseAtomSelection();
			match(')');
			break;

		case pt_KW_NOT:
			match(pt_KW_NOT);
			result.reset(new tls_selection_not(ParseAtomSelection()));
			break;

		case pt_KW_CHAIN:
		{
			match(pt_KW_CHAIN);

			// the lookahead now is the chain ID, its text is in m_value_s
			std::string chainID = m_value_s;
			if (m_lookahead == pt_NUMBER) // sigh
			{
				// A chain ID consisting of digits is scanned as a number.
				// The qualification is required: inside this class a plain
				// to_string() is the member function describing a token.
				chainID = std::to_string(m_value_i);
				match(pt_NUMBER);
			}
			else
				match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);

			result.reset(new tls_selection_chain(chainID));
			break;
		}

		case pt_KW_RESNAME:
		{
			match(pt_KW_RESNAME);
			std::string name = m_value_s;
			match(pt_IDENT);
			result.reset(new tls_selection_by_name(name));
			break;
		}

		case pt_KW_RESSEQ:
			match(pt_KW_RESSEQ);
			result = ParseResid();
			break;

		case pt_KW_RESID:
			match(pt_KW_RESID);
			result = ParseResid();
			break;

		case pt_KW_ALL:
			match(pt_KW_ALL);
			result.reset(new tls_selection_all());
			break;

		case pt_CHAINRESID:
			result = ParseChainResid();
			break;

		default:
		{
			// The lookahead may be a plain character, which to_string()
			// does not describe. Handle those the way match() does.
			const std::string found = m_lookahead >= 256
				? to_string(m_lookahead)
				: std::string(1, char(m_lookahead));

			throw std::runtime_error("Unexpected token " + found);
		}
	}

	return result;
}
/// resid-list := resid-range { ',' resid-range }
/// resid-range := range | number [ ( ':' | '-' | THROUGH ) number ]
///
/// Returns the union of the sequence number ranges in the list.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseResid()
{
	std::unique_ptr<tls_selection> result;

	bool more;
	do
	{
		int first, last;

		if (m_lookahead == pt_RANGE)
		{
			// the scanner already recognised a complete range
			first = m_value_r[0];
			last = m_value_r[1];
			match(pt_RANGE);
		}
		else
		{
			first = last = m_value_i;
			match(pt_NUMBER);

			const bool separator = m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH;
			if (separator)
			{
				match(m_lookahead);
				last = m_value_i;
				match(pt_NUMBER);
			}
		}

		std::unique_ptr<tls_selection> range(new tls_selection_range_seq(first, last));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new tls_selection_union(result, range));

		// a comma announces another range
		more = (m_lookahead == ',');
		if (more)
			match(',');
	} while (more);

	return result;
}
/// chain-resid-list := chain-resid-range { ',' chain-resid-range }
/// chain-resid-range := chain-resid [ '-' chain-resid ]
///
/// Returns the union of the ranges in the list. Throws a
/// std::runtime_error when the two ends of a range have different chain IDs.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseChainResid()
{
	std::unique_ptr<tls_selection> result;

	bool more;
	do
	{
		// the lookahead is a chain-resid token, take its values
		const std::string chainID = m_value_s;
		int first = m_value_i;
		int last = first;

		match(pt_CHAINRESID);

		if (m_lookahead == '-')
		{
			match(m_lookahead);

			last = m_value_i;
			if (m_value_s != chainID)
				throw std::runtime_error("Cannot have two different chainIDs in a range selection");

			match(pt_CHAINRESID);
		}

		std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
		std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(first, last));
		std::unique_ptr<tls_selection> range(new tls_selection_intersection(sc, sr));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new tls_selection_union(result, range));

		// a comma announces another range
		more = (m_lookahead == ',');
		if (more)
			match(',');
	} while (more);

	return result;
}
// --------------------------------------------------------------------
/// Interface of a parser that turns selection text into a selection
class TLSSelectionParserBase
{
  public:
	virtual ~TLSSelectionParserBase() {}

	/// Parse \a selection and return the result
	virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const = 0;
};
template <typename IMPL>
class TLSSelectionParser
{
public:
virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const
{
std::unique_ptr<tls_selection> result;
try
{
IMPL p(selection);
result = p.Parse();
}
catch (const std::exception &ex)
{
std::cerr << "ParseError: " << ex.what() << std::endl;
}
return result;
}
};
// --------------------------------------------------------------------
/// Parse a TLS group selection string, choosing the parser by program name.
///
/// The parsers are tried in a fixed order until one of them returns a
/// non-empty result:
///  - \a program contains "buster" (case-insensitive): BUSTER, old BUSTER, PHENIX
///  - otherwise: PHENIX, BUSTER, old BUSTER
///
/// When cif::VERBOSE is set, a note is written to std::cerr before every
/// fallback attempt, and also when \a program names neither BUSTER nor
/// PHENIX.
///
/// \param program   Name of the program that wrote the selection.
/// \param selection The selection text to parse.
/// \return The parsed selection, or an empty pointer when every parser failed.
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection)
{
	TLSSelectionParser<TLSSelectionParserImplPhenix> phenix;
	TLSSelectionParser<TLSSelectionParserImplBuster> buster;
	TLSSelectionParser<TLSSelectionParserImplBusterOld> busterOld;

	enum dialect
	{
		use_phenix,
		use_buster,
		use_buster_old
	};

	const int attempt_count = 3;
	const dialect buster_first[attempt_count] = { use_buster, use_buster_old, use_phenix };
	const dialect phenix_first[attempt_count] = { use_phenix, use_buster, use_buster_old };

	// PHENIX first is the order for both "phenix" and unknown programs
	const dialect *order = phenix_first;

	if (cif::icontains(program, "buster"))
		order = buster_first;
	else if (not cif::icontains(program, "phenix") and cif::VERBOSE)
		std::cerr << "No known program specified, trying PHENIX" << std::endl;

	std::unique_ptr<tls_selection> parsed;

	for (int attempt = 0; attempt < attempt_count and not parsed; ++attempt)
	{
		// Every attempt after the first is a fallback and gets announced
		const bool announce = attempt > 0 and cif::VERBOSE;

		switch (order[attempt])
		{
			case use_phenix:
				if (announce)
					std::cerr << "Falling back to PHENIX" << std::endl;
				parsed = phenix.Parse(selection);
				break;

			case use_buster:
				if (announce)
					std::cerr << "Falling back to BUSTER" << std::endl;
				parsed = buster.Parse(selection);
				break;

			case use_buster_old:
				if (announce)
					std::cerr << "Falling back to old BUSTER" << std::endl;
				parsed = busterOld.Parse(selection);
				break;
		}
	}

	return parsed;
}
} // namespace cif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment