Commit f4506438 by Maarten L. Hekkelman

Added remove column

parent fc14a655
...@@ -31,22 +31,22 @@ ...@@ -31,22 +31,22 @@
#include "cif++/condition.hpp" #include "cif++/condition.hpp"
#include "cif++/iterator.hpp" #include "cif++/iterator.hpp"
#include "cif++/row.hpp" #include "cif++/row.hpp"
#include "cif++/validate.hpp"
#include "cif++/text.hpp" #include "cif++/text.hpp"
#include "cif++/validate.hpp"
#include <array> #include <array>
/** \file category.hpp /** \file category.hpp
* Documentation for the cif::category class * Documentation for the cif::category class
* *
* The category class should meet the requirements of Container and * The category class should meet the requirements of Container and
* SequenceContainer. * SequenceContainer.
* *
* TODO: implement all of: * TODO: implement all of:
* https://en.cppreference.com/w/cpp/named_req/Container * https://en.cppreference.com/w/cpp/named_req/Container
* https://en.cppreference.com/w/cpp/named_req/SequenceContainer * https://en.cppreference.com/w/cpp/named_req/SequenceContainer
* and more? * and more?
*/ */
namespace cif namespace cif
{ {
...@@ -61,9 +61,9 @@ namespace cif ...@@ -61,9 +61,9 @@ namespace cif
class duplicate_key_error : public std::runtime_error class duplicate_key_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new duplicate key error object * @brief Construct a new duplicate key error object
*/ */
duplicate_key_error(const std::string &msg) duplicate_key_error(const std::string &msg)
: std::runtime_error(msg) : std::runtime_error(msg)
{ {
...@@ -75,9 +75,9 @@ class duplicate_key_error : public std::runtime_error ...@@ -75,9 +75,9 @@ class duplicate_key_error : public std::runtime_error
class missing_key_error : public std::runtime_error class missing_key_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new missing key error object * @brief Construct a new missing key error object
*/ */
missing_key_error(const std::string &msg, const std::string &key) missing_key_error(const std::string &msg, const std::string &key)
: std::runtime_error(msg) : std::runtime_error(msg)
, m_key(key) , m_key(key)
...@@ -95,9 +95,9 @@ class missing_key_error : public std::runtime_error ...@@ -95,9 +95,9 @@ class missing_key_error : public std::runtime_error
class multiple_results_error : public std::runtime_error class multiple_results_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new multiple results error object * @brief Construct a new multiple results error object
*/ */
multiple_results_error() multiple_results_error()
: std::runtime_error("query should have returned exactly one row") : std::runtime_error("query should have returned exactly one row")
{ {
...@@ -156,8 +156,8 @@ class category ...@@ -156,8 +156,8 @@ class category
// -------------------------------------------------------------------- // --------------------------------------------------------------------
const std::string &name() const { return m_name; } ///< Returns the name of the category const std::string &name() const { return m_name; } ///< Returns the name of the category
iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category
std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields. std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields.
/// @brief Set the validator for this category to @a v /// @brief Set the validator for this category to @a v
/// @param v The category_validator to assign. A nullptr value is allowed. /// @param v The category_validator to assign. A nullptr value is allowed.
...@@ -182,7 +182,7 @@ class category ...@@ -182,7 +182,7 @@ class category
/// @brief Validate links, that means, values in this category should have an /// @brief Validate links, that means, values in this category should have an
/// accompanying value in parent categories. /// accompanying value in parent categories.
/// ///
/// @note /// @note
/// The code makes one exception when validating missing links and that's between /// The code makes one exception when validating missing links and that's between
/// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*. /// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*.
...@@ -285,7 +285,7 @@ class category ...@@ -285,7 +285,7 @@ class category
/// Return the theoretical maximum number of rows that can be stored /// Return the theoretical maximum number of rows that can be stored
size_t max_size() const size_t max_size() const
{ {
return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess
} }
/// Return true if the category is empty /// Return true if the category is empty
...@@ -321,7 +321,7 @@ class category ...@@ -321,7 +321,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value")) /// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
/// std::cout << name << ": " << value << '\n'; /// std::cout << name << ": " << value << '\n';
/// @endcode /// @endcode
/// ///
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested /// @param names The names for the columns requested
...@@ -344,7 +344,7 @@ class category ...@@ -344,7 +344,7 @@ class category
/// ///
/// for (int id : cat.rows<int>("id")) /// for (int id : cat.rows<int>("id"))
/// std::cout << id << '\n'; /// std::cout << id << '\n';
/// @endcode /// @endcode
/// ///
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested /// @param names The names for the columns requested
...@@ -363,7 +363,7 @@ class category ...@@ -363,7 +363,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe")) /// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe"))
/// .. // do something with rh /// .. // do something with rh
/// @endcode /// @endcode
/// ///
/// @param cond The condition for the query /// @param cond The condition for the query
/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced /// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
...@@ -417,7 +417,7 @@ class category ...@@ -417,7 +417,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value")) /// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value"))
/// std::cout << name << ": " << value << '\n'; /// std::cout << name << ": " << value << '\n';
/// @endcode /// @endcode
/// ///
/// @param cond The condition for the query /// @param cond The condition for the query
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
...@@ -776,8 +776,7 @@ class category ...@@ -776,8 +776,7 @@ class category
/// @brief Return whether a row exists that matches condition @a cond /// @brief Return whether a row exists that matches condition @a cond
/// @param cond The condition to match /// @param cond The condition to match
/// @return True if a row exists /// @return True if a row exists
[[deprecated("Use contains instead")]] [[deprecated("Use contains instead")]] bool exists(condition &&cond) const
bool exists(condition &&cond) const
{ {
return contains(std::move(cond)); return contains(std::move(cond));
} }
...@@ -875,7 +874,7 @@ class category ...@@ -875,7 +874,7 @@ class category
// insert_impl(pos, std::move(row)); // insert_impl(pos, std::move(row));
// } // }
/// Erase the row pointed to by @a pos and return the iterator to the /// Erase the row pointed to by @a pos and return the iterator to the
/// row following pos. /// row following pos.
iterator erase(iterator pos); iterator erase(iterator pos);
...@@ -941,7 +940,6 @@ class category ...@@ -941,7 +940,6 @@ class category
/// result is unique in the context of this category /// result is unique in the context of this category
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number); std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
/// @brief Generate a new, unique ID based on a string prefix followed by a number /// @brief Generate a new, unique ID based on a string prefix followed by a number
/// @param prefix The string prefix /// @param prefix The string prefix
/// @return a new unique ID /// @return a new unique ID
...@@ -1038,6 +1036,11 @@ class category ...@@ -1038,6 +1036,11 @@ class category
return result; return result;
} }
/** @brief Remove column name @a column_name
* @param column_name The column to be removed
*/
void remove_column(std::string_view column_name);
/// @brief Return whether a column with name @a name exists in this category /// @brief Return whether a column with name @a name exists in this category
/// @param name The name of the column /// @param name The name of the column
/// @return True if the column exists /// @return True if the column exists
...@@ -1082,11 +1085,10 @@ class category ...@@ -1082,11 +1085,10 @@ class category
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const; void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
public: public:
/// friend function to make it possible to do: /// friend function to make it possible to do:
/// @code {.cpp} /// @code {.cpp}
/// std::cout << my_category; /// std::cout << my_category;
/// @endcode /// @endcode
friend std::ostream &operator<<(std::ostream &os, const category &cat) friend std::ostream &operator<<(std::ostream &os, const category &cat)
{ {
cat.write(os); cat.write(os);
......
...@@ -111,7 +111,7 @@ class row_comparator ...@@ -111,7 +111,7 @@ class row_comparator
if (d != 0) if (d != 0)
break; break;
++ai; ++ai;
} }
...@@ -360,7 +360,8 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const ...@@ -360,7 +360,8 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const
{ {
auto fld = cat.get_column_name(f); auto fld = cat.get_column_name(f);
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; }); auto ki = find_if(k.begin(), k.end(), [&fld](auto &i)
{ return i.name() == fld; });
if (ki == k.end()) if (ki == k.end())
k2.emplace_back(fld, ""); k2.emplace_back(fld, "");
else else
...@@ -594,6 +595,25 @@ category::~category() ...@@ -594,6 +595,25 @@ category::~category()
// -------------------------------------------------------------------- // --------------------------------------------------------------------
void category::remove_column(std::string_view column_name)
{
for (size_t ix = 0; ix < m_columns.size(); ++ix)
{
if (not iequals(column_name, m_columns[ix].m_name))
continue;
for (row *r = m_head; r != nullptr; r = r->m_next)
{
if (r->size() > ix)
r->erase(r->begin() + ix);
}
m_columns.erase(m_columns.begin() + ix);
break;
}
}
iset category::get_columns() const iset category::get_columns() const
{ {
iset result; iset result;
...@@ -671,7 +691,7 @@ void category::set_validator(const validator *v, datablock &db) ...@@ -671,7 +691,7 @@ void category::set_validator(const validator *v, datablock &db)
{ {
std::ostringstream msg; std::ostringstream msg;
msg << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " " msg << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " "
<< cif::join(missing, ", ") << " in " << m_name << " " << (missing.size() == 1 ? "is" : "are") << " missing\n"; << cif::join(missing, ", ") << " in " << m_name << " " << (missing.size() == 1 ? "is" : "are") << " missing\n";
throw missing_key_error(msg.str(), *missing.begin()); throw missing_key_error(msg.str(), *missing.begin());
} }
} }
...@@ -873,12 +893,12 @@ bool category::validate_links() const ...@@ -873,12 +893,12 @@ bool category::validate_links() const
result = false; result = false;
std::cerr << "Links for " << link.v->m_link_group_label << " are incomplete\n" std::cerr << "Links for " << link.v->m_link_group_label << " are incomplete\n"
<< " There are " << missing << " items in " << m_name << " that don't have matching parent items in " << parent->m_name << '\n'; << " There are " << missing << " items in " << m_name << " that don't have matching parent items in " << parent->m_name << '\n';
if (VERBOSE) if (VERBOSE)
{ {
std::cerr << "showing first " << first_missing_rows.size() << " rows\n" std::cerr << "showing first " << first_missing_rows.size() << " rows\n"
<< '\n'; << '\n';
first_missing_rows.write(std::cerr, link.v->m_child_keys, false); first_missing_rows.write(std::cerr, link.v->m_child_keys, false);
...@@ -919,7 +939,9 @@ condition category::get_parents_condition(row_handle rh, const category &parentC ...@@ -919,7 +939,9 @@ condition category::get_parents_condition(row_handle rh, const category &parentC
condition result; condition result;
auto links = m_validator->get_links_for_child(m_name); auto links = m_validator->get_links_for_child(m_name);
links.erase(remove_if(links.begin(), links.end(), [n=parentCat.m_name](auto &l) { return l->m_parent_category != n; }), links.end()); links.erase(remove_if(links.begin(), links.end(), [n = parentCat.m_name](auto &l)
{ return l->m_parent_category != n; }),
links.end());
if (not links.empty()) if (not links.empty())
{ {
...@@ -959,7 +981,9 @@ condition category::get_children_condition(row_handle rh, const category &childC ...@@ -959,7 +981,9 @@ condition category::get_children_condition(row_handle rh, const category &childC
mandatoryChildFields = childCatValidator->m_mandatory_fields; mandatoryChildFields = childCatValidator->m_mandatory_fields;
auto links = m_validator->get_links_for_parent(m_name); auto links = m_validator->get_links_for_parent(m_name);
links.erase(remove_if(links.begin(), links.end(), [n=childCat.m_name](auto &l) { return l->m_child_category != n; }), links.end()); links.erase(remove_if(links.begin(), links.end(), [n = childCat.m_name](auto &l)
{ return l->m_child_category != n; }),
links.end());
if (not links.empty()) if (not links.empty())
{ {
...@@ -1123,7 +1147,7 @@ category::iterator category::erase(iterator pos) ...@@ -1123,7 +1147,7 @@ category::iterator category::erase(iterator pos)
return result; return result;
} }
template<typename T> template <typename T>
class save_value class save_value
{ {
public: public:
...@@ -1213,7 +1237,7 @@ void category::erase_orphans(condition &&cond, category &parent) ...@@ -1213,7 +1237,7 @@ void category::erase_orphans(condition &&cond, category &parent)
{ {
if (not cond(r)) if (not cond(r))
continue; continue;
if (parent.contains(get_parents_condition(r, parent))) if (parent.contains(get_parents_condition(r, parent)))
continue; continue;
...@@ -1222,11 +1246,10 @@ void category::erase_orphans(condition &&cond, category &parent) ...@@ -1222,11 +1246,10 @@ void category::erase_orphans(condition &&cond, category &parent)
category c(m_name); category c(m_name);
c.emplace(r); c.emplace(r);
std::cerr << "Removing orphaned record: \n" std::cerr << "Removing orphaned record: \n"
<< c << '\n' << c << '\n'
<< '\n'; << '\n';
} }
remove.emplace_back(r.m_row); remove.emplace_back(r.m_row);
} }
...@@ -1251,10 +1274,10 @@ std::string category::get_unique_id(std::function<std::string(int)> generator) ...@@ -1251,10 +1274,10 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
if (m_index == nullptr and m_cat_validator != nullptr) if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(*this); m_index = new category_index(*this);
for (;;) for (;;)
{ {
if (m_index->find_by_value(*this, {{ id_tag, result }}) == nullptr) if (m_index->find_by_value(*this, { { id_tag, result } }) == nullptr)
break; break;
result = generator(static_cast<int>(m_last_unique_num++)); result = generator(static_cast<int>(m_last_unique_num++));
} }
...@@ -1265,7 +1288,7 @@ std::string category::get_unique_id(std::function<std::string(int)> generator) ...@@ -1265,7 +1288,7 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
{ {
if (not contains(key(id_tag) == result)) if (not contains(key(id_tag) == result))
break; break;
result = generator(static_cast<int>(m_last_unique_num++)); result = generator(static_cast<int>(m_last_unique_num++));
} }
} }
...@@ -1571,8 +1594,8 @@ row *category::clone_row(const row &r) ...@@ -1571,8 +1594,8 @@ row *category::clone_row(const row &r)
auto &i = r[ix]; auto &i = r[ix];
if (not i) if (not i)
continue; continue;
result->append( ix, { i.text() }); result->append(ix, { i.text() });
} }
} }
catch (...) catch (...)
...@@ -1639,10 +1662,10 @@ category::iterator category::insert_impl(const_iterator pos, row *n) ...@@ -1639,10 +1662,10 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
if (n == nullptr) if (n == nullptr)
throw std::runtime_error("Invalid pointer passed to insert"); throw std::runtime_error("Invalid pointer passed to insert");
// #ifndef NDEBUG // #ifndef NDEBUG
// if (m_validator) // if (m_validator)
// is_valid(); // is_valid();
// #endif // #endif
try try
{ {
...@@ -1699,10 +1722,10 @@ category::iterator category::insert_impl(const_iterator pos, row *n) ...@@ -1699,10 +1722,10 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
throw; throw;
} }
// #ifndef NDEBUG // #ifndef NDEBUG
// if (m_validator) // if (m_validator)
// is_valid(); // is_valid();
// #endif // #endif
} }
void category::swap_item(uint16_t column_ix, row_handle &a, row_handle &b) void category::swap_item(uint16_t column_ix, row_handle &a, row_handle &b)
...@@ -1716,7 +1739,7 @@ void category::swap_item(uint16_t column_ix, row_handle &a, row_handle &b) ...@@ -1716,7 +1739,7 @@ void category::swap_item(uint16_t column_ix, row_handle &a, row_handle &b)
std::swap(ra.at(column_ix), rb.at(column_ix)); std::swap(ra.at(column_ix), rb.at(column_ix));
} }
void category::sort(std::function<int(row_handle,row_handle)> f) void category::sort(std::function<int(row_handle, row_handle)> f)
{ {
if (m_head == nullptr) if (m_head == nullptr)
return; return;
...@@ -1740,7 +1763,7 @@ void category::sort(std::function<int(row_handle,row_handle)> f) ...@@ -1740,7 +1763,7 @@ void category::sort(std::function<int(row_handle,row_handle)> f)
r->m_next = nullptr; r->m_next = nullptr;
assert(r == m_tail); assert(r == m_tail);
assert(size() == rows.size()); assert(size() == rows.size());
} }
void category::reorder_by_index() void category::reorder_by_index()
...@@ -1903,8 +1926,8 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool ...@@ -1903,8 +1926,8 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
{ {
auto &col = m_columns[cix]; auto &col = m_columns[cix];
right_aligned[cix] = col.m_validator != nullptr and right_aligned[cix] = col.m_validator != nullptr and
col.m_validator->m_type != nullptr and col.m_validator->m_type != nullptr and
col.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb; col.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;
} }
} }
...@@ -2062,32 +2085,37 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool ...@@ -2062,32 +2085,37 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
bool category::operator==(const category &rhs) const bool category::operator==(const category &rhs) const
{ {
// shortcut
if (this == &rhs)
return true;
auto &a = *this; auto &a = *this;
auto &b = rhs; auto &b = rhs;
using namespace std::placeholders; using namespace std::placeholders;
// set<std::string> tagsA(a.fields()), tagsB(b.fields()); // set<std::string> tagsA(a.fields()), tagsB(b.fields());
// //
// if (tagsA != tagsB) // if (tagsA != tagsB)
// std::cout << "Unequal number of fields\n"; // std::cout << "Unequal number of fields\n";
const category_validator *catValidator = nullptr; const category_validator *catValidator = nullptr;
auto validator = a.get_validator(); auto validator = a.get_validator();
if (validator != nullptr) if (validator != nullptr)
catValidator = validator->get_validator_for_category(a.name()); catValidator = validator->get_validator_for_category(a.name());
typedef std::function<int(std::string_view,std::string_view)> compType; typedef std::function<int(std::string_view, std::string_view)> compType;
std::vector<std::tuple<std::string,compType>> tags; std::vector<std::tuple<std::string, compType>> tags;
std::vector<std::string> keys; std::vector<std::string> keys;
std::vector<size_t> keyIx; std::vector<size_t> keyIx;
if (catValidator == nullptr) if (catValidator == nullptr)
{ {
for (auto& tag: a.get_columns()) for (auto &tag : a.get_columns())
{ {
tags.push_back(std::make_tuple(tag, [](std::string_view va, std::string_view vb) { return va.compare(vb); })); tags.push_back(std::make_tuple(tag, [](std::string_view va, std::string_view vb)
{ return va.compare(vb); }));
keyIx.push_back(keys.size()); keyIx.push_back(keys.size());
keys.push_back(tag); keys.push_back(tag);
} }
...@@ -2096,7 +2124,7 @@ bool category::operator==(const category &rhs) const ...@@ -2096,7 +2124,7 @@ bool category::operator==(const category &rhs) const
{ {
keys = catValidator->m_keys; keys = catValidator->m_keys;
for (auto& tag: a.key_fields()) for (auto &tag : a.key_fields())
{ {
auto iv = catValidator->get_validator_for_item(tag); auto iv = catValidator->get_validator_for_item(tag);
if (iv == nullptr) if (iv == nullptr)
...@@ -2105,25 +2133,28 @@ bool category::operator==(const category &rhs) const ...@@ -2105,25 +2133,28 @@ bool category::operator==(const category &rhs) const
if (tv == nullptr) if (tv == nullptr)
throw std::runtime_error("missing type validator"); throw std::runtime_error("missing type validator");
tags.push_back(std::make_tuple(tag, std::bind(&cif::type_validator::compare, tv, std::placeholders::_1, std::placeholders::_2))); tags.push_back(std::make_tuple(tag, std::bind(&cif::type_validator::compare, tv, std::placeholders::_1, std::placeholders::_2)));
auto pred = [tag](const std::string& s) -> bool { return cif::iequals(tag, s) == 0; }; auto pred = [tag](const std::string &s) -> bool
{
return cif::iequals(tag, s) == 0;
};
if (find_if(keys.begin(), keys.end(), pred) == keys.end()) if (find_if(keys.begin(), keys.end(), pred) == keys.end())
keyIx.push_back(tags.size() - 1); keyIx.push_back(tags.size() - 1);
} }
} }
// a.reorderByIndex(); // a.reorderByIndex();
// b.reorderByIndex(); // b.reorderByIndex();
auto rowEqual = [&](const row_handle& a, const row_handle& b) auto rowEqual = [&](const row_handle &a, const row_handle &b)
{ {
int d = 0; int d = 0;
for (auto kix: keyIx) for (auto kix : keyIx)
{ {
std::string tag; std::string tag;
compType compare; compType compare;
std::tie(tag, compare) = tags[kix]; std::tie(tag, compare) = tags[kix];
d = compare(a[tag].text(), b[tag].text()); d = compare(a[tag].text(), b[tag].text());
...@@ -2131,7 +2162,7 @@ bool category::operator==(const category &rhs) const ...@@ -2131,7 +2162,7 @@ bool category::operator==(const category &rhs) const
if (d != 0) if (d != 0)
break; break;
} }
return d == 0; return d == 0;
}; };
...@@ -2140,30 +2171,34 @@ bool category::operator==(const category &rhs) const ...@@ -2140,30 +2171,34 @@ bool category::operator==(const category &rhs) const
{ {
if (ai == a.end() or bi == b.end()) if (ai == a.end() or bi == b.end())
return false; return false;
auto ra = *ai, rb = *bi; auto ra = *ai, rb = *bi;
if (not rowEqual(ra, rb)) if (not rowEqual(ra, rb))
return false; return false;
std::vector<std::string> missingA, missingB, different; std::vector<std::string> missingA, missingB, different;
for (auto& tt: tags) for (auto &tt : tags)
{ {
std::string tag; std::string tag;
compType compare; compType compare;
std::tie(tag, compare) = tt; std::tie(tag, compare) = tt;
// make it an option to compare unapplicable to empty or something // make it an option to compare unapplicable to empty or something
auto ta = ra[tag].text(); if (ta == "." or ta == "?") ta = ""; auto ta = ra[tag].text();
auto tb = rb[tag].text(); if (tb == "." or tb == "?") tb = ""; if (ta == "." or ta == "?")
ta = "";
auto tb = rb[tag].text();
if (tb == "." or tb == "?")
tb = "";
if (compare(ta, tb) != 0) if (compare(ta, tb) != 0)
return false; return false;
} }
++ai; ++ai;
++bi; ++bi;
} }
......
...@@ -374,6 +374,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde ...@@ -374,6 +374,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde
bool datablock::operator==(const datablock &rhs) const bool datablock::operator==(const datablock &rhs) const
{ {
// shortcut
if (this == &rhs)
return true;
auto &dbA = *this; auto &dbA = *this;
auto &dbB = rhs; auto &dbB = rhs;
......
...@@ -117,8 +117,8 @@ void fixNegativeSeqID(category &atom_site) ...@@ -117,8 +117,8 @@ void fixNegativeSeqID(category &atom_site)
const auto &[auth_seq_id, label_seq_id] = poly_seq.front(); const auto &[auth_seq_id, label_seq_id] = poly_seq.front();
for (auto row : atom_site.find(key("label_asym_id") == asym_id and for (auto row : atom_site.find(key("label_asym_id") == asym_id and
key("auth_seq_id") == auth_seq_id and key("auth_seq_id") == auth_seq_id and
key("label_seq_id") == label_seq_id)) key("label_seq_id") == label_seq_id))
{ {
row.assign("label_seq_id", ".", false, false); row.assign("label_seq_id", ".", false, false);
} }
...@@ -236,20 +236,19 @@ void checkAtomRecords(datablock &db) ...@@ -236,20 +236,19 @@ void checkAtomRecords(datablock &db)
{ "auth_seq_id", auth_seq_id.value_or(std::to_string(*label_seq_id)) }, { "auth_seq_id", auth_seq_id.value_or(std::to_string(*label_seq_id)) },
{ "auth_comp_id", auth_comp_id.value_or(*label_comp_id) }, { "auth_comp_id", auth_comp_id.value_or(*label_comp_id) },
{ "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } }); { "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } });
// Rewrite the coordinates and other fields that look better in a fixed format // Rewrite the coordinates and other fields that look better in a fixed format
// Be careful not to nuke invalidly formatted data here // Be careful not to nuke invalidly formatted data here
for (auto [tag, prec] : std::vector<std::tuple<std::string_view,std::string::size_type>>{ for (auto [tag, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{
{ "cartn_x", 3 }, { "cartn_x", 3 },
{ "cartn_y", 3 }, { "cartn_y", 3 },
{ "cartn_z", 3 }, { "cartn_z", 3 },
{ "occupancy", 2 }, { "occupancy", 2 },
{ "b_iso_or_equiv", 2 } { "b_iso_or_equiv", 2 } })
})
{ {
if (row[tag].empty()) if (row[tag].empty())
continue; continue;
float v; float v;
auto s = row.get<std::string>(tag); auto s = row.get<std::string>(tag);
if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc()) if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc())
...@@ -260,8 +259,30 @@ void checkAtomRecords(datablock &db) ...@@ -260,8 +259,30 @@ void checkAtomRecords(datablock &db)
char b[12]; char b[12];
if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc()) if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc())
row.assign(tag, {b, static_cast<std::string::size_type>(ptr - b)}, false, false); row.assign(tag, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
}
}
}
auto *cv = atom_site.get_cat_validator();
if (cv)
{
// See if there are columns that are no longer known
for (auto tag : atom_site.get_columns())
{
if (cv->get_validator_for_item(tag) != nullptr)
continue;
auto r = atom_site.find_first(key(tag) != null);
if (not r)
{
if (cif::VERBOSE > 0)
std::clog << "Dropping unknown column " << tag << '\n';
atom_site.remove_column(tag);
} }
else if (cif::VERBOSE > 0)
std::clog << "Keeping unknown column " << std::quoted(tag) << " in atom_site since it is not empty\n";
} }
} }
} }
...@@ -607,14 +628,14 @@ void comparePolySeqSchemes(datablock &db) ...@@ -607,14 +628,14 @@ void comparePolySeqSchemes(datablock &db)
if (i == asym_ids_ndb.end() or *i != asym_id) if (i == asym_ids_ndb.end() or *i != asym_id)
asym_ids_ndb.insert(i, asym_id); asym_ids_ndb.insert(i, asym_id);
} }
for (auto asym_id : pdbx_poly_seq_scheme.rows<std::string>("asym_id")) for (auto asym_id : pdbx_poly_seq_scheme.rows<std::string>("asym_id"))
{ {
auto i = std::lower_bound(asym_ids_pdbx.begin(), asym_ids_pdbx.end(), asym_id); auto i = std::lower_bound(asym_ids_pdbx.begin(), asym_ids_pdbx.end(), asym_id);
if (i == asym_ids_pdbx.end() or *i != asym_id) if (i == asym_ids_pdbx.end() or *i != asym_id)
asym_ids_pdbx.insert(i, asym_id); asym_ids_pdbx.insert(i, asym_id);
} }
// If we have different Asym ID's assume the ndb is invalid. // If we have different Asym ID's assume the ndb is invalid.
if (asym_ids_ndb != asym_ids_pdbx) if (asym_ids_ndb != asym_ids_pdbx)
{ {
...@@ -632,7 +653,7 @@ void comparePolySeqSchemes(datablock &db) ...@@ -632,7 +653,7 @@ void comparePolySeqSchemes(datablock &db)
auto pdbx_range = pdbx_poly_seq_scheme.find(key("asym_id") == asym_id); auto pdbx_range = pdbx_poly_seq_scheme.find(key("asym_id") == asym_id);
for (auto ndb_i = ndb_range.begin(), pdbx_i = pdbx_range.begin(); for (auto ndb_i = ndb_range.begin(), pdbx_i = pdbx_range.begin();
ndb_i != ndb_range.end() or pdbx_i != pdbx_range.end(); ++ndb_i, ++pdbx_i) ndb_i != ndb_range.end() or pdbx_i != pdbx_range.end(); ++ndb_i, ++pdbx_i)
{ {
if (ndb_i == ndb_range.end() or pdbx_i == pdbx_range.end()) if (ndb_i == ndb_range.end() or pdbx_i == pdbx_range.end())
{ {
...@@ -662,6 +683,9 @@ void comparePolySeqSchemes(datablock &db) ...@@ -662,6 +683,9 @@ void comparePolySeqSchemes(datablock &db)
} }
} }
} }
if (ndb_poly_seq_scheme.empty())
db.erase(std::remove(db.begin(), db.end(), ndb_poly_seq_scheme), db.end());
} }
void reconstruct_pdbx(file &file, std::string_view dictionary) void reconstruct_pdbx(file &file, std::string_view dictionary)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment