Commit f4506438 by Maarten L. Hekkelman

Added remove column

parent fc14a655
...@@ -31,22 +31,22 @@ ...@@ -31,22 +31,22 @@
#include "cif++/condition.hpp" #include "cif++/condition.hpp"
#include "cif++/iterator.hpp" #include "cif++/iterator.hpp"
#include "cif++/row.hpp" #include "cif++/row.hpp"
#include "cif++/validate.hpp"
#include "cif++/text.hpp" #include "cif++/text.hpp"
#include "cif++/validate.hpp"
#include <array> #include <array>
/** \file category.hpp /** \file category.hpp
* Documentation for the cif::category class * Documentation for the cif::category class
* *
* The category class should meet the requirements of Container and * The category class should meet the requirements of Container and
* SequenceContainer. * SequenceContainer.
* *
* TODO: implement all of: * TODO: implement all of:
* https://en.cppreference.com/w/cpp/named_req/Container * https://en.cppreference.com/w/cpp/named_req/Container
* https://en.cppreference.com/w/cpp/named_req/SequenceContainer * https://en.cppreference.com/w/cpp/named_req/SequenceContainer
* and more? * and more?
*/ */
namespace cif namespace cif
{ {
...@@ -61,9 +61,9 @@ namespace cif ...@@ -61,9 +61,9 @@ namespace cif
class duplicate_key_error : public std::runtime_error class duplicate_key_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new duplicate key error object * @brief Construct a new duplicate key error object
*/ */
duplicate_key_error(const std::string &msg) duplicate_key_error(const std::string &msg)
: std::runtime_error(msg) : std::runtime_error(msg)
{ {
...@@ -75,9 +75,9 @@ class duplicate_key_error : public std::runtime_error ...@@ -75,9 +75,9 @@ class duplicate_key_error : public std::runtime_error
class missing_key_error : public std::runtime_error class missing_key_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new missing key error object * @brief Construct a new missing key error object
*/ */
missing_key_error(const std::string &msg, const std::string &key) missing_key_error(const std::string &msg, const std::string &key)
: std::runtime_error(msg) : std::runtime_error(msg)
, m_key(key) , m_key(key)
...@@ -95,9 +95,9 @@ class missing_key_error : public std::runtime_error ...@@ -95,9 +95,9 @@ class missing_key_error : public std::runtime_error
class multiple_results_error : public std::runtime_error class multiple_results_error : public std::runtime_error
{ {
public: public:
/** /**
* @brief Construct a new multiple results error object * @brief Construct a new multiple results error object
*/ */
multiple_results_error() multiple_results_error()
: std::runtime_error("query should have returned exactly one row") : std::runtime_error("query should have returned exactly one row")
{ {
...@@ -156,8 +156,8 @@ class category ...@@ -156,8 +156,8 @@ class category
// -------------------------------------------------------------------- // --------------------------------------------------------------------
const std::string &name() const { return m_name; } ///< Returns the name of the category const std::string &name() const { return m_name; } ///< Returns the name of the category
iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category
std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields. std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields.
/// @brief Set the validator for this category to @a v /// @brief Set the validator for this category to @a v
/// @param v The category_validator to assign. A nullptr value is allowed. /// @param v The category_validator to assign. A nullptr value is allowed.
...@@ -182,7 +182,7 @@ class category ...@@ -182,7 +182,7 @@ class category
/// @brief Validate links, that means, values in this category should have an /// @brief Validate links, that means, values in this category should have an
/// accompanying value in parent categories. /// accompanying value in parent categories.
/// ///
/// @note /// @note
/// The code makes one exception when validating missing links and that's between /// The code makes one exception when validating missing links and that's between
/// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*. /// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*.
...@@ -285,7 +285,7 @@ class category ...@@ -285,7 +285,7 @@ class category
/// Return the theoretical maximum number of rows that can be stored /// Return the theoretical maximum number of rows that can be stored
size_t max_size() const size_t max_size() const
{ {
return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess
} }
/// Return true if the category is empty /// Return true if the category is empty
...@@ -321,7 +321,7 @@ class category ...@@ -321,7 +321,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value")) /// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
/// std::cout << name << ": " << value << '\n'; /// std::cout << name << ": " << value << '\n';
/// @endcode /// @endcode
/// ///
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested /// @param names The names for the columns requested
...@@ -344,7 +344,7 @@ class category ...@@ -344,7 +344,7 @@ class category
/// ///
/// for (int id : cat.rows<int>("id")) /// for (int id : cat.rows<int>("id"))
/// std::cout << id << '\n'; /// std::cout << id << '\n';
/// @endcode /// @endcode
/// ///
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested /// @param names The names for the columns requested
...@@ -363,7 +363,7 @@ class category ...@@ -363,7 +363,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe")) /// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe"))
/// .. // do something with rh /// .. // do something with rh
/// @endcode /// @endcode
/// ///
/// @param cond The condition for the query /// @param cond The condition for the query
/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced /// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
...@@ -417,7 +417,7 @@ class category ...@@ -417,7 +417,7 @@ class category
/// @code{.cpp} /// @code{.cpp}
/// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value")) /// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value"))
/// std::cout << name << ": " << value << '\n'; /// std::cout << name << ": " << value << '\n';
/// @endcode /// @endcode
/// ///
/// @param cond The condition for the query /// @param cond The condition for the query
/// @tparam Ts The types for the columns requested /// @tparam Ts The types for the columns requested
...@@ -776,8 +776,7 @@ class category ...@@ -776,8 +776,7 @@ class category
/// @brief Return whether a row exists that matches condition @a cond /// @brief Return whether a row exists that matches condition @a cond
/// @param cond The condition to match /// @param cond The condition to match
/// @return True if a row exists /// @return True if a row exists
[[deprecated("Use contains instead")]] [[deprecated("Use contains instead")]] bool exists(condition &&cond) const
bool exists(condition &&cond) const
{ {
return contains(std::move(cond)); return contains(std::move(cond));
} }
...@@ -875,7 +874,7 @@ class category ...@@ -875,7 +874,7 @@ class category
// insert_impl(pos, std::move(row)); // insert_impl(pos, std::move(row));
// } // }
/// Erase the row pointed to by @a pos and return the iterator to the /// Erase the row pointed to by @a pos and return the iterator to the
/// row following pos. /// row following pos.
iterator erase(iterator pos); iterator erase(iterator pos);
...@@ -941,7 +940,6 @@ class category ...@@ -941,7 +940,6 @@ class category
/// result is unique in the context of this category /// result is unique in the context of this category
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number); std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
/// @brief Generate a new, unique ID based on a string prefix followed by a number /// @brief Generate a new, unique ID based on a string prefix followed by a number
/// @param prefix The string prefix /// @param prefix The string prefix
/// @return a new unique ID /// @return a new unique ID
...@@ -1038,6 +1036,11 @@ class category ...@@ -1038,6 +1036,11 @@ class category
return result; return result;
} }
/** @brief Remove the column named @a column_name
* @param column_name The column to be removed
*/
void remove_column(std::string_view column_name);
/// @brief Return whether a column with name @a name exists in this category /// @brief Return whether a column with name @a name exists in this category
/// @param name The name of the column /// @param name The name of the column
/// @return True if the column exists /// @return True if the column exists
...@@ -1082,11 +1085,10 @@ class category ...@@ -1082,11 +1085,10 @@ class category
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const; void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
public: public:
/// friend function to make it possible to do: /// friend function to make it possible to do:
/// @code {.cpp} /// @code {.cpp}
/// std::cout << my_category; /// std::cout << my_category;
/// @endcode /// @endcode
friend std::ostream &operator<<(std::ostream &os, const category &cat) friend std::ostream &operator<<(std::ostream &os, const category &cat)
{ {
cat.write(os); cat.write(os);
......
...@@ -374,6 +374,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde ...@@ -374,6 +374,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde
bool datablock::operator==(const datablock &rhs) const bool datablock::operator==(const datablock &rhs) const
{ {
// shortcut
if (this == &rhs)
return true;
auto &dbA = *this; auto &dbA = *this;
auto &dbB = rhs; auto &dbB = rhs;
......
...@@ -117,8 +117,8 @@ void fixNegativeSeqID(category &atom_site) ...@@ -117,8 +117,8 @@ void fixNegativeSeqID(category &atom_site)
const auto &[auth_seq_id, label_seq_id] = poly_seq.front(); const auto &[auth_seq_id, label_seq_id] = poly_seq.front();
for (auto row : atom_site.find(key("label_asym_id") == asym_id and for (auto row : atom_site.find(key("label_asym_id") == asym_id and
key("auth_seq_id") == auth_seq_id and key("auth_seq_id") == auth_seq_id and
key("label_seq_id") == label_seq_id)) key("label_seq_id") == label_seq_id))
{ {
row.assign("label_seq_id", ".", false, false); row.assign("label_seq_id", ".", false, false);
} }
...@@ -236,20 +236,19 @@ void checkAtomRecords(datablock &db) ...@@ -236,20 +236,19 @@ void checkAtomRecords(datablock &db)
{ "auth_seq_id", auth_seq_id.value_or(std::to_string(*label_seq_id)) }, { "auth_seq_id", auth_seq_id.value_or(std::to_string(*label_seq_id)) },
{ "auth_comp_id", auth_comp_id.value_or(*label_comp_id) }, { "auth_comp_id", auth_comp_id.value_or(*label_comp_id) },
{ "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } }); { "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } });
// Rewrite the coordinates and other fields that look better in a fixed format // Rewrite the coordinates and other fields that look better in a fixed format
// Be careful not to nuke invalidly formatted data here // Be careful not to nuke invalidly formatted data here
for (auto [tag, prec] : std::vector<std::tuple<std::string_view,std::string::size_type>>{ for (auto [tag, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{
{ "cartn_x", 3 }, { "cartn_x", 3 },
{ "cartn_y", 3 }, { "cartn_y", 3 },
{ "cartn_z", 3 }, { "cartn_z", 3 },
{ "occupancy", 2 }, { "occupancy", 2 },
{ "b_iso_or_equiv", 2 } { "b_iso_or_equiv", 2 } })
})
{ {
if (row[tag].empty()) if (row[tag].empty())
continue; continue;
float v; float v;
auto s = row.get<std::string>(tag); auto s = row.get<std::string>(tag);
if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc()) if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc())
...@@ -260,8 +259,30 @@ void checkAtomRecords(datablock &db) ...@@ -260,8 +259,30 @@ void checkAtomRecords(datablock &db)
char b[12]; char b[12];
if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc()) if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc())
row.assign(tag, {b, static_cast<std::string::size_type>(ptr - b)}, false, false); row.assign(tag, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
}
}
}
auto *cv = atom_site.get_cat_validator();
if (cv)
{
// See if there are columns that are no longer known
for (auto tag : atom_site.get_columns())
{
if (cv->get_validator_for_item(tag) != nullptr)
continue;
auto r = atom_site.find_first(key(tag) != null);
if (not r)
{
if (cif::VERBOSE > 0)
std::clog << "Dropping unknown column " << tag << '\n';
atom_site.remove_column(tag);
} }
else if (cif::VERBOSE > 0)
std::clog << "Keeping unknown column " << std::quoted(tag) << " in atom_site since it is not empty\n";
} }
} }
} }
...@@ -607,14 +628,14 @@ void comparePolySeqSchemes(datablock &db) ...@@ -607,14 +628,14 @@ void comparePolySeqSchemes(datablock &db)
if (i == asym_ids_ndb.end() or *i != asym_id) if (i == asym_ids_ndb.end() or *i != asym_id)
asym_ids_ndb.insert(i, asym_id); asym_ids_ndb.insert(i, asym_id);
} }
for (auto asym_id : pdbx_poly_seq_scheme.rows<std::string>("asym_id")) for (auto asym_id : pdbx_poly_seq_scheme.rows<std::string>("asym_id"))
{ {
auto i = std::lower_bound(asym_ids_pdbx.begin(), asym_ids_pdbx.end(), asym_id); auto i = std::lower_bound(asym_ids_pdbx.begin(), asym_ids_pdbx.end(), asym_id);
if (i == asym_ids_pdbx.end() or *i != asym_id) if (i == asym_ids_pdbx.end() or *i != asym_id)
asym_ids_pdbx.insert(i, asym_id); asym_ids_pdbx.insert(i, asym_id);
} }
// If we have different Asym ID's assume the ndb is invalid. // If we have different Asym ID's assume the ndb is invalid.
if (asym_ids_ndb != asym_ids_pdbx) if (asym_ids_ndb != asym_ids_pdbx)
{ {
...@@ -632,7 +653,7 @@ void comparePolySeqSchemes(datablock &db) ...@@ -632,7 +653,7 @@ void comparePolySeqSchemes(datablock &db)
auto pdbx_range = pdbx_poly_seq_scheme.find(key("asym_id") == asym_id); auto pdbx_range = pdbx_poly_seq_scheme.find(key("asym_id") == asym_id);
for (auto ndb_i = ndb_range.begin(), pdbx_i = pdbx_range.begin(); for (auto ndb_i = ndb_range.begin(), pdbx_i = pdbx_range.begin();
ndb_i != ndb_range.end() or pdbx_i != pdbx_range.end(); ++ndb_i, ++pdbx_i) ndb_i != ndb_range.end() or pdbx_i != pdbx_range.end(); ++ndb_i, ++pdbx_i)
{ {
if (ndb_i == ndb_range.end() or pdbx_i == pdbx_range.end()) if (ndb_i == ndb_range.end() or pdbx_i == pdbx_range.end())
{ {
...@@ -662,6 +683,9 @@ void comparePolySeqSchemes(datablock &db) ...@@ -662,6 +683,9 @@ void comparePolySeqSchemes(datablock &db)
} }
} }
} }
if (ndb_poly_seq_scheme.empty())
db.erase(std::remove(db.begin(), db.end(), ndb_poly_seq_scheme), db.end());
} }
void reconstruct_pdbx(file &file, std::string_view dictionary) void reconstruct_pdbx(file &file, std::string_view dictionary)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment