Commit 30a2ebdb by Maarten L. Hekkelman

Merge remote-tracking branch 'github/develop-cif2fasta' into develop

parents 13ab1caf a5d43998
...@@ -169,7 +169,16 @@ class datablock : public std::list<category> ...@@ -169,7 +169,16 @@ class datablock : public std::list<category>
/** /**
* @brief Get the preferred order of the categories when writing them * @brief Get the preferred order of the categories when writing them
*/ */
std::vector<std::string> get_tag_order() const; [[deprecated("use get_item_order instead")]]
std::vector<std::string> get_tag_order() const
{
return get_item_order();
}
/**
* @brief Get the preferred order of the categories when writing them
*/
std::vector<std::string> get_item_order() const;
/** /**
* @brief Write out the contents to @a os * @brief Write out the contents to @a os
...@@ -177,9 +186,9 @@ class datablock : public std::list<category> ...@@ -177,9 +186,9 @@ class datablock : public std::list<category>
void write(std::ostream &os) const; void write(std::ostream &os) const;
/** /**
* @brief Write out the contents to @a os using the order defined in @a tag_order * @brief Write out the contents to @a os using the order defined in @a item_name_order
*/ */
void write(std::ostream &os, const std::vector<std::string> &tag_order); void write(std::ostream &os, const std::vector<std::string> &item_name_order);
/** /**
* @brief Friend operator<< to write datablock @a db to std::ostream @a os * @brief Friend operator<< to write datablock @a db to std::ostream @a os
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
/** \file item.hpp /** \file item.hpp
* *
* This file contains the declaration of item but also the item_value and item_handle * This file contains the declaration of item but also the item_value and item_handle
* These handle the storage of and access to the data for a single data field. * These handle the storage of and access to the data for a single data item.
*/ */
namespace cif namespace cif
...@@ -227,10 +227,10 @@ class item ...@@ -227,10 +227,10 @@ class item
/// \brief empty means either null or unknown /// \brief empty means either null or unknown
bool empty() const { return m_value.empty(); } bool empty() const { return m_value.empty(); }
/// \brief returns true if the field contains '.' /// \brief returns true if the item contains '.'
bool is_null() const { return m_value == "."; } bool is_null() const { return m_value == "."; }
/// \brief returns true if the field contains '?' /// \brief returns true if the item contains '?'
bool is_unknown() const { return m_value == "?"; } bool is_unknown() const { return m_value == "?"; }
/// \brief the length of the value string /// \brief the length of the value string
...@@ -464,14 +464,14 @@ struct item_handle ...@@ -464,14 +464,14 @@ struct item_handle
/** Easy way to test for an empty item */ /** Easy way to test for an empty item */
explicit operator bool() const { return not empty(); } explicit operator bool() const { return not empty(); }
/// is_null returns true if the field contains '.' /// is_null returns true if the item contains '.'
bool is_null() const bool is_null() const
{ {
auto txt = text(); auto txt = text();
return txt.length() == 1 and txt.front() == '.'; return txt.length() == 1 and txt.front() == '.';
} }
/// is_unknown returns true if the field contains '?' /// is_unknown returns true if the item contains '?'
bool is_unknown() const bool is_unknown() const
{ {
auto txt = text(); auto txt = text();
...@@ -484,11 +484,11 @@ struct item_handle ...@@ -484,11 +484,11 @@ struct item_handle
/** /**
* @brief Construct a new item handle object * @brief Construct a new item handle object
* *
* @param column Column index * @param item Item index
* @param row Reference to the row * @param row Reference to the row
*/ */
item_handle(uint16_t column, row_handle &row) item_handle(uint16_t item, row_handle &row)
: m_column(column) : m_item_ix(item)
, m_row_handle(row) , m_row_handle(row)
{ {
} }
...@@ -505,7 +505,7 @@ struct item_handle ...@@ -505,7 +505,7 @@ struct item_handle
private: private:
item_handle(); item_handle();
uint16_t m_column; uint16_t m_item_ix;
row_handle &m_row_handle; row_handle &m_row_handle;
void assign_value(const item &value); void assign_value(const item &value);
......
...@@ -90,7 +90,7 @@ class iterator_impl ...@@ -90,7 +90,7 @@ class iterator_impl
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(rhs.m_current) , m_current(rhs.m_current)
, m_value(rhs.m_value) , m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix) , m_item_ix(rhs.m_item_ix)
{ {
} }
...@@ -99,7 +99,7 @@ class iterator_impl ...@@ -99,7 +99,7 @@ class iterator_impl
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current)) , m_current(const_cast<row_type *>(rhs.m_current))
, m_value(rhs.m_value) , m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix) , m_item_ix(rhs.m_item_ix)
{ {
m_value = get(std::make_index_sequence<N>()); m_value = get(std::make_index_sequence<N>());
} }
...@@ -108,7 +108,7 @@ class iterator_impl ...@@ -108,7 +108,7 @@ class iterator_impl
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, N> &cix) iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, N> &cix)
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(rhs.m_current) , m_current(rhs.m_current)
, m_column_ix(cix) , m_item_ix(cix)
{ {
m_value = get(std::make_index_sequence<N>()); m_value = get(std::make_index_sequence<N>());
} }
...@@ -117,7 +117,7 @@ class iterator_impl ...@@ -117,7 +117,7 @@ class iterator_impl
{ {
m_category = i.m_category; m_category = i.m_category;
m_current = i.m_current; m_current = i.m_current;
m_column_ix = i.m_column_ix; m_item_ix = i.m_item_ix;
m_value = i.m_value; m_value = i.m_value;
return *this; return *this;
} }
...@@ -185,7 +185,7 @@ class iterator_impl ...@@ -185,7 +185,7 @@ class iterator_impl
if (m_current != nullptr) if (m_current != nullptr)
{ {
row_handle rh{ *m_category, *m_current }; row_handle rh{ *m_category, *m_current };
return tuple_type{ rh[m_column_ix[Is]].template as<Ts>()... }; return tuple_type{ rh[m_item_ix[Is]].template as<Ts>()... };
} }
return {}; return {};
...@@ -194,7 +194,7 @@ class iterator_impl ...@@ -194,7 +194,7 @@ class iterator_impl
category_type *m_category = nullptr; category_type *m_category = nullptr;
row_type *m_current = nullptr; row_type *m_current = nullptr;
value_type m_value; value_type m_value;
std::array<uint16_t, N> m_column_ix; std::array<uint16_t, N> m_item_ix;
}; };
/** /**
...@@ -348,7 +348,7 @@ class iterator_impl<Category, T> ...@@ -348,7 +348,7 @@ class iterator_impl<Category, T>
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(rhs.m_current) , m_current(rhs.m_current)
, m_value(rhs.m_value) , m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix) , m_item_ix(rhs.m_item_ix)
{ {
} }
...@@ -357,7 +357,7 @@ class iterator_impl<Category, T> ...@@ -357,7 +357,7 @@ class iterator_impl<Category, T>
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current)) , m_current(const_cast<row_type *>(rhs.m_current))
, m_value(rhs.m_value) , m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix) , m_item_ix(rhs.m_item_ix)
{ {
m_value = get(m_current); m_value = get(m_current);
} }
...@@ -366,7 +366,7 @@ class iterator_impl<Category, T> ...@@ -366,7 +366,7 @@ class iterator_impl<Category, T>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, 1> &cix) iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, 1> &cix)
: m_category(rhs.m_category) : m_category(rhs.m_category)
, m_current(rhs.m_current) , m_current(rhs.m_current)
, m_column_ix(cix[0]) , m_item_ix(cix[0])
{ {
m_value = get(); m_value = get();
} }
...@@ -375,7 +375,7 @@ class iterator_impl<Category, T> ...@@ -375,7 +375,7 @@ class iterator_impl<Category, T>
{ {
m_category = i.m_category; m_category = i.m_category;
m_current = i.m_current; m_current = i.m_current;
m_column_ix = i.m_column_ix; m_item_ix = i.m_item_ix;
m_value = i.m_value; m_value = i.m_value;
return *this; return *this;
} }
...@@ -442,7 +442,7 @@ class iterator_impl<Category, T> ...@@ -442,7 +442,7 @@ class iterator_impl<Category, T>
if (m_current != nullptr) if (m_current != nullptr)
{ {
row_handle rh{ *m_category, *m_current }; row_handle rh{ *m_category, *m_current };
return rh[m_column_ix].template as<T>(); return rh[m_item_ix].template as<T>();
} }
return {}; return {};
...@@ -451,7 +451,7 @@ class iterator_impl<Category, T> ...@@ -451,7 +451,7 @@ class iterator_impl<Category, T>
category_type *m_category = nullptr; category_type *m_category = nullptr;
row_type *m_current = nullptr; row_type *m_current = nullptr;
value_type m_value; value_type m_value;
uint16_t m_column_ix; uint16_t m_item_ix;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -482,8 +482,8 @@ class iterator_proxy ...@@ -482,8 +482,8 @@ class iterator_proxy
using iterator = iterator_impl<category_type, Ts...>; using iterator = iterator_impl<category_type, Ts...>;
using row_iterator = iterator_impl<category_type>; using row_iterator = iterator_impl<category_type>;
iterator_proxy(category_type &cat, row_iterator pos, char const *const columns[N]); iterator_proxy(category_type &cat, row_iterator pos, char const *const items[N]);
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> columns); iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> items);
iterator_proxy(iterator_proxy &&p); iterator_proxy(iterator_proxy &&p);
iterator_proxy &operator=(iterator_proxy &&p); iterator_proxy &operator=(iterator_proxy &&p);
...@@ -492,8 +492,8 @@ class iterator_proxy ...@@ -492,8 +492,8 @@ class iterator_proxy
iterator_proxy &operator=(const iterator_proxy &) = delete; iterator_proxy &operator=(const iterator_proxy &) = delete;
/** @endcond */ /** @endcond */
iterator begin() const { return iterator(m_begin, m_column_ix); } ///< Return the iterator pointing to the first row iterator begin() const { return iterator(m_begin, m_item_ix); } ///< Return the iterator pointing to the first row
iterator end() const { return iterator(m_end, m_column_ix); } ///< Return the iterator pointing past the last row iterator end() const { return iterator(m_end, m_item_ix); } ///< Return the iterator pointing past the last row
bool empty() const { return m_begin == m_end; } ///< Return true if the range is empty bool empty() const { return m_begin == m_end; } ///< Return true if the range is empty
explicit operator bool() const { return not empty(); } ///< Easy way to detect if the range is empty explicit operator bool() const { return not empty(); } ///< Easy way to detect if the range is empty
...@@ -510,13 +510,13 @@ class iterator_proxy ...@@ -510,13 +510,13 @@ class iterator_proxy
std::swap(m_category, rhs.m_category); std::swap(m_category, rhs.m_category);
std::swap(m_begin, rhs.m_begin); std::swap(m_begin, rhs.m_begin);
std::swap(m_end, rhs.m_end); std::swap(m_end, rhs.m_end);
std::swap(m_column_ix, rhs.m_column_ix); std::swap(m_item_ix, rhs.m_item_ix);
} }
private: private:
category_type *m_category; category_type *m_category;
row_iterator m_begin, m_end; row_iterator m_begin, m_end;
std::array<uint16_t, N> m_column_ix; std::array<uint16_t, N> m_item_ix;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -651,26 +651,26 @@ class conditional_iterator_proxy ...@@ -651,26 +651,26 @@ class conditional_iterator_proxy
/** @cond */ /** @cond */
template <typename Category, typename... Ts> template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N]) iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const items[N])
: m_category(&cat) : m_category(&cat)
, m_begin(pos) , m_begin(pos)
, m_end(cat.end()) , m_end(cat.end())
{ {
for (uint16_t i = 0; i < N; ++i) for (uint16_t i = 0; i < N; ++i)
m_column_ix[i] = m_category->get_column_ix(columns[i]); m_item_ix[i] = m_category->get_item_ix(items[i]);
} }
template <typename Category, typename... Ts> template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> columns) iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> items)
: m_category(&cat) : m_category(&cat)
, m_begin(pos) , m_begin(pos)
, m_end(cat.end()) , m_end(cat.end())
{ {
// static_assert(columns.size() == N, "The list of column names should be exactly the same as the list of requested columns"); // static_assert(items.size() == N, "The list of item names should be exactly the same as the list of requested items");
std::uint16_t i = 0; std::uint16_t i = 0;
for (auto column : columns) for (auto item : items)
m_column_ix[i++] = m_category->get_column_ix(column); m_item_ix[i++] = m_category->get_item_ix(item);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -707,7 +707,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category ...@@ -707,7 +707,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category
, mCBegin(pos) , mCBegin(pos)
, mCEnd(cat.end()) , mCEnd(cat.end())
{ {
static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types"); static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of item names should be equal to number of requested value types");
if (m_condition) if (m_condition)
{ {
...@@ -720,7 +720,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category ...@@ -720,7 +720,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category
mCBegin = mCEnd; mCBegin = mCEnd;
uint16_t i = 0; uint16_t i = 0;
((mCix[i++] = m_cat->get_column_ix(names)), ...); ((mCix[i++] = m_cat->get_item_ix(names)), ...);
} }
template <typename Category, typename... Ts> template <typename Category, typename... Ts>
......
...@@ -72,7 +72,7 @@ class structure; ...@@ -72,7 +72,7 @@ class structure;
* *
* The class atom is a kind of flyweight class. It can be copied * The class atom is a kind of flyweight class. It can be copied
* with low overhead. All data is stored in the underlying mmCIF * with low overhead. All data is stored in the underlying mmCIF
* categories but some very often used fields are cached in the * categories but some very often used items are cached in the
* impl. * impl.
* *
* It is also possible to have symmetry copies of atoms. They * It is also possible to have symmetry copies of atoms. They
...@@ -207,7 +207,7 @@ class atom ...@@ -207,7 +207,7 @@ class atom
/// \brief Copy assignment operator /// \brief Copy assignment operator
atom &operator=(const atom &rhs) = default; atom &operator=(const atom &rhs) = default;
/// \brief Return the field named @a name in the _atom_site category for this atom /// \brief Return the item named @a name in the _atom_site category for this atom
std::string get_property(std::string_view name) const std::string get_property(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -215,7 +215,7 @@ class atom ...@@ -215,7 +215,7 @@ class atom
return m_impl->get_property(name); return m_impl->get_property(name);
} }
/// \brief Return the field named @a name in the _atom_site category for this atom cast to an int /// \brief Return the item named @a name in the _atom_site category for this atom cast to an int
int get_property_int(std::string_view name) const int get_property_int(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -223,7 +223,7 @@ class atom ...@@ -223,7 +223,7 @@ class atom
return m_impl->get_property_int(name); return m_impl->get_property_int(name);
} }
/// \brief Return the field named @a name in the _atom_site category for this atom cast to a float /// \brief Return the item named @a name in the _atom_site category for this atom cast to a float
float get_property_float(std::string_view name) const float get_property_float(std::string_view name) const
{ {
if (not m_impl) if (not m_impl)
...@@ -231,7 +231,7 @@ class atom ...@@ -231,7 +231,7 @@ class atom
return m_impl->get_property_float(name); return m_impl->get_property_float(name);
} }
/// \brief Set value for the field named @a name in the _atom_site category to @a value /// \brief Set value for the item named @a name in the _atom_site category to @a value
void set_property(const std::string_view name, const std::string &value) void set_property(const std::string_view name, const std::string &value)
{ {
if (not m_impl) if (not m_impl)
...@@ -239,7 +239,7 @@ class atom ...@@ -239,7 +239,7 @@ class atom
m_impl->set_property(name, value); m_impl->set_property(name, value);
} }
/// \brief Set value for the field named @a name in the _atom_site category to @a value /// \brief Set value for the item named @a name in the _atom_site category to @a value
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0> template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
void set_property(const std::string_view name, const T &value) void set_property(const std::string_view name, const T &value)
{ {
...@@ -730,7 +730,7 @@ class sugar : public residue ...@@ -730,7 +730,7 @@ class sugar : public residue
/** /**
* @brief Return the sugar number in the glycosylation tree * @brief Return the sugar number in the glycosylation tree
* *
* To store the sugar number, the auth_seq_id field has been overloaded * To store the sugar number, the auth_seq_id item has been overloaded
* in the specification. But since a sugar number should be, ehm, a number * in the specification. But since a sugar number should be, ehm, a number
* and auth_seq_id is specified to contain a string, we do a check here * and auth_seq_id is specified to contain a string, we do a check here
* to see if it really is a number. * to see if it really is a number.
......
...@@ -143,9 +143,9 @@ class sac_parser ...@@ -143,9 +143,9 @@ class sac_parser
enum class CIFToken enum class CIFToken
{ {
Unknown, UNKNOWN,
Eof, END_OF_FILE,
DATA, DATA,
LOOP, LOOP,
...@@ -153,24 +153,24 @@ class sac_parser ...@@ -153,24 +153,24 @@ class sac_parser
SAVE_, SAVE_,
SAVE_NAME, SAVE_NAME,
STOP, STOP,
Tag, ITEM_NAME,
Value VALUE
}; };
static constexpr const char *get_token_name(CIFToken token) static constexpr const char *get_token_name(CIFToken token)
{ {
switch (token) switch (token)
{ {
case CIFToken::Unknown: return "Unknown"; case CIFToken::UNKNOWN: return "Unknown";
case CIFToken::Eof: return "Eof"; case CIFToken::END_OF_FILE: return "Eof";
case CIFToken::DATA: return "DATA"; case CIFToken::DATA: return "DATA";
case CIFToken::LOOP: return "LOOP"; case CIFToken::LOOP: return "LOOP";
case CIFToken::GLOBAL: return "GLOBAL"; case CIFToken::GLOBAL: return "GLOBAL";
case CIFToken::SAVE_: return "SAVE"; case CIFToken::SAVE_: return "SAVE";
case CIFToken::SAVE_NAME: return "SAVE+name"; case CIFToken::SAVE_NAME: return "SAVE+name";
case CIFToken::STOP: return "STOP"; case CIFToken::STOP: return "STOP";
case CIFToken::Tag: return "Tag"; case CIFToken::ITEM_NAME: return "Tag";
case CIFToken::Value: return "Value"; case CIFToken::VALUE: return "Value";
default: return "Invalid token parameter"; default: return "Invalid token parameter";
} }
} }
...@@ -267,9 +267,9 @@ class sac_parser ...@@ -267,9 +267,9 @@ class sac_parser
QuotedString, QuotedString,
QuotedStringQuote, QuotedStringQuote,
UnquotedString, UnquotedString,
Tag, ItemName,
TextField, TextItem,
TextFieldNL, TextItemNL,
Reserved, Reserved,
Value Value
}; };
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
#include "cif++/file.hpp" #include "cif++/file.hpp"
#include <system_error>
/** /**
* @file pdb.hpp * @file pdb.hpp
* *
...@@ -119,6 +121,8 @@ void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx ...@@ -119,6 +121,8 @@ void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity * atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
* *
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages. * Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* This function throws a std::system_error in case of an error
* *
* \param pdbx_file The input file * \param pdbx_file The input file
* \param dictionary The mmcif dictionary to use * \param dictionary The mmcif dictionary to use
...@@ -127,6 +131,43 @@ void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx ...@@ -127,6 +131,43 @@ void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx"); bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* The dictionary is assumed to be specified in the file or to be the
* default mmcif_pdbx.dic dictionary.
*
* \param pdbx_file The input file
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::error_code &ec);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* \param pdbx_file The input file
* \param dictionary The dictionary to use
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary,
std::error_code &ec);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Other I/O related routines // Other I/O related routines
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
* std::string name = rh["label_atom_id"].as<std::string>(); * std::string name = rh["label_atom_id"].as<std::string>();
* *
* // by index: * // by index:
* uint16_t ix = atom_site.get_column_ix("label_atom_id"); * uint16_t ix = atom_site.get_item_ix("label_atom_id");
* assert(rh[ix].as<std::string>() == name); * assert(rh[ix].as<std::string>() == name);
* @endcode * @endcode
* *
...@@ -87,15 +87,15 @@ namespace detail ...@@ -87,15 +87,15 @@ namespace detail
{ {
static constexpr size_t N = sizeof...(C); static constexpr size_t N = sizeof...(C);
get_row_result(const row_handle &r, std::array<uint16_t, N> &&columns) get_row_result(const row_handle &r, std::array<uint16_t, N> &&items)
: m_row(r) : m_row(r)
, m_columns(std::move(columns)) , m_items(std::move(items))
{ {
} }
const item_handle operator[](uint16_t ix) const const item_handle operator[](uint16_t ix) const
{ {
return m_row[m_columns[ix]]; return m_row[m_items[ix]];
} }
template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0> template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0>
...@@ -107,11 +107,11 @@ namespace detail ...@@ -107,11 +107,11 @@ namespace detail
template <typename... Ts, size_t... Is> template <typename... Ts, size_t... Is>
std::tuple<Ts...> get(std::index_sequence<Is...>) const std::tuple<Ts...> get(std::index_sequence<Is...>) const
{ {
return std::tuple<Ts...>{ m_row[m_columns[Is]].template as<Ts>()... }; return std::tuple<Ts...>{ m_row[m_items[Is]].template as<Ts>()... };
} }
const row_handle &m_row; const row_handle &m_row;
std::array<uint16_t, N> m_columns; std::array<uint16_t, N> m_items;
}; };
// we want to be able to tie some variables to a get_row_result, for this we use tiewraps // we want to be able to tie some variables to a get_row_result, for this we use tiewraps
...@@ -244,70 +244,70 @@ class row_handle ...@@ -244,70 +244,70 @@ class row_handle
return not empty(); return not empty();
} }
/// \brief return a cif::item_handle to the item in column @a column_ix /// \brief return a cif::item_handle to the item at index @a item_ix
item_handle operator[](uint16_t column_ix) item_handle operator[](uint16_t item_ix)
{ {
return empty() ? item_handle::s_null_item : item_handle(column_ix, *this); return empty() ? item_handle::s_null_item : item_handle(item_ix, *this);
} }
/// \brief return a const cif::item_handle to the item in column @a column_ix /// \brief return a const cif::item_handle to the item at index @a item_ix
const item_handle operator[](uint16_t column_ix) const const item_handle operator[](uint16_t item_ix) const
{ {
return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this)); return empty() ? item_handle::s_null_item : item_handle(item_ix, const_cast<row_handle &>(*this));
} }
/// \brief return a cif::item_handle to the item in the column named @a column_name /// \brief return a cif::item_handle to the item named @a item_name
item_handle operator[](std::string_view column_name) item_handle operator[](std::string_view item_name)
{ {
return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this); return empty() ? item_handle::s_null_item : item_handle(add_item(item_name), *this);
} }
/// \brief return a const cif::item_handle to the item in the column named @a column_name /// \brief return a const cif::item_handle to the item named @a item_name
const item_handle operator[](std::string_view column_name) const const item_handle operator[](std::string_view item_name) const
{ {
return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this)); return empty() ? item_handle::s_null_item : item_handle(get_item_ix(item_name), const_cast<row_handle &>(*this));
} }
/// \brief Return an object that can be used in combination with cif::tie /// \brief Return an object that can be used in combination with cif::tie
/// to assign the values for the columns @a columns /// to assign the values for the items @a items
template <typename... C> template <typename... C>
auto get(C... columns) const auto get(C... items) const
{ {
return detail::get_row_result<C...>(*this, { get_column_ix(columns)... }); return detail::get_row_result<C...>(*this, { get_item_ix(items)... });
} }
/// \brief Return a tuple of values of types @a Ts for the columns @a columns /// \brief Return a tuple of values of types @a Ts for the items @a items
template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1, int> = 0> template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1, int> = 0>
std::tuple<Ts...> get(C... columns) const std::tuple<Ts...> get(C... items) const
{ {
return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... }); return detail::get_row_result<Ts...>(*this, { get_item_ix(items)... });
} }
/// \brief Get the value of column @a column cast to type @a T /// \brief Get the value of item @a item cast to type @a T
template <typename T> template <typename T>
T get(const char *column) const T get(const char *item) const
{ {
return operator[](get_column_ix(column)).template as<T>(); return operator[](get_item_ix(item)).template as<T>();
} }
/// \brief Get the value of column @a column cast to type @a T /// \brief Get the value of item @a item cast to type @a T
template <typename T> template <typename T>
T get(std::string_view column) const T get(std::string_view item) const
{ {
return operator[](get_column_ix(column)).template as<T>(); return operator[](get_item_ix(item)).template as<T>();
} }
/// \brief assign each of the columns named in @a values to their respective value /// \brief assign each of the items named in @a values to their respective value
void assign(const std::vector<item> &values) void assign(const std::vector<item> &values)
{ {
for (auto &value : values) for (auto &value : values)
assign(value, true); assign(value, true);
} }
/** \brief assign the value @a value to the column named @a name /** \brief assign the value @a value to the item named @a name
* *
* If updateLinked is true, linked records are updated as well. * If updateLinked is true, linked records are updated as well.
* That means that if column @a name is part of the link definition * That means that if item @a name is part of the link definition
* and the link results in a linked record in another category * and the link results in a linked record in another category
* this record in the linked category is updated as well. * this record in the linked category is updated as well.
* *
...@@ -317,13 +317,13 @@ class row_handle ...@@ -317,13 +317,13 @@ class row_handle
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true) void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
{ {
assign(add_column(name), value, updateLinked, validate); assign(add_item(name), value, updateLinked, validate);
} }
/** \brief assign the value @a value to column at index @a column /** \brief assign the value @a value to item at index @a item
* *
* If updateLinked is true, linked records are updated as well. * If updateLinked is true, linked records are updated as well.
* That means that if column @a column is part of the link definition * That means that if item @a item is part of the link definition
* and the link results in a linked record in another category * and the link results in a linked record in another category
* this record in the linked category is updated as well. * this record in the linked category is updated as well.
* *
...@@ -331,7 +331,7 @@ class row_handle ...@@ -331,7 +331,7 @@ class row_handle
* checked to see if it conforms to the rules defined in the dictionary * checked to see if it conforms to the rules defined in the dictionary
*/ */
void assign(uint16_t column, std::string_view value, bool updateLinked, bool validate = true); void assign(uint16_t item, std::string_view value, bool updateLinked, bool validate = true);
/// \brief compare two rows /// \brief compare two rows
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; } bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
...@@ -340,10 +340,10 @@ class row_handle ...@@ -340,10 +340,10 @@ class row_handle
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; } bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
private: private:
uint16_t get_column_ix(std::string_view name) const; uint16_t get_item_ix(std::string_view name) const;
std::string_view get_column_name(uint16_t ix) const; std::string_view get_item_name(uint16_t ix) const;
uint16_t add_column(std::string_view name); uint16_t add_item(std::string_view name);
row *get_row() row *get_row()
{ {
...@@ -360,7 +360,7 @@ class row_handle ...@@ -360,7 +360,7 @@ class row_handle
assign(i.name(), i.value(), updateLinked); assign(i.name(), i.value(), updateLinked);
} }
void swap(uint16_t column, row_handle &r); void swap(uint16_t item, row_handle &r);
category *m_category = nullptr; category *m_category = nullptr;
row *m_row = nullptr; row *m_row = nullptr;
......
...@@ -317,7 +317,7 @@ inline char tolower(int ch) ...@@ -317,7 +317,7 @@ inline char tolower(int ch)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/** \brief return a tuple consisting of the category and item name for @a tag /** \brief return a tuple consisting of the category and item name for @a item_name
* *
* The category name is stripped of its leading underscore character. * The category name is stripped of its leading underscore character.
* *
...@@ -325,7 +325,19 @@ inline char tolower(int ch) ...@@ -325,7 +325,19 @@ inline char tolower(int ch)
* cif 1.0 formatted data. * cif 1.0 formatted data.
*/ */
std::tuple<std::string, std::string> split_tag_name(std::string_view tag); [[deprecated("use split_item_name instead")]]
std::tuple<std::string, std::string> split_tag_name(std::string_view item_name);
/** \brief return a tuple consisting of the category and item name for @a item_name
*
* The category name is stripped of its leading underscore character.
*
* If no dot character was found, the category name is empty. That's for
* cif 1.0 formatted data.
*/
std::tuple<std::string, std::string> split_item_name(std::string_view item_name);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -28,9 +28,11 @@ ...@@ -28,9 +28,11 @@
#include "cif++/text.hpp" #include "cif++/text.hpp"
#include <cassert>
#include <filesystem> #include <filesystem>
#include <list> #include <list>
#include <mutex> #include <mutex>
#include <system_error>
#include <utility> #include <utility>
/** /**
...@@ -49,29 +51,123 @@ namespace cif ...@@ -49,29 +51,123 @@ namespace cif
struct category_validator; struct category_validator;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// New: error_code
/** /**
* @brief The exception thrown when a validation error occurs * @enum validation_error
* *
* @brief A strongly typed enum containing the error codes reported by @ref cif::validator and friends
*/ */
class validation_error : public std::exception enum class validation_error
{
value_does_not_match_rx = 1, /**< The value of an item does not conform to the regular expression specified for it */
value_is_not_in_enumeration_list, /**< The value of an item is not in the list of values allowed */
not_a_known_primitive_type, /**< The type is not a known primitive type */
undefined_category, /**< Category has no definition in the dictionary */
unknown_item, /**< The item is not defined to be part of the category */
incorrect_item_validator, /**< Incorrectly specified validator for item */
missing_mandatory_items, /**< Missing mandatory items */
missing_key_items, /**< An index could not be constructed due to missing key items */
item_not_allowed_in_category, /**< Requested item is not allowed in category according to dictionary */
empty_file, /**< The file contains no datablocks */
empty_datablock, /**< The datablock contains no categories */
empty_category, /**< The category is empty */
not_valid_pdbx, /**< The file is not a valid PDBx file */
};
/**
* @brief The implementation for @ref validation_category error messages
*
*/
class validation_category_impl : public std::error_category
{ {
public: public:
/// @brief Constructor /**
validation_error(const std::string &msg); * @brief User friendly name
*
* @return const char*
*/
/// @brief Constructor const char *name() const noexcept override
validation_error(const std::string &cat, const std::string &item, {
const std::string &msg); return "cif::validation";
}
/// @brief The description of the error /**
const char *what() const noexcept { return m_msg.c_str(); } * @brief Provide the error message as a string for the error code @a ev
*
* @param ev The error code
* @return std::string
*/
std::string message(int ev) const override
{
switch (static_cast<validation_error>(ev))
{
case validation_error::value_does_not_match_rx:
return "Value in item does not match regular expression";
case validation_error::value_is_not_in_enumeration_list:
return "Value is not in the enumerated list of valid values";
case validation_error::not_a_known_primitive_type:
return "The type is not a known primitive type";
case validation_error::undefined_category:
return "Category has no definition in the dictionary";
case validation_error::unknown_item:
return "The item is not defined to be part of the category";
case validation_error::incorrect_item_validator:
return "Incorrectly specified validator for item";
case validation_error::missing_mandatory_items:
return "Missing mandatory items";
case validation_error::missing_key_items:
return "An index could not be constructed due to missing key items";
case validation_error::item_not_allowed_in_category:
return "Requested item allowed in category according to dictionary";
case validation_error::empty_file:
return "The file contains no datablocks";
case validation_error::empty_datablock:
return "The datablock contains no categories";
case validation_error::empty_category:
return "The category is empty";
case validation_error::not_valid_pdbx:
return "The file is not a valid PDBx file";
default:
assert(false);
return "unknown error code";
}
}
/// @cond /**
std::string m_msg; * @brief Return whether two error codes are equivalent, always false in this case
/// @endcond *
*/
bool equivalent(const std::error_code & /*code*/, int /*condition*/) const noexcept override
{
return false;
}
}; };
/**
* @brief Return the implementation for the validation_category
*
* @return std::error_category&
*/
inline std::error_category &validation_category()
{
static validation_category_impl instance;
return instance;
}
inline std::error_code make_error_code(validation_error e)
{
return std::error_code(static_cast<int>(e), validation_category());
}
inline std::error_condition make_error_condition(validation_error e)
{
return std::error_condition(static_cast<int>(e), validation_category());
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/** @brief the primitive types known */ /** @brief the primitive types known */
...@@ -85,6 +181,9 @@ enum class DDL_PrimitiveType ...@@ -85,6 +181,9 @@ enum class DDL_PrimitiveType
/// @brief Return the DDL_PrimitiveType encoded in @a s /// @brief Return the DDL_PrimitiveType encoded in @a s
DDL_PrimitiveType map_to_primitive_type(std::string_view s); DDL_PrimitiveType map_to_primitive_type(std::string_view s);
/// @brief Return the DDL_PrimitiveType encoded in @a s, error reporting variant
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept;
struct regex_impl; struct regex_impl;
/** /**
...@@ -177,7 +276,7 @@ struct item_alias ...@@ -177,7 +276,7 @@ struct item_alias
*/ */
struct item_validator struct item_validator
{ {
std::string m_tag; ///< The item name std::string m_item_name; ///< The item name
bool m_mandatory; ///< Flag indicating this item is mandatory bool m_mandatory; ///< Flag indicating this item is mandatory
const type_validator *m_type; ///< The type for this item const type_validator *m_type; ///< The type for this item
cif::iset m_enums; ///< If filled, the set of allowed values cif::iset m_enums; ///< If filled, the set of allowed values
...@@ -188,18 +287,21 @@ struct item_validator ...@@ -188,18 +287,21 @@ struct item_validator
/// @brief Compare based on the name /// @brief Compare based on the name
bool operator<(const item_validator &rhs) const bool operator<(const item_validator &rhs) const
{ {
return icompare(m_tag, rhs.m_tag) < 0; return icompare(m_item_name, rhs.m_item_name) < 0;
} }
/// @brief Compare based on the name /// @brief Compare based on the name
bool operator==(const item_validator &rhs) const bool operator==(const item_validator &rhs) const
{ {
return iequals(m_tag, rhs.m_tag); return iequals(m_item_name, rhs.m_item_name);
} }
/// @brief Validate the value in @a value for this item /// @brief Validate the value in @a value for this item
/// Will throw a validation_error exception if it fails /// Will throw a std::system_error exception if it fails
void operator()(std::string_view value) const; void operator()(std::string_view value) const;
/// @brief A more gentle version of value validation
bool validate_value(std::string_view value, std::error_code &ec) const noexcept;
}; };
/** /**
...@@ -213,7 +315,7 @@ struct category_validator ...@@ -213,7 +315,7 @@ struct category_validator
std::string m_name; ///< The name of the category std::string m_name; ///< The name of the category
std::vector<std::string> m_keys; ///< The list of items that make up the key std::vector<std::string> m_keys; ///< The list of items that make up the key
cif::iset m_groups; ///< The category groups this category belongs to cif::iset m_groups; ///< The category groups this category belongs to
cif::iset m_mandatory_fields; ///< The mandatory fields for this category cif::iset m_mandatory_items; ///< The mandatory items for this category
std::set<item_validator> m_item_validators; ///< The item validators for the items in this category std::set<item_validator> m_item_validators; ///< The item validators for the items in this category
/// @brief return true if this category sorts before @a rhs /// @brief return true if this category sorts before @a rhs
...@@ -225,11 +327,11 @@ struct category_validator ...@@ -225,11 +327,11 @@ struct category_validator
/// @brief Add item_validator @a v to the list of item validators /// @brief Add item_validator @a v to the list of item validators
void add_item_validator(item_validator &&v); void add_item_validator(item_validator &&v);
/// @brief Return the item_validator for item @a tag, may return nullptr /// @brief Return the item_validator for item @a item_name, may return nullptr
const item_validator *get_validator_for_item(std::string_view tag) const; const item_validator *get_validator_for_item(std::string_view item_name) const;
/// @brief Return the item_validator for an item that has as alias name @a tag, may return nullptr /// @brief Return the item_validator for an item that has as alias name @a item_name, may return nullptr
const item_validator *get_validator_for_aliased_item(std::string_view tag) const; const item_validator *get_validator_for_aliased_item(std::string_view item_name) const;
}; };
/** /**
...@@ -308,7 +410,24 @@ class validator ...@@ -308,7 +410,24 @@ class validator
std::vector<const link_validator *> get_links_for_child(std::string_view category) const; std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
/// @brief Bottleneck function to report an error in validation /// @brief Bottleneck function to report an error in validation
void report_error(const std::string &msg, bool fatal) const; void report_error(validation_error err, bool fatal = true) const
{
report_error(make_error_code(err), fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, bool fatal = true) const;
/// @brief Bottleneck function to report an error in validation
void report_error(validation_error err, std::string_view category,
std::string_view item, bool fatal = true) const
{
report_error(make_error_code(err), category, item, fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, std::string_view category,
std::string_view item, bool fatal = true) const;
const std::string &name() const { return m_name; } ///< Get the name of this validator const std::string &name() const { return m_name; } ///< Get the name of this validator
void set_name(const std::string &name) { m_name = name; } ///< Set the name of this validator void set_name(const std::string &name) { m_name = name; } ///< Set the name of this validator
......
...@@ -30,17 +30,17 @@ ...@@ -30,17 +30,17 @@
namespace cif namespace cif
{ {
iset get_category_fields(const category &cat) iset get_category_items(const category &cat)
{ {
return cat.key_fields(); return cat.key_items();
} }
uint16_t get_column_ix(const category &cat, std::string_view col) uint16_t get_item_ix(const category &cat, std::string_view col)
{ {
return cat.get_column_ix(col); return cat.get_item_ix(col);
} }
bool is_column_type_uchar(const category &cat, std::string_view col) bool is_item_type_uchar(const category &cat, std::string_view col)
{ {
bool result = false; bool result = false;
...@@ -63,14 +63,14 @@ namespace detail ...@@ -63,14 +63,14 @@ namespace detail
condition_impl *key_equals_condition_impl::prepare(const category &c) condition_impl *key_equals_condition_impl::prepare(const category &c)
{ {
m_item_ix = c.get_column_ix(m_item_tag); m_item_ix = c.get_item_ix(m_item_name);
m_icase = is_column_type_uchar(c, m_item_tag); m_icase = is_item_type_uchar(c, m_item_name);
if (c.get_cat_validator() != nullptr and if (c.get_cat_validator() != nullptr and
c.key_field_indices().contains(m_item_ix) and c.key_item_indices().contains(m_item_ix) and
c.key_field_indices().size() == 1) c.key_item_indices().size() == 1)
{ {
m_single_hit = c[{ { m_item_tag, m_value } }]; m_single_hit = c[{ { m_item_name, m_value } }];
} }
return this; return this;
......
...@@ -143,13 +143,6 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name) ...@@ -143,13 +143,6 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
if (iequals(name, i->name())) if (iequals(name, i->name()))
{ {
is_new = false; is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break; break;
} }
...@@ -158,25 +151,24 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name) ...@@ -158,25 +151,24 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
if (is_new) if (is_new)
{ {
auto &c = emplace_back(name); i = insert(end(), {name});
c.set_validator(m_validator, *this); i->set_validator(m_validator, *this);
} }
assert(end() != begin()); assert(i != end());
return std::make_tuple(std::prev(end()), is_new); return std::make_tuple(i, is_new);
} }
std::vector<std::string> datablock::get_tag_order() const std::vector<std::string> datablock::get_item_order() const
{ {
std::vector<std::string> result; std::vector<std::string> result;
// for entry and audit_conform on top // for entry and audit_conform on top
auto ci = find_if(begin(), end(), [](const category &cat) auto ci = find_if(begin(), end(), [](const category &cat)
{ return cat.name() == "entry"; }); { return cat.name() == "entry"; });
if (ci != end()) if (ci != end())
{ {
auto cto = ci->get_tag_order(); auto cto = ci->get_item_order();
result.insert(result.end(), cto.begin(), cto.end()); result.insert(result.end(), cto.begin(), cto.end());
} }
...@@ -184,7 +176,7 @@ std::vector<std::string> datablock::get_tag_order() const ...@@ -184,7 +176,7 @@ std::vector<std::string> datablock::get_tag_order() const
{ return cat.name() == "audit_conform"; }); { return cat.name() == "audit_conform"; });
if (ci != end()) if (ci != end())
{ {
auto cto = ci->get_tag_order(); auto cto = ci->get_item_order();
result.insert(result.end(), cto.begin(), cto.end()); result.insert(result.end(), cto.begin(), cto.end());
} }
...@@ -192,7 +184,7 @@ std::vector<std::string> datablock::get_tag_order() const ...@@ -192,7 +184,7 @@ std::vector<std::string> datablock::get_tag_order() const
{ {
if (cat.name() == "entry" or cat.name() == "audit_conform") if (cat.name() == "entry" or cat.name() == "audit_conform")
continue; continue;
auto cto = cat.get_tag_order(); auto cto = cat.get_item_order();
result.insert(result.end(), cto.begin(), cto.end()); result.insert(result.end(), cto.begin(), cto.end());
} }
...@@ -253,12 +245,31 @@ void datablock::write(std::ostream &os) const ...@@ -253,12 +245,31 @@ void datablock::write(std::ostream &os) const
{ {
// If the dictionary declares an audit_conform category, put it in, // If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already! // but only if it does not exist already!
if (get("audit_conform") == nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
if (m_validator->get_validator_for_category("audit_conform") != nullptr)
{ {
category auditConform("audit_conform"); auto *audit_conform = get("audit_conform");
auditConform.emplace({ { "dict_name", m_validator->name() }, if (audit_conform == nullptr or audit_conform->size() != 1) // There should be one entry here, I guess
{ "dict_version", m_validator->version() } }); audit_conform = nullptr;
auditConform.write(os); else
{
// And the name and version should be filled in of course
auto &e = audit_conform->front();
if (e["dict_name"].empty() or e["dict_version"].empty())
audit_conform = nullptr;
}
if (not audit_conform)
{
category auditConform("audit_conform");
// clang-format off
auditConform.emplace({
{ "dict_name", m_validator->name() },
{ "dict_version", m_validator->version() }
});
// clang-format on
auditConform.write(os);
}
} }
// base order on parent child relationships, parents first // base order on parent child relationships, parents first
...@@ -327,16 +338,16 @@ void datablock::write(std::ostream &os) const ...@@ -327,16 +338,16 @@ void datablock::write(std::ostream &os) const
} }
} }
void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order) void datablock::write(std::ostream &os, const std::vector<std::string> &item_name_order)
{ {
os << "data_" << m_name << '\n' os << "data_" << m_name << '\n'
<< "# \n"; << "# \n";
std::vector<std::string> cat_order; std::vector<std::string> cat_order;
for (auto &o : tag_order) for (auto &o : item_name_order)
{ {
std::string cat_name, item_name; std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(o); std::tie(cat_name, item_name) = split_item_name(o);
if (find_if(cat_order.rbegin(), cat_order.rend(), [cat_name](const std::string &s) -> bool if (find_if(cat_order.rbegin(), cat_order.rend(), [cat_name](const std::string &s) -> bool
{ return iequals(cat_name, s); }) == cat_order.rend()) { return iequals(cat_name, s); }) == cat_order.rend())
cat_order.push_back(cat_name); cat_order.push_back(cat_name);
...@@ -349,10 +360,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde ...@@ -349,10 +360,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde
continue; continue;
std::vector<std::string> items; std::vector<std::string> items;
for (auto &o : tag_order) for (auto &o : item_name_order)
{ {
std::string cat_name, item_name; std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(o); std::tie(cat_name, item_name) = split_item_name(o);
if (cat_name == c) if (cat_name == c)
items.push_back(item_name); items.push_back(item_name);
......
...@@ -50,7 +50,7 @@ class dictionary_parser : public parser ...@@ -50,7 +50,7 @@ class dictionary_parser : public parser
try try
{ {
while (m_lookahead != CIFToken::Eof) while (m_lookahead != CIFToken::END_OF_FILE)
{ {
switch (m_lookahead) switch (m_lookahead)
{ {
...@@ -128,7 +128,7 @@ class dictionary_parser : public parser ...@@ -128,7 +128,7 @@ class dictionary_parser : public parser
datablock::iterator cat = dict.end(); datablock::iterator cat = dict.end();
match(CIFToken::SAVE_NAME); match(CIFToken::SAVE_NAME);
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag) while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::ITEM_NAME)
{ {
if (m_lookahead == CIFToken::LOOP) if (m_lookahead == CIFToken::LOOP)
{ {
...@@ -136,30 +136,30 @@ class dictionary_parser : public parser ...@@ -136,30 +136,30 @@ class dictionary_parser : public parser
match(CIFToken::LOOP); match(CIFToken::LOOP);
std::vector<std::string> tags; std::vector<std::string> item_names;
while (m_lookahead == CIFToken::Tag) while (m_lookahead == CIFToken::ITEM_NAME)
{ {
std::string catName, item_name; std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value); std::tie(catName, item_name) = split_item_name(m_token_value);
if (cat == dict.end()) if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName); std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName)) else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_"); error("inconsistent categories in loop_");
tags.push_back(item_name); item_names.push_back(item_name);
match(CIFToken::Tag); match(CIFToken::ITEM_NAME);
} }
while (m_lookahead == CIFToken::Value) while (m_lookahead == CIFToken::VALUE)
{ {
cat->emplace({}); cat->emplace({});
auto row = cat->back(); auto row = cat->back();
for (auto tag : tags) for (auto item_name : item_names)
{ {
row[tag] = m_token_value; row[item_name] = m_token_value;
match(CIFToken::Value); match(CIFToken::VALUE);
} }
} }
...@@ -168,18 +168,18 @@ class dictionary_parser : public parser ...@@ -168,18 +168,18 @@ class dictionary_parser : public parser
else else
{ {
std::string catName, item_name; std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value); std::tie(catName, item_name) = split_item_name(m_token_value);
if (cat == dict.end() or not iequals(cat->name(), catName)) if (cat == dict.end() or not iequals(cat->name(), catName))
std::tie(cat, std::ignore) = dict.emplace(catName); std::tie(cat, std::ignore) = dict.emplace(catName);
match(CIFToken::Tag); match(CIFToken::ITEM_NAME);
if (cat->empty()) if (cat->empty())
cat->emplace({}); cat->emplace({});
cat->back()[item_name] = m_token_value; cat->back()[item_name] = m_token_value;
match(CIFToken::Value); match(CIFToken::VALUE);
} }
} }
...@@ -191,7 +191,7 @@ class dictionary_parser : public parser ...@@ -191,7 +191,7 @@ class dictionary_parser : public parser
std::vector<std::string> keys; std::vector<std::string> keys;
for (auto k : dict["category_key"]) for (auto k : dict["category_key"])
keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>()))); keys.push_back(std::get<1>(split_item_name(k["name"].as<std::string>())));
iset groups; iset groups;
for (auto g : dict["category_group"]) for (auto g : dict["category_group"])
...@@ -234,17 +234,17 @@ class dictionary_parser : public parser ...@@ -234,17 +234,17 @@ class dictionary_parser : public parser
// collect the dict from our dataBlock and construct validators // collect the dict from our dataBlock and construct validators
for (auto i : dict["item"]) for (auto i : dict["item"])
{ {
std::string tagName, category, mandatory; std::string item, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code"); cif::tie(item, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string cat_name, item_name; std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(tagName); std::tie(cat_name, item_name) = split_item_name(item);
if (cat_name.empty() or item_name.empty()) if (cat_name.empty() or item_name.empty())
error("Invalid tag name in _item.name " + tagName); error("Invalid item name in _item.name " + item);
if (not iequals(category, cat_name) and not(category.empty() or category == "?")) if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\''); error("specified category id does match the implicit category name for item '" + item + '\'');
else else
category = cat_name; category = cat_name;
...@@ -260,22 +260,22 @@ class dictionary_parser : public parser ...@@ -260,22 +260,22 @@ class dictionary_parser : public parser
{ {
if (VERBOSE > 2) if (VERBOSE > 2)
{ {
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary\n"; std::cerr << "inconsistent mandatory value for " << item << " in dictionary\n";
if (iequals(tagName, saveFrameName)) if (iequals(item, saveFrameName))
std::cerr << "choosing " << mandatory << '\n'; std::cerr << "choosing " << mandatory << '\n';
else else
std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << '\n'; std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << '\n';
} }
if (iequals(tagName, saveFrameName)) if (iequals(item, saveFrameName))
vi->m_mandatory = (iequals(mandatory, "yes")); vi->m_mandatory = (iequals(mandatory, "yes"));
} }
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv) if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
{ {
if (VERBOSE > 1) if (VERBOSE > 1)
std::cerr << "inconsistent type for " << tagName << " in dictionary\n"; std::cerr << "inconsistent type for " << item << " in dictionary\n";
} }
// vi->mMandatory = (iequals(mandatory, "yes")); // vi->mMandatory = (iequals(mandatory, "yes"));
...@@ -358,7 +358,7 @@ class dictionary_parser : public parser ...@@ -358,7 +358,7 @@ class dictionary_parser : public parser
} }
size_t ix = linkIndex.at(key); size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag); addLink(ix, piv->m_item_name, civ->m_item_name);
} }
// Only process inline linked items if the linked group list is absent // Only process inline linked items if the linked group list is absent
...@@ -386,7 +386,7 @@ class dictionary_parser : public parser ...@@ -386,7 +386,7 @@ class dictionary_parser : public parser
} }
size_t ix = linkIndex.at(key); size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag); addLink(ix, piv->m_item_name, civ->m_item_name);
} }
} }
...@@ -417,7 +417,7 @@ class dictionary_parser : public parser ...@@ -417,7 +417,7 @@ class dictionary_parser : public parser
for (auto &iv : cv.m_item_validators) for (auto &iv : cv.m_item_validators)
{ {
if (iv.m_type == nullptr and cif::VERBOSE >= 0) if (iv.m_type == nullptr and cif::VERBOSE >= 0)
std::cerr << "Missing item_type for " << iv.m_tag << '\n'; std::cerr << "Missing item_type for " << iv.m_item_name << '\n';
} }
} }
} }
......
...@@ -158,13 +158,6 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name) ...@@ -158,13 +158,6 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
if (iequals(name, i->name())) if (iequals(name, i->name()))
{ {
is_new = false; is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break; break;
} }
...@@ -173,12 +166,12 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name) ...@@ -173,12 +166,12 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
if (is_new) if (is_new)
{ {
auto &db = emplace_back(name); i = insert(end(), { name });
db.set_validator(m_validator); i->set_validator(m_validator);
} }
assert(begin() != end()); assert(i != end());
return std::make_tuple(std::prev(end()), is_new); return std::make_tuple(i, is_new);
} }
void file::load(const std::filesystem::path &p) void file::load(const std::filesystem::path &p)
......
...@@ -35,7 +35,7 @@ const item_handle item_handle::s_null_item; ...@@ -35,7 +35,7 @@ const item_handle item_handle::s_null_item;
row_handle s_null_row_handle; row_handle s_null_row_handle;
item_handle::item_handle() item_handle::item_handle()
: m_column(std::numeric_limits<uint16_t>::max()) : m_item_ix(std::numeric_limits<uint16_t>::max())
, m_row_handle(s_null_row_handle) , m_row_handle(s_null_row_handle)
{ {
} }
...@@ -44,7 +44,7 @@ std::string_view item_handle::text() const ...@@ -44,7 +44,7 @@ std::string_view item_handle::text() const
{ {
if (not m_row_handle.empty()) if (not m_row_handle.empty())
{ {
auto iv = m_row_handle.m_row->get(m_column); auto iv = m_row_handle.m_row->get(m_item_ix);
if (iv != nullptr) if (iv != nullptr)
return iv->text(); return iv->text();
} }
...@@ -55,14 +55,14 @@ std::string_view item_handle::text() const ...@@ -55,14 +55,14 @@ std::string_view item_handle::text() const
void item_handle::assign_value(const item &v) void item_handle::assign_value(const item &v)
{ {
assert(not m_row_handle.empty()); assert(not m_row_handle.empty());
m_row_handle.assign(m_column, v.value(), true); m_row_handle.assign(m_item_ix, v.value(), true);
} }
void item_handle::swap(item_handle &b) void item_handle::swap(item_handle &b)
{ {
assert(m_column == b.m_column); assert(m_item_ix == b.m_item_ix);
// assert(&m_row_handle.m_category == &b.m_row_handle.m_category); // assert(&m_row_handle.m_category == &b.m_row_handle.m_category);
m_row_handle.swap(m_column, b.m_row_handle); m_row_handle.swap(m_item_ix, b.m_row_handle);
} }
} }
...@@ -163,9 +163,9 @@ int atom::atom_impl::get_charge() const ...@@ -163,9 +163,9 @@ int atom::atom_impl::get_charge() const
// const std::string atom::atom_impl::get_property(const std::string_view name) const // const std::string atom::atom_impl::get_property(const std::string_view name) const
// { // {
// for (auto &&[tag, ref] : mCachedRefs) // for (auto &&[item_name, ref] : mCachedRefs)
// { // {
// if (tag == name) // if (item_name == name)
// return ref.as<std::string>(); // return ref.as<std::string>();
// } // }
...@@ -175,9 +175,9 @@ int atom::atom_impl::get_charge() const ...@@ -175,9 +175,9 @@ int atom::atom_impl::get_charge() const
// void atom::atom_impl::set_property(const std::string_view name, const std::string &value) // void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
// { // {
// for (auto &&[tag, ref] : mCachedRefs) // for (auto &&[item_name, ref] : mCachedRefs)
// { // {
// if (tag != name) // if (item_name != name)
// continue; // continue;
// ref = value; // ref = value;
......
...@@ -269,7 +269,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -269,7 +269,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{ {
const auto kEOF = std::char_traits<char>::eof(); const auto kEOF = std::char_traits<char>::eof();
CIFToken result = CIFToken::Unknown; CIFToken result = CIFToken::UNKNOWN;
int quoteChar = 0; int quoteChar = 0;
State state = State::Start; State state = State::Start;
m_bol = false; m_bol = false;
...@@ -279,7 +279,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -279,7 +279,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
reserved_words_automaton dag; reserved_words_automaton dag;
while (result == CIFToken::Unknown) while (result == CIFToken::UNKNOWN)
{ {
auto ch = get_next_char(); auto ch = get_next_char();
...@@ -287,7 +287,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -287,7 +287,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{ {
case State::Start: case State::Start:
if (ch == kEOF) if (ch == kEOF)
result = CIFToken::Eof; result = CIFToken::END_OF_FILE;
else if (ch == '\n') else if (ch == '\n')
{ {
m_bol = true; m_bol = true;
...@@ -298,9 +298,9 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -298,9 +298,9 @@ sac_parser::CIFToken sac_parser::get_next_token()
else if (ch == '#') else if (ch == '#')
state = State::Comment; state = State::Comment;
else if (ch == '_') else if (ch == '_')
state = State::Tag; state = State::ItemName;
else if (ch == ';' and m_bol) else if (ch == ';' and m_bol)
state = State::TextField; state = State::TextItem;
else if (ch == '?') else if (ch == '?')
state = State::QuestionMark; state = State::QuestionMark;
else if (ch == '\'' or ch == '"') else if (ch == '\'' or ch == '"')
...@@ -316,7 +316,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -316,7 +316,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
case State::White: case State::White:
if (ch == kEOF) if (ch == kEOF)
result = CIFToken::Eof; result = CIFToken::END_OF_FILE;
else if (not is_space(ch)) else if (not is_space(ch))
{ {
state = State::Start; state = State::Start;
...@@ -335,7 +335,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -335,7 +335,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
m_token_buffer.clear(); m_token_buffer.clear();
} }
else if (ch == kEOF) else if (ch == kEOF)
result = CIFToken::Eof; result = CIFToken::END_OF_FILE;
else if (not is_any_print(ch)) else if (not is_any_print(ch))
error("invalid character in comment"); error("invalid character in comment");
break; break;
...@@ -344,29 +344,29 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -344,29 +344,29 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch)) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::VALUE;
} }
else else
state = State::Value; state = State::Value;
break; break;
case State::TextField: case State::TextItem:
if (ch == '\n') if (ch == '\n')
state = State::TextFieldNL; state = State::TextItemNL;
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated textfield"); error("unterminated textfield");
else if (not is_any_print(ch) and cif::VERBOSE > 2) else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")"); warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break; break;
case State::TextFieldNL: case State::TextItemNL:
if (is_text_lead(ch) or ch == ' ' or ch == '\t') if (is_text_lead(ch) or ch == ' ' or ch == '\t')
state = State::TextField; state = State::TextItem;
else if (ch == ';') else if (ch == ';')
{ {
assert(m_token_buffer.size() >= 2); assert(m_token_buffer.size() >= 2);
m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3); m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3);
result = CIFToken::Value; result = CIFToken::VALUE;
} }
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated textfield"); error("unterminated textfield");
...@@ -387,7 +387,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -387,7 +387,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (is_white(ch)) if (is_white(ch))
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::VALUE;
if (m_token_buffer.size() < 2) if (m_token_buffer.size() < 2)
error("Invalid quoted string token"); error("Invalid quoted string token");
...@@ -403,11 +403,11 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -403,11 +403,11 @@ sac_parser::CIFToken sac_parser::get_next_token()
error("invalid character in quoted string"); error("invalid character in quoted string");
break; break;
case State::Tag: case State::ItemName:
if (not is_non_blank(ch)) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Tag; result = CIFToken::ITEM_NAME;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size()); m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
} }
break; break;
...@@ -422,7 +422,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -422,7 +422,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch)) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::VALUE;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size()); m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
} }
else else
...@@ -467,7 +467,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -467,7 +467,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch)) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::VALUE;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size()); m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
break; break;
} }
...@@ -483,7 +483,7 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -483,7 +483,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (VERBOSE >= 5) if (VERBOSE >= 5)
{ {
std::cerr << get_token_name(result); std::cerr << get_token_name(result);
if (result != CIFToken::Eof) if (result != CIFToken::END_OF_FILE)
std::cerr << " " << std::quoted(m_token_value); std::cerr << " " << std::quoted(m_token_value);
std::cerr << '\n'; std::cerr << '\n';
} }
...@@ -710,7 +710,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock, const data ...@@ -710,7 +710,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock, const data
void sac_parser::parse_file() void sac_parser::parse_file()
{ {
while (m_lookahead != CIFToken::Eof) while (m_lookahead != CIFToken::END_OF_FILE)
{ {
switch (m_lookahead) switch (m_lookahead)
{ {
...@@ -735,10 +735,10 @@ void sac_parser::parse_file() ...@@ -735,10 +735,10 @@ void sac_parser::parse_file()
void sac_parser::parse_global() void sac_parser::parse_global()
{ {
match(CIFToken::GLOBAL); match(CIFToken::GLOBAL);
while (m_lookahead == CIFToken::Tag) while (m_lookahead == CIFToken::ITEM_NAME)
{ {
match(CIFToken::Tag); match(CIFToken::ITEM_NAME);
match(CIFToken::Value); match(CIFToken::VALUE);
} }
} }
...@@ -747,7 +747,7 @@ void sac_parser::parse_datablock() ...@@ -747,7 +747,7 @@ void sac_parser::parse_datablock()
static const std::string kUnitializedCategory("<invalid>"); static const std::string kUnitializedCategory("<invalid>");
std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE_NAME) while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::ITEM_NAME or m_lookahead == CIFToken::SAVE_NAME)
{ {
switch (m_lookahead) switch (m_lookahead)
{ {
...@@ -757,12 +757,12 @@ void sac_parser::parse_datablock() ...@@ -757,12 +757,12 @@ void sac_parser::parse_datablock()
match(CIFToken::LOOP); match(CIFToken::LOOP);
std::vector<std::string> tags; std::vector<std::string> item_names;
while (m_lookahead == CIFToken::Tag) while (m_lookahead == CIFToken::ITEM_NAME)
{ {
std::string catName, itemName; std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value); std::tie(catName, itemName) = split_item_name(m_token_value);
if (cat == kUnitializedCategory) if (cat == kUnitializedCategory)
{ {
...@@ -772,19 +772,19 @@ void sac_parser::parse_datablock() ...@@ -772,19 +772,19 @@ void sac_parser::parse_datablock()
else if (not iequals(cat, catName)) else if (not iequals(cat, catName))
error("inconsistent categories in loop_"); error("inconsistent categories in loop_");
tags.push_back(itemName); item_names.push_back(itemName);
match(CIFToken::Tag); match(CIFToken::ITEM_NAME);
} }
while (m_lookahead == CIFToken::Value) while (m_lookahead == CIFToken::VALUE)
{ {
produce_row(); produce_row();
for (auto tag : tags) for (auto item_name : item_names)
{ {
produce_item(cat, tag, m_token_value); produce_item(cat, item_name, m_token_value);
match(CIFToken::Value); match(CIFToken::VALUE);
} }
} }
...@@ -792,10 +792,10 @@ void sac_parser::parse_datablock() ...@@ -792,10 +792,10 @@ void sac_parser::parse_datablock()
break; break;
} }
case CIFToken::Tag: case CIFToken::ITEM_NAME:
{ {
std::string catName, itemName; std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value); std::tie(catName, itemName) = split_item_name(m_token_value);
if (not iequals(cat, catName)) if (not iequals(cat, catName))
{ {
...@@ -804,11 +804,11 @@ void sac_parser::parse_datablock() ...@@ -804,11 +804,11 @@ void sac_parser::parse_datablock()
produce_row(); produce_row();
} }
match(CIFToken::Tag); match(CIFToken::ITEM_NAME);
produce_item(cat, itemName, m_token_value); produce_item(cat, itemName, m_token_value);
match(CIFToken::Value); match(CIFToken::VALUE);
break; break;
} }
......
...@@ -1493,8 +1493,8 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab ...@@ -1493,8 +1493,8 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
auto r1 = cat1.front(); auto r1 = cat1.front();
auto r2 = cat2.front(); auto r2 = cat2.front();
for (auto column : cat1.key_fields()) for (auto item : cat1.key_items())
r2[column] = r1[column].text(); r2[item] = r1[item].text();
} }
} }
else else
......
...@@ -237,20 +237,20 @@ void checkAtomRecords(datablock &db) ...@@ -237,20 +237,20 @@ void checkAtomRecords(datablock &db)
{ "auth_comp_id", auth_comp_id.value_or(*label_comp_id) }, { "auth_comp_id", auth_comp_id.value_or(*label_comp_id) },
{ "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } }); { "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } });
// Rewrite the coordinates and other fields that look better in a fixed format // Rewrite the coordinates and other items that look better in a fixed format
// Be careful not to nuke invalidly formatted data here // Be careful not to nuke invalidly formatted data here
for (auto [tag, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{ for (auto [item_name, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{
{ "cartn_x", 3 }, { "cartn_x", 3 },
{ "cartn_y", 3 }, { "cartn_y", 3 },
{ "cartn_z", 3 }, { "cartn_z", 3 },
{ "occupancy", 2 }, { "occupancy", 2 },
{ "b_iso_or_equiv", 2 } }) { "b_iso_or_equiv", 2 } })
{ {
if (row[tag].empty()) if (row[item_name].empty())
continue; continue;
float v; float v;
auto s = row.get<std::string>(tag); auto s = row.get<std::string>(item_name);
if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc()) if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); ec != std::errc())
continue; continue;
...@@ -259,7 +259,7 @@ void checkAtomRecords(datablock &db) ...@@ -259,7 +259,7 @@ void checkAtomRecords(datablock &db)
char b[12]; char b[12];
if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc()) if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc())
row.assign(tag, { b, static_cast<std::string::size_type>(ptr - b) }, false, false); row.assign(item_name, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
} }
} }
} }
...@@ -267,22 +267,22 @@ void checkAtomRecords(datablock &db) ...@@ -267,22 +267,22 @@ void checkAtomRecords(datablock &db)
auto *cv = atom_site.get_cat_validator(); auto *cv = atom_site.get_cat_validator();
if (cv) if (cv)
{ {
// See if there are columns that are no longer known // See if there are items that are no longer known
for (auto tag : atom_site.get_columns()) for (auto item_name : atom_site.get_items())
{ {
if (cv->get_validator_for_item(tag) != nullptr) if (cv->get_validator_for_item(item_name) != nullptr)
continue; continue;
auto r = atom_site.find_first(key(tag) != null); auto r = atom_site.find_first(key(item_name) != null);
if (not r) if (not r)
{ {
if (cif::VERBOSE > 0) if (cif::VERBOSE > 0)
std::clog << "Dropping unknown column " << tag << '\n'; std::clog << "Dropping unknown item " << item_name << '\n';
atom_site.remove_column(tag); atom_site.remove_item(item_name);
} }
else if (cif::VERBOSE > 0) else if (cif::VERBOSE > 0)
std::clog << "Keeping unknown column " << std::quoted(tag) << " in atom_site since it is not empty\n"; std::clog << "Keeping unknown item " << std::quoted(item_name) << " in atom_site since it is not empty\n";
} }
} }
} }
...@@ -311,10 +311,10 @@ void createEntity(datablock &db) ...@@ -311,10 +311,10 @@ void createEntity(datablock &db)
auto &cf = compound_factory::instance(); auto &cf = compound_factory::instance();
auto &atom_site = db["atom_site"]; auto &atom_site = db["atom_site"];
atom_site.add_column("label_entity_id"); atom_site.add_item("label_entity_id");
auto &struct_asym = db["struct_asym"]; auto &struct_asym = db["struct_asym"];
struct_asym.add_column("entity_id"); struct_asym.add_item("entity_id");
std::map<std::string, std::vector<std::tuple<std::string, int>>> asyms; std::map<std::string, std::vector<std::tuple<std::string, int>>> asyms;
...@@ -617,7 +617,7 @@ void comparePolySeqSchemes(datablock &db) ...@@ -617,7 +617,7 @@ void comparePolySeqSchemes(datablock &db)
auto &ndb_poly_seq_scheme = db["ndb_poly_seq_scheme"]; auto &ndb_poly_seq_scheme = db["ndb_poly_seq_scheme"];
auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"]; auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
// Since often ndb_poly_seq_scheme only contains an id and mon_id field // Since often ndb_poly_seq_scheme only contains an id and mon_id item
// we assume that it should match the accompanying pdbx_poly_seq // we assume that it should match the accompanying pdbx_poly_seq
std::vector<std::string> asym_ids_ndb, asym_ids_pdbx; std::vector<std::string> asym_ids_ndb, asym_ids_pdbx;
...@@ -722,6 +722,7 @@ void reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -722,6 +722,7 @@ void reconstruct_pdbx(file &file, std::string_view dictionary)
std::vector<std::string> invalidCategories; std::vector<std::string> invalidCategories;
// clean up each category
for (auto &cat : db) for (auto &cat : db)
{ {
try try
...@@ -730,21 +731,21 @@ void reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -730,21 +731,21 @@ void reconstruct_pdbx(file &file, std::string_view dictionary)
if (not cv) if (not cv)
continue; continue;
// Start by renaming columns that may have old names based on alias info // Start by renaming items that may have old names based on alias info
for (auto tag : cat.get_columns()) for (auto item_name : cat.get_items())
{ {
auto iv = cv->get_validator_for_item(tag); auto iv = cv->get_validator_for_item(item_name);
if (iv) // know, must be OK then if (iv) // know, must be OK then`
continue; continue;
iv = cv->get_validator_for_aliased_item(tag); iv = cv->get_validator_for_aliased_item(item_name);
if (not iv) if (not iv)
continue; continue;
if (cif::VERBOSE > 0) if (cif::VERBOSE > 0)
std::clog << "Renaming " << tag << " to " << iv->m_tag << " in category " << cat.name() << '\n'; std::clog << "Renaming " << item_name << " to " << iv->m_item_name << " in category " << cat.name() << '\n';
cat.rename_column(tag, iv->m_tag); cat.rename_item(item_name, iv->m_item_name);
} }
for (auto link : validator.get_links_for_child(cat.name())) for (auto link : validator.get_links_for_child(cat.name()))
...@@ -767,14 +768,38 @@ void reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -767,14 +768,38 @@ void reconstruct_pdbx(file &file, std::string_view dictionary)
} }
} }
// Fill in all mandatory fields // Fill in all mandatory items
for (auto key : cv->m_mandatory_fields) for (auto key : cv->m_mandatory_items)
{ {
if (not cat.has_column(key)) if (not cat.has_item(key))
{ {
if (cif::VERBOSE > 0) if (cif::VERBOSE > 0)
std::clog << "Adding mandatory key " << key << " to category " << cat.name() << '\n'; std::clog << "Adding mandatory key " << key << " to category " << cat.name() << '\n';
cat.add_column(key); cat.add_item(key);
}
}
// validate all values, and if they do not validate replace the content with an unknown flag
for (auto item_name : cat.get_items())
{
auto iv = cv->get_validator_for_item(item_name);
if (not iv)
continue;
auto ix = cat.get_item_ix(item_name);
for (auto row : cat)
{
std::error_code ec;
std::string_view value = row[ix].text();
if (not iv->validate_value(value, ec))
{
if (cif::VERBOSE > 0)
std::clog << "Replacing value (" << std::quoted(value) << ") for item " << item_name << " since it does not validate\n";
row[ix] = "?";
}
} }
} }
...@@ -834,7 +859,7 @@ void reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -834,7 +859,7 @@ void reconstruct_pdbx(file &file, std::string_view dictionary)
if (cif::VERBOSE > 0) if (cif::VERBOSE > 0)
std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n'; std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';
// replace fields that do not define a relation to a parent // replace items that do not define a relation to a parent
std::set<std::string> replaceableKeys; std::set<std::string> replaceableKeys;
for (auto key : cv->m_keys) for (auto key : cv->m_keys)
......
...@@ -69,26 +69,67 @@ condition get_parents_condition(const validator &validator, row_handle rh, const ...@@ -69,26 +69,67 @@ condition get_parents_condition(const validator &validator, row_handle rh, const
bool is_valid_pdbx_file(const file &file, std::string_view dictionary) bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
{ {
using namespace cif::literals; std::error_code ec;
bool result = is_valid_pdbx_file(file, dictionary, ec);
if (ec != std::errc())
throw std::system_error(ec);
return result;
}
bool is_valid_pdbx_file(const file &file, std::error_code &ec)
{
bool result = false;
if (file.empty())
ec = make_error_code(validation_error::empty_file);
else
{
std::string dictionary = "mmcif_pdbx";
for (auto &db : file)
{
auto audit_conform = db.get("audit_conform");
if (audit_conform == nullptr)
continue;
if (not audit_conform->empty())
{
auto specified_dict = audit_conform->front()["dict_name"];
if (not specified_dict.empty())
dictionary = specified_dict.as<std::string>();
}
auto &cf = cif::compound_factory::instance(); break;
auto &validator = cif::validator_factory::instance().operator[](dictionary); }
result = is_valid_pdbx_file(file, dictionary, ec);
}
return result;
}
bool is_valid_pdbx_file(const file &file, std::string_view dictionary, std::error_code &ec)
{
using namespace cif::literals;
bool result = true; bool result = true;
try try
{ {
auto &cf = cif::compound_factory::instance();
auto &validator = cif::validator_factory::instance().operator[](dictionary);
if (file.empty()) if (file.empty())
throw validation_error("Empty file"); throw std::runtime_error("Empty file");
auto &db = file.front(); auto &db = file.front();
if (db.empty()) if (db.empty())
throw validation_error("Empty datablock"); throw std::runtime_error("Empty datablock");
auto &atom_site = db["atom_site"]; auto &atom_site = db["atom_site"];
if (atom_site.empty()) if (atom_site.empty())
throw validation_error("Empty or missing atom_site category"); throw std::runtime_error("Empty or missing atom_site category");
auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"]; auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
...@@ -111,29 +152,29 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -111,29 +152,29 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme)); auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
if (p.size() != 1) if (p.size() != 1)
throw validation_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record"); throw std::runtime_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record");
} }
auto &entity = db["entity"]; auto &entity = db["entity"];
if (entity.empty()) if (entity.empty())
throw validation_error("Entity category is missing or empty"); throw std::runtime_error("Entity category is missing or empty");
auto &entity_poly = db["entity_poly"]; auto &entity_poly = db["entity_poly"];
if (entity_poly.empty()) if (entity_poly.empty())
throw validation_error("Entity_poly category is missing or empty"); throw std::runtime_error("Entity_poly category is missing or empty");
auto &entity_poly_seq = db["entity_poly_seq"]; auto &entity_poly_seq = db["entity_poly_seq"];
if (entity_poly_seq.empty()) if (entity_poly_seq.empty())
throw validation_error("Entity_poly_seq category is missing or empty"); throw std::runtime_error("Entity_poly_seq category is missing or empty");
auto &struct_asym = db["struct_asym"]; auto &struct_asym = db["struct_asym"];
if (struct_asym.empty()) if (struct_asym.empty())
throw validation_error("struct_asym category is missing or empty"); throw std::runtime_error("struct_asym category is missing or empty");
for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id")) for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
{ {
if (entity_poly.count("entity_id"_key == entity_id) != 1) if (entity_poly.count("entity_id"_key == entity_id) != 1)
throw validation_error("There should be exactly one entity_poly record per polymer entity"); throw std::runtime_error("There should be exactly one entity_poly record per polymer entity");
const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type"); const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
...@@ -151,7 +192,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -151,7 +192,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
"seq_id"_key == num and "seq_id"_key == num and
"hetero"_key == hetero) != 1) "hetero"_key == hetero) != 1)
{ {
throw validation_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record"); throw std::runtime_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
} }
} }
} }
...@@ -163,11 +204,11 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -163,11 +204,11 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
"num"_key == seq_id and "num"_key == seq_id and
"hetero"_key == hetero) != 1) "hetero"_key == hetero) != 1)
{ {
throw validation_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record"); throw std::runtime_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
} }
if ((mon_per_seq_id[seq_id].size() > 1) != hetero) if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
throw validation_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id"); throw std::runtime_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
} }
for (const auto &[seq_id, mon_ids] : mon_per_seq_id) for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
...@@ -184,7 +225,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -184,7 +225,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
"label_seq_id"_key == seq_id and not std::move(cond); "label_seq_id"_key == seq_id and not std::move(cond);
if (atom_site.contains(std::move(cond))) if (atom_site.contains(std::move(cond)))
throw validation_error("An atom_site record exists that has no parent in the poly seq scheme categories"); throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
} }
} }
...@@ -250,7 +291,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -250,7 +291,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end()); seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
if (not seq_match(false, seq->begin(), seq->end())) if (not seq_match(false, seq->begin(), seq->end()))
throw validation_error("Sequences do not match for entity " + entity_id); throw std::runtime_error("Sequences do not match for entity " + entity_id);
} }
if (not seq_can.has_value()) if (not seq_can.has_value())
...@@ -263,7 +304,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -263,7 +304,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end()); seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
if (not seq_match(true, seq_can->begin(), seq_can->end())) if (not seq_match(true, seq_can->begin(), seq_can->end()))
throw validation_error("Canonical sequences do not match for entity " + entity_id); throw std::runtime_error("Canonical sequences do not match for entity " + entity_id);
} }
} }
...@@ -275,6 +316,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary) ...@@ -275,6 +316,7 @@ bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
result = false; result = false;
if (cif::VERBOSE > 0) if (cif::VERBOSE > 0)
std::clog << ex.what() << '\n'; std::clog << ex.what() << '\n';
ec = make_error_code(validation_error::not_valid_pdbx);
} }
return result; return result;
......
...@@ -29,44 +29,44 @@ ...@@ -29,44 +29,44 @@
namespace cif namespace cif
{ {
void row_handle::assign(uint16_t column, std::string_view value, bool updateLinked, bool validate) void row_handle::assign(uint16_t item, std::string_view value, bool updateLinked, bool validate)
{ {
if (not m_category) if (not m_category)
throw std::runtime_error("uninitialized row"); throw std::runtime_error("uninitialized row");
m_category->update_value(m_row, column, value, updateLinked, validate); m_category->update_value(m_row, item, value, updateLinked, validate);
} }
uint16_t row_handle::get_column_ix(std::string_view name) const uint16_t row_handle::get_item_ix(std::string_view name) const
{ {
if (not m_category) if (not m_category)
throw std::runtime_error("uninitialized row"); throw std::runtime_error("uninitialized row");
return m_category->get_column_ix(name); return m_category->get_item_ix(name);
} }
std::string_view row_handle::get_column_name(uint16_t ix) const std::string_view row_handle::get_item_name(uint16_t ix) const
{ {
if (not m_category) if (not m_category)
throw std::runtime_error("uninitialized row"); throw std::runtime_error("uninitialized row");
return m_category->get_column_name(ix); return m_category->get_item_name(ix);
} }
uint16_t row_handle::add_column(std::string_view name) uint16_t row_handle::add_item(std::string_view name)
{ {
if (not m_category) if (not m_category)
throw std::runtime_error("uninitialized row"); throw std::runtime_error("uninitialized row");
return m_category->add_column(name); return m_category->add_item(name);
} }
void row_handle::swap(uint16_t column, row_handle &b) void row_handle::swap(uint16_t item, row_handle &b)
{ {
if (not m_category) if (not m_category)
throw std::runtime_error("uninitialized row"); throw std::runtime_error("uninitialized row");
m_category->swap_item(column, *this, b); m_category->swap_item(item, *this, b);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -86,7 +86,7 @@ row_initializer::row_initializer(row_handle rh) ...@@ -86,7 +86,7 @@ row_initializer::row_initializer(row_handle rh)
auto &i = r->operator[](ix); auto &i = r->operator[](ix);
if (not i) if (not i)
continue; continue;
emplace_back(cat.get_column_name(ix), i.text()); emplace_back(cat.get_item_name(ix), i.text());
} }
} }
......
...@@ -215,19 +215,19 @@ std::string trim_copy(std::string_view s) ...@@ -215,19 +215,19 @@ std::string trim_copy(std::string_view s)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
std::tuple<std::string, std::string> split_tag_name(std::string_view tag) std::tuple<std::string, std::string> split_item_name(std::string_view item_name)
{ {
if (tag.empty()) if (item_name.empty())
throw std::runtime_error("empty tag"); throw std::runtime_error("empty item_name");
if (tag[0] != '_') if (item_name[0] != '_')
throw std::runtime_error("tag '" + std::string { tag } + "' does not start with underscore"); throw std::runtime_error("item_name '" + std::string { item_name } + "' does not start with underscore");
auto s = tag.find('.'); auto s = item_name.find('.');
if (s == std::string::npos) if (s == std::string::npos)
// throw std::runtime_error("tag does not contain dot (" + std::string{ tag } + ')'); // throw std::runtime_error("item_name does not contain dot (" + std::string{ item_name } + ')');
return std::tuple<std::string, std::string>{ "", tag.substr(1) }; return std::tuple<std::string, std::string>{ "", item_name.substr(1) };
else else
return std::tuple<std::string, std::string>{tag.substr(1, s - 1), tag.substr(s + 1)}; return std::tuple<std::string, std::string>{item_name.substr(1, s - 1), item_name.substr(s + 1)};
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -39,10 +39,10 @@ ...@@ -39,10 +39,10 @@
// the code will use boost::regex instead. // the code will use boost::regex instead.
#if USE_BOOST_REGEX #if USE_BOOST_REGEX
#include <boost/regex.hpp> # include <boost/regex.hpp>
using boost::regex; using boost::regex;
#else #else
#include <regex> # include <regex>
using std::regex; using std::regex;
#endif #endif
...@@ -57,20 +57,11 @@ struct regex_impl : public regex ...@@ -57,20 +57,11 @@ struct regex_impl : public regex
} }
}; };
validation_error::validation_error(const std::string &msg)
: m_msg(msg)
{
}
validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
: m_msg("When validating _" + cat + '.' + item + ": " + msg)
{
}
// -------------------------------------------------------------------- // --------------------------------------------------------------------
DDL_PrimitiveType map_to_primitive_type(std::string_view s) DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept
{ {
ec = {};
DDL_PrimitiveType result; DDL_PrimitiveType result;
if (iequals(s, "char")) if (iequals(s, "char"))
result = DDL_PrimitiveType::Char; result = DDL_PrimitiveType::Char;
...@@ -79,7 +70,16 @@ DDL_PrimitiveType map_to_primitive_type(std::string_view s) ...@@ -79,7 +70,16 @@ DDL_PrimitiveType map_to_primitive_type(std::string_view s)
else if (iequals(s, "numb")) else if (iequals(s, "numb"))
result = DDL_PrimitiveType::Numb; result = DDL_PrimitiveType::Numb;
else else
throw validation_error("Not a known primitive type"); ec = make_error_code(validation_error::not_a_known_primitive_type);
return result;
}
/**
 * @brief Throwing variant of map_to_primitive_type
 *
 * Delegates to the overload taking a std::error_code and converts a
 * reported error into an exception.
 *
 * @param s The textual name of the primitive type
 * @return The primitive type as returned by the non-throwing overload
 * @throws std::system_error with the reported error code, using @a s as
 *         the context string
 */
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
{
	std::error_code failure;
	const DDL_PrimitiveType type = map_to_primitive_type(s, failure);

	// Happy path first: nothing was reported, hand back the mapped type
	if (not failure)
		return type;

	throw std::system_error(failure, std::string{ s });
}
...@@ -196,39 +196,26 @@ int type_validator::compare(std::string_view a, std::string_view b) const ...@@ -196,39 +196,26 @@ int type_validator::compare(std::string_view a, std::string_view b) const
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
//{
//// if (mParent != nullptr and VERBOSE)
//// cerr << "replacing parent in " << mCategory->m_name << " from " << mParent->mCategory->m_name << " to " << parent->mCategory->m_name << endl;
//// mParent = parent;
//
// if (m_type == nullptr and parent != nullptr)
// m_type = parent->m_type;
//
// if (parent != nullptr)
// {
// mLinked.push_back({parent, parentItem, childItem});
//
// parent->mChildren.insert(this);
////
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
//// parent->mForeignKeys.insert(this);
// }
//}
void item_validator::operator()(std::string_view value) const void item_validator::operator()(std::string_view value) const
{ {
std::error_code ec;
if (not validate_value(value, ec))
throw std::system_error(ec, std::string{ value } + " does not match rx for " + m_item_name);
}
bool item_validator::validate_value(std::string_view value, std::error_code &ec) const noexcept
{
ec = {};
if (not value.empty() and value != "?" and value != ".") if (not value.empty() and value != "?" and value != ".")
{ {
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx)) if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name); ec = make_error_code(validation_error::value_does_not_match_rx);
else if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
if (not m_enums.empty()) ec = make_error_code(validation_error::value_is_not_in_enumeration_list);
{
if (m_enums.count(std::string{ value }) == 0)
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
}
} }
return ec == std::errc();
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -236,27 +223,27 @@ void item_validator::operator()(std::string_view value) const ...@@ -236,27 +223,27 @@ void item_validator::operator()(std::string_view value) const
/**
 * @brief Register the item validator @a v with this category
 *
 * When @a v is flagged as mandatory its item name is recorded in the
 * mandatory items of this category. The validator is then made to point
 * back to this category and moved into the container of item validators.
 * A failed insertion is not an error, it is merely reported on stdout
 * when VERBOSE is 4 or higher.
 *
 * @param v The validator to add, it is moved from
 */
void category_validator::add_item_validator(item_validator &&v)
{
	if (v.m_mandatory)
		m_mandatory_items.insert(v.m_item_name);

	v.m_category = this;

	// Copy the name while v is still intact, v must not be read any more
	// after it has been handed over as an rvalue below.
	const auto item_name = v.m_item_name;

	const auto r = m_item_validators.insert(std::move(v));
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << item_name << " to category " << m_name << '\n';
}
/**
 * @brief Find the validator for the item named @a item_name in this category
 *
 * The lookup is done with a temporary item_validator constructed from
 * @a item_name. A miss is reported on stdout when VERBOSE is higher than 4.
 *
 * @param item_name The name of the item
 * @return Pointer to the validator stored in this category, or nullptr
 *         when there is none
 */
const item_validator *category_validator::get_validator_for_item(std::string_view item_name) const
{
	const auto pos = m_item_validators.find(item_validator{ std::string(item_name) });

	if (pos != m_item_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for item " << item_name << '\n';

	return nullptr;
}
const item_validator *category_validator::get_validator_for_aliased_item(std::string_view tag) const const item_validator *category_validator::get_validator_for_aliased_item(std::string_view item_name) const
{ {
const item_validator *result = nullptr; const item_validator *result = nullptr;
...@@ -264,8 +251,8 @@ const item_validator *category_validator::get_validator_for_aliased_item(std::st ...@@ -264,8 +251,8 @@ const item_validator *category_validator::get_validator_for_aliased_item(std::st
{ {
for (auto &ai : iv.m_aliases) for (auto &ai : iv.m_aliases)
{ {
const auto &[cat, name] = split_tag_name(ai.m_name); const auto &[cat, name] = split_item_name(ai.m_name);
if (name == tag and cat == m_name) if (iequals(name, item_name) and iequals(cat, m_name))
{ {
result = &iv; result = &iv;
break; break;
...@@ -317,19 +304,19 @@ const category_validator *validator::get_validator_for_category(std::string_view ...@@ -317,19 +304,19 @@ const category_validator *validator::get_validator_for_category(std::string_view
return result; return result;
} }
/**
 * @brief Find the validator for the item with the full name @a item_name
 *
 * The name is split with split_item_name() into a category part and an
 * item part. The category validator is looked up first and then asked
 * for the validator of the item. A miss is reported on stdout when
 * VERBOSE is higher than 4.
 *
 * @param item_name The full item name
 * @return Pointer to the item validator, or nullptr when either the
 *         category or the item is not known
 */
item_validator *validator::get_validator_for_item(std::string_view item_name) const
{
	const auto [category_name, item] = split_item_name(item_name);

	item_validator *found = nullptr;

	// The category validator only hands out pointers to const, the constness
	// is cast away to match the return type of this function.
	if (const auto *cv = get_validator_for_category(category_name); cv != nullptr)
		found = const_cast<item_validator *>(cv->get_validator_for_item(item));

	if (found == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << item_name << '\n';

	return found;
}
...@@ -354,11 +341,11 @@ void validator::add_link_validator(link_validator &&v) ...@@ -354,11 +341,11 @@ void validator::add_link_validator(link_validator &&v)
auto piv = pcv->get_validator_for_item(v.m_parent_keys[i]); auto piv = pcv->get_validator_for_item(v.m_parent_keys[i]);
if (piv == nullptr) if (piv == nullptr)
throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + v.m_parent_keys[i]); throw std::runtime_error("unknown parent item _" + v.m_parent_category + '.' + v.m_parent_keys[i]);
auto civ = ccv->get_validator_for_item(v.m_child_keys[i]); auto civ = ccv->get_validator_for_item(v.m_child_keys[i]);
if (civ == nullptr) if (civ == nullptr)
throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + v.m_child_keys[i]); throw std::runtime_error("unknown child item _" + v.m_child_category + '.' + v.m_child_keys[i]);
if (civ->m_type == nullptr and piv->m_type != nullptr) if (civ->m_type == nullptr and piv->m_type != nullptr)
const_cast<item_validator *>(civ)->m_type = piv->m_type; const_cast<item_validator *>(civ)->m_type = piv->m_type;
...@@ -373,7 +360,7 @@ std::vector<const link_validator *> validator::get_links_for_parent(std::string_ ...@@ -373,7 +360,7 @@ std::vector<const link_validator *> validator::get_links_for_parent(std::string_
for (auto &l : m_link_validators) for (auto &l : m_link_validators)
{ {
if (l.m_parent_category == category) if (iequals(l.m_parent_category, category))
result.push_back(&l); result.push_back(&l);
} }
...@@ -386,19 +373,41 @@ std::vector<const link_validator *> validator::get_links_for_child(std::string_v ...@@ -386,19 +373,41 @@ std::vector<const link_validator *> validator::get_links_for_child(std::string_v
for (auto &l : m_link_validators) for (auto &l : m_link_validators)
{ {
if (l.m_child_category == category) if (iequals(l.m_child_category, category))
result.push_back(&l); result.push_back(&l);
} }
return result; return result;
} }
void validator::report_error(const std::string &msg, bool fatal) const // void validator::report_error(const std::string &msg, bool fatal) const
// {
// if (m_strict or fatal)
// throw validation_error(msg);
// else if (VERBOSE > 0)
// std::cerr << msg << '\n';
// }
void validator::report_error(std::error_code ec, bool fatal) const
{ {
if (m_strict or fatal) if (m_strict or fatal)
throw validation_error(msg); throw std::system_error(ec);
else if (VERBOSE > 0) else
std::cerr << msg << '\n'; std::cerr << ec.message() << '\n';
}
void validator::report_error(std::error_code ec, std::string_view category,
std::string_view item, bool fatal) const
{
std::ostringstream os;
os << "category: "<< category;
if (not item.empty())
os << "; item: " << item;
if (m_strict or fatal)
throw std::system_error(ec, os.str());
else
std::cerr << ec.message() << ": " << os.str() << '\n';
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -460,12 +469,12 @@ const validator &validator_factory::operator[](std::string_view dictionary_name) ...@@ -460,12 +469,12 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
if (not std::filesystem::exists(p, ec) or ec) if (not std::filesystem::exists(p, ec) or ec)
{ {
for (const char *dir : { for (const char *dir : {
#if defined(CACHE_DIR) # if defined(CACHE_DIR)
CACHE_DIR, CACHE_DIR,
#endif # endif
#if defined(DATA_DIR) # if defined(DATA_DIR)
DATA_DIR DATA_DIR
#endif # endif
}) })
{ {
auto p2 = std::filesystem::path(dir) / p; auto p2 = std::filesystem::path(dir) / p;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment