Commit e84282cb by Maarten L. Hekkelman

documenting symmetry and text

parent 8b2e02e1
......@@ -44,6 +44,12 @@
#include <zeep/type-traits.hpp>
#endif
/**
* \file text.hpp
*
* Various text manipulating routines
*/
namespace cif
{
......@@ -52,18 +58,40 @@ namespace cif
// some basic utilities: Since we're using ASCII input only, we define for optimisation
// our own case conversion routines.
/// \brief return whether string @a is equal to string @a b ignoring changes in character case
bool iequals(std::string_view a, std::string_view b);
/// \brief compare string @a is to string @a b ignoring changes in character case
int icompare(std::string_view a, std::string_view b);
/// \brief return whether string @a is equal to string @a b ignoring changes in character case
bool iequals(const char *a, const char *b);
/// \brief compare string @a is to string @a b ignoring changes in character case
int icompare(const char *a, const char *b);
/// \brief convert the string @a s to lower case in situ
void to_lower(std::string &s);
/// \brief return a lower case copy of string @a s
std::string to_lower_copy(std::string_view s);
/// \brief convert the string @a s to upper case in situ
void to_upper(std::string &s);
// std::string toUpperCopy(const std::string &s);
/**
* @brief Join the strings in the range [ @a a, @a e ) using
* @a sep as separator
*
* Example usage:
*
* @code {.cpp}
* std::vector<std::string> v{ "aap", "noot", "mies" };
*
* assert(cif::join(v.begin(), v.end(), ", ") == "aap, noot, mies");
* @endcode
*
*/
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
......@@ -91,12 +119,41 @@ std::string join(IterType b, IterType e, std::string_view sep)
return s.str();
}
/**
* @brief Join the strings in the array @a arr using @a sep as separator
*
* Example usage:
*
* @code {.cpp}
* std::list<std::string> v{ "aap", "noot", "mies" };
*
* assert(cif::join(v, ", ") == "aap, noot, mies");
* @endcode
*
*/
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
return join(arr.begin(), arr.end(), sep);
}
/**
* @brief Split the string in @a s based on the characters in @a separators
*
* Each of the characters in @a separators induces a split.
*
* When suppress_empty is true, empty strings are not produced in the
* resulting array.
*
* Example:
*
* @code {.cpp}
* auto v = cif::split("aap:noot,,mies", ":,", true);
*
* assert(v == std::vector{"aap", "noot", "mies"});
* @endcode
*
*/
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
......@@ -124,15 +181,23 @@ std::vector<StringType> split(std::string_view s, std::string_view separators, b
return result;
}
/**
* @brief Replace all occurrences of @a what in string @a s with the string @a with
*
* The string @a with may be empty in which case each occurrence of @a what is simply
* deleted.
*/
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
#if defined(__cpp_lib_starts_ends_with)
/// \brief return whether string @a s starts with @a with
inline bool starts_with(std::string s, std::string_view with)
{
return s.starts_with(with);
}
/// \brief return whether string @a s ends with @a with
inline bool ends_with(std::string_view s, std::string_view with)
{
return s.ends_with(with);
......@@ -140,11 +205,13 @@ inline bool ends_with(std::string_view s, std::string_view with)
#else
/// \brief return whether string @a s starts with @a with
inline bool starts_with(std::string s, std::string_view with)
{
return s.compare(0, with.length(), with) == 0;
}
/// \brief return whether string @a s ends with @a with
inline bool ends_with(std::string_view s, std::string_view with)
{
return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
......@@ -154,6 +221,7 @@ inline bool ends_with(std::string_view s, std::string_view with)
#if defined(__cpp_lib_string_contains)
/// \brief return whether string @a s contains @a q
inline bool contains(std::string_view s, std::string_view q)
{
return s.contains(q);
......@@ -161,6 +229,7 @@ inline bool contains(std::string_view s, std::string_view q)
#else
/// \brief return whether string @a s contains @a q
inline bool contains(std::string_view s, std::string_view q)
{
return s.find(q) != std::string_view::npos;
......@@ -168,20 +237,33 @@ inline bool contains(std::string_view s, std::string_view q)
#endif
/// \brief return whether string @a s contains @a q ignoring character case
bool icontains(std::string_view s, std::string_view q);
/// \brief trim white space at the start of string @a s in situ
void trim_left(std::string &s);
/// \brief trim white space at the end of string @a s in situ
void trim_right(std::string &s);
/// \brief trim white space at both the start and the end of string @a s in situ
void trim(std::string &s);
/// \brief return a string trimmed of white space at the start of string @a s
std::string trim_left_copy(std::string_view s);
/// \brief return a string trimmed of white space at the end of string @a s
std::string trim_right_copy(std::string_view s);
/// \brief return a string trimmed of white space at both the start and the end of string @a s
std::string trim_copy(std::string_view s);
// To make life easier, we also define iless and iset using iequals
/// \brief an operator object you can use to compare strings ignoring their character case
struct iless
{
/// \brief return the result of icompare for @a a and @a b
bool operator()(const std::string &a, const std::string &b) const
{
return icompare(a, b) < 0;
......@@ -196,8 +278,10 @@ using iset = std::set<std::string, iless>;
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
/// \brief global list containing the lower case version of each ASCII character
extern CIFPP_EXPORT const uint8_t kCharToLowerMap[256];
/// \brief a very fast tolower implementation
inline char tolower(int ch)
{
return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
......@@ -205,22 +289,37 @@ inline char tolower(int ch)
// --------------------------------------------------------------------
/** \brief return a tuple consisting of the category and item name for @a tag
*
* The category name is stripped of its leading underscore character.
*
* If no dot character was found, the category name is empty. That's for
* cif 1.0 formatted data.
*/
std::tuple<std::string, std::string> split_tag_name(std::string_view tag);
// --------------------------------------------------------------------
// generate a cif name, mainly used to generate asym_id's
/// \brief generate a cif name, used e.g. to generate asym_id's
std::string cif_id_for_number(int number);
// --------------------------------------------------------------------
// custom wordwrapping routine
/** \brief custom word wrapping routine.
*
* Wrap the text in @a text based on a maximum line width @a width using
* a dynamic programming approach to get the most efficient filling of
* the space.
*/
std::vector<std::string> word_wrap(const std::string &text, size_t width);
// --------------------------------------------------------------------
/// std::from_chars for floating point types.
/// \brief std::from_chars for floating point types.
///
/// These are optional, there's a selected_charconv class below that selects
/// the best option to used based on support by the stl library
/// the best option to use based on support by the stl library.
///
/// I.e. that in case of GNU < 12 (or something) the cif implementation will
/// be used, all other cases will use the stl version.
......@@ -345,6 +444,7 @@ std::from_chars_result from_chars(const char *first, const char *last, FloatType
return result;
}
/// \brief duplication of std::chars_format for deficient STL implementations
enum class chars_format
{
scientific = 1,
......@@ -353,6 +453,7 @@ enum class chars_format
general = fixed | scientific
};
/// \brief a simplistic implementation of std::to_chars for old STL implementations
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
{
......@@ -392,6 +493,7 @@ std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_f
return result;
}
/// \brief a simplistic implementation of std::to_chars for old STL implementations
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
{
......@@ -431,6 +533,7 @@ std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_f
return result;
}
/// \brief class that uses our implementation of std::from_chars and std::to_chars
template <typename T>
struct my_charconv
{
......@@ -445,6 +548,7 @@ struct my_charconv
}
};
/// \brief class that uses the STL implementation of std::from_chars and std::to_chars
template <typename T>
struct std_charconv
{
......@@ -459,9 +563,16 @@ struct std_charconv
}
};
/// \brief helper to find a from_chars function
template <typename T>
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
/**
* @brief Helper to select the best implementation of charconv based on availability of the
* function in the std:: namespace
*
* @tparam T The type for which we want to find a from_chars/to_chars function
*/
template <typename T>
using selected_charconv = typename std::conditional_t<std::experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment