Commit 1f6b86d5 by Maarten L. Hekkelman

Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop

parents 31499b97 7f39d401
......@@ -36,6 +36,7 @@
#include <regex>
#include <set>
#include <sstream>
#include <shared_mutex>
#include "cif++/CifUtils.hpp"
......@@ -142,13 +143,13 @@ class Item
Item() {}
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
Item(const std::string &name, const T &value)
Item(const std::string_view name, const T &value)
: mName(name)
, mValue(std::to_string(value))
{
}
Item(const std::string &name, const std::string &value)
Item(const std::string_view name, const std::string_view value)
: mName(name)
, mValue(value)
{
......@@ -221,7 +222,7 @@ class Datablock
using iterator = CategoryList::iterator;
using const_iterator = CategoryList::const_iterator;
Datablock(const std::string &name);
Datablock(const std::string_view name);
~Datablock();
Datablock(const Datablock &) = delete;
......@@ -230,8 +231,6 @@ class Datablock
std::string getName() const { return mName; }
void setName(const std::string &n) { mName = n; }
std::string firstItem(const std::string &tag) const;
iterator begin() { return mCategories.begin(); }
iterator end() { return mCategories.end(); }
......@@ -245,7 +244,7 @@ class Datablock
bool isValid();
void validateLinks() const;
void setValidator(Validator *v);
void setValidator(const Validator *v);
// this one only looks up a Category, returns nullptr if it does not exist
const Category *get(std::string_view name) const;
......@@ -256,7 +255,7 @@ class Datablock
void write(std::ostream &os);
// convenience function, add a line to the software category
void add_software(const std::string &name, const std::string &classification,
void add_software(const std::string_view name, const std::string &classification,
const std::string &versionNr, const std::string &versionDate);
friend bool operator==(const Datablock &lhs, const Datablock &rhs);
......@@ -264,9 +263,10 @@ class Datablock
friend std::ostream& operator<<(std::ostream &os, const Datablock &data);
private:
std::list<Category> mCategories;
CategoryList mCategories; // LRU
mutable std::shared_mutex mLock;
std::string mName;
Validator *mValidator;
const Validator *mValidator;
Datablock *mNext;
};
......@@ -1816,12 +1816,12 @@ class Category
friend class Row;
friend class detail::ItemReference;
Category(Datablock &db, const std::string &name, Validator *Validator);
Category(Datablock &db, const std::string_view name, const Validator *Validator);
Category(const Category &) = delete;
Category &operator=(const Category &) = delete;
~Category();
const std::string name() const { return mName; }
const std::string &name() const { return mName; }
using iterator = iterator_impl<Row>;
using const_iterator = iterator_impl<const Row>;
......@@ -2064,7 +2064,7 @@ class Category
Datablock &db() { return mDb; }
void setValidator(Validator *v);
void setValidator(const Validator *v);
iset fields() const;
iset mandatoryFields() const;
......@@ -2121,14 +2121,24 @@ class Category
size_t addColumn(std::string_view name);
// One resolved validator link: pairs a link definition with the category found
// on the other side of it. Entries are stored in mParentLinks / mChildLinks.
struct Linked
{
Category *linked; // the other category taking part in the link
const ValidateLink *v; // the link definition this entry was built from
};
void updateLinks();
Datablock &mDb;
std::string mName;
Validator *mValidator;
const Validator *mValidator;
const ValidateCategory *mCatValidator = nullptr;
std::vector<ItemColumn> mColumns;
ItemRow *mHead;
ItemRow *mTail;
class CatIndex *mIndex;
std::vector<Linked> mParentLinks, mChildLinks;
};
// --------------------------------------------------------------------
......@@ -2162,7 +2172,8 @@ class File
void loadDictionary(); // load the default dictionary, that is mmcifDdl in this case
void loadDictionary(const char *dict); // load one of the compiled in dictionaries
void loadDictionary(std::istream &is); // load dictionary from input stream
void setValidator(const Validator *v);
bool isValid();
void validateLinks() const;
......@@ -2226,10 +2237,8 @@ class File
void getTagOrder(std::vector<std::string> &tags) const;
private:
void setValidator(Validator *v);
Datablock *mHead;
Validator *mValidator;
const Validator *mValidator;
};
// --------------------------------------------------------------------
......
......@@ -28,8 +28,8 @@
#include "cif++/Cif++.hpp"
#include <stack>
#include <map>
#include <stack>
namespace cif
{
......@@ -39,7 +39,7 @@ namespace cif
// Exception type (derived from std::runtime_error) constructed from a line
// number and a message.
// NOTE(review): the two constructor lines below differ only in the placement of
// '&' (old and new formatting of the same declaration in this diff view).
class CifParserError : public std::runtime_error
{
public:
// lineNr: line number to report; message: description of the problem.
CifParserError(uint32_t lineNr, const std::string& message);
CifParserError(uint32_t lineNr, const std::string &message);
};
// --------------------------------------------------------------------
......@@ -48,7 +48,8 @@ extern const uint32_t kMaxLineLength;
extern const uint8_t kCharTraitsTable[128];
enum CharTraitsMask: uint8_t {
enum CharTraitsMask : uint8_t
{
kOrdinaryMask = 1 << 0,
kNonBlankMask = 1 << 1,
kTextLeadMask = 1 << 2,
......@@ -75,13 +76,13 @@ inline bool isTextLead(int ch)
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
}
// Returns true when ch is a tab, or when ch lies in 0x20..0x7f and its entry in
// kCharTraitsTable (indexed by ch - 0x20) has the kAnyPrintMask bit set.
// NOTE(review): signature and body each appear twice below; the copies differ
// only in whitespace (old and new formatting in this diff view).
inline bool isAnyPrint(int ch)
inline bool isAnyPrint(int ch)
{
return ch == '\t' or
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
return ch == '\t' or
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
}
inline bool isUnquotedString(const char* s)
inline bool isUnquotedString(const char *s)
{
bool result = isOrdinary(*s++);
while (result and *s != 0)
......@@ -94,7 +95,7 @@ inline bool isUnquotedString(const char* s)
// --------------------------------------------------------------------
using DatablockIndex = std::map<std::string,std::size_t>;
using DatablockIndex = std::map<std::string, std::size_t>;
// --------------------------------------------------------------------
// SacParser, analogous to a SAX parser (Simple API for XML)
......@@ -102,15 +103,15 @@ using DatablockIndex = std::map<std::string,std::size_t>;
class SacParser
{
public:
SacParser(std::istream& is, bool init = true);
SacParser(std::istream &is, bool init = true);
virtual ~SacParser() {}
enum CIFToken
{
eCIFTokenUnknown,
eCIFTokenEOF,
eCIFTokenDATA,
eCIFTokenLOOP,
eCIFTokenGLOBAL,
......@@ -120,7 +121,7 @@ class SacParser
eCIFTokenValue,
};
static const char* kTokenName[];
static const char *kTokenName[];
enum CIFValueType
{
......@@ -133,40 +134,39 @@ class SacParser
eCIFValueUnknown
};
static const char* kValueName[];
static const char *kValueName[];
int getNextChar();
void retract();
void restart();
CIFToken getNextToken();
void match(CIFToken token);
bool parseSingleDatablock(const std::string& datablock);
bool parseSingleDatablock(const std::string &datablock);
DatablockIndex indexDatablocks();
bool parseSingleDatablock(const std::string& datablock, const DatablockIndex &index);
bool parseSingleDatablock(const std::string &datablock, const DatablockIndex &index);
void parseFile();
void parseGlobal();
void parseDataBlock();
virtual void parseSaveFrame();
void parseDictionary();
void error(const std::string& msg);
void error(const std::string &msg);
// production methods, these are pure virtual here
virtual void produceDatablock(const std::string& name) = 0;
virtual void produceCategory(const std::string& name) = 0;
virtual void produceDatablock(const std::string &name) = 0;
virtual void produceCategory(const std::string &name) = 0;
virtual void produceRow() = 0;
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0;
virtual void produceItem(const std::string &category, const std::string &item, const std::string &value) = 0;
protected:
enum State
{
eStateStart,
......@@ -181,21 +181,21 @@ class SacParser
eStateTextField,
eStateFloat = 100,
eStateInt = 110,
// eStateNumericSuffix = 200,
// eStateNumericSuffix = 200,
eStateValue = 300
};
std::istream& mData;
std::istream &mData;
// Parser state
bool mValidate;
uint32_t mLineNr;
bool mBol;
int mState, mStart;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;
std::stack<int> mBuffer;
bool mValidate;
uint32_t mLineNr;
bool mBol;
int mState, mStart;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;
std::stack<int> mBuffer;
};
// --------------------------------------------------------------------
......@@ -203,18 +203,18 @@ class SacParser
// SacParser subclass that provides the produceDatablock / produceCategory /
// produceRow / produceItem callbacks, which SacParser declares pure virtual.
// It is constructed with the input stream and the File it works on.
// NOTE(review): every declaration appears twice below; the copies differ only
// in the placement of '&' / '*' (old and new formatting in this diff view).
class Parser : public SacParser
{
public:
// is: input stream handed to SacParser; f: the File referenced by mFile;
// init: forwarded flag, defaults to true.
Parser(std::istream& is, File& f, bool init = true);
Parser(std::istream &is, File &f, bool init = true);
virtual void produceDatablock(const std::string& name);
virtual void produceCategory(const std::string& name);
virtual void produceDatablock(const std::string &name);
virtual void produceCategory(const std::string &name);
virtual void produceRow();
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value);
virtual void produceItem(const std::string &category, const std::string &item, const std::string &value);
protected:
// Presumably the datablock, category and row currently being filled by the
// produce* callbacks — confirm against the implementation.
File& mFile;
Datablock* mDataBlock;
Datablock::iterator mCat;
Row mRow;
File &mFile;
Datablock *mDataBlock;
Datablock::iterator mCat;
Row mRow;
};
// --------------------------------------------------------------------
......@@ -222,23 +222,21 @@ class Parser : public SacParser
// Parser subclass used to read a dictionary from a stream into a Validator.
// It overrides parseSaveFrame(), which SacParser declares virtual.
// NOTE(review): declarations appear twice below (old and new formatting in this
// diff view).
class DictParser : public Parser
{
public:
// validator: the Validator referenced by mValidator; is: dictionary input.
DictParser(Validator& validator, std::istream& is);
DictParser(Validator &validator, std::istream &is);
~DictParser();
void loadDictionary();
private:
private:
virtual void parseSaveFrame();
bool collectItemTypes();
void linkItems();
// Note that mFile here is a File held by value; it hides the File reference
// of the same name declared in Parser.
Validator& mValidator;
File mFile;
struct DictParserDataImpl* mImpl;
bool mCollectedItemTypes = false;
Validator &mValidator;
File mFile;
struct DictParserDataImpl *mImpl;
bool mCollectedItemTypes = false;
};
}
} // namespace cif
......@@ -67,9 +67,7 @@ std::string get_version_nr();
// Some basic utilities: since we only handle ASCII input, we define our own
// case conversion routines as an optimisation.
// bool iequals(const std::string &a, const std::string &b);
bool iequals(std::string_view a, std::string_view b);
// int icompare(const std::string &a, const std::string &b);
int icompare(std::string_view a, std::string_view b);
bool iequals(const char *a, const char *b);
......
......@@ -38,6 +38,7 @@ namespace cif
{
struct ValidateCategory;
class ValidatorFactory;
// --------------------------------------------------------------------
......@@ -154,9 +155,8 @@ struct ValidateLink
class Validator
{
public:
friend class DictParser;
Validator();
Validator(std::string_view name, std::istream &is);
~Validator();
Validator(const Validator &rhs) = delete;
......@@ -165,6 +165,9 @@ class Validator
Validator(Validator &&rhs);
Validator &operator=(Validator &&rhs);
friend class DictParser;
friend class ValidatorFactory;
void addTypeValidator(ValidateType &&v);
const ValidateType *getValidatorForType(std::string_view typeCode) const;
......@@ -175,7 +178,7 @@ class Validator
std::vector<const ValidateLink *> getLinksForParent(std::string_view category) const;
std::vector<const ValidateLink *> getLinksForChild(std::string_view category) const;
void reportError(const std::string &msg, bool fatal);
void reportError(const std::string &msg, bool fatal) const;
std::string dictName() const { return mName; }
void dictName(const std::string &name) { mName = name; }
......@@ -184,6 +187,7 @@ class Validator
void dictVersion(const std::string &version) { mVersion = version; }
private:
// name is fully qualified here:
ValidateItem *getValidatorForItem(std::string_view name) const;
......@@ -196,4 +200,27 @@ class Validator
std::vector<ValidateLink> mLinkValidators;
};
// --------------------------------------------------------------------
/// Hands out Validator objects by dictionary name.
///
/// There is a single shared factory object, reachable through instance();
/// the constructor is private so no other instance can be created from
/// outside the class.
class ValidatorFactory
{
  public:
	/// Access the shared factory object.
	static ValidatorFactory &instance() { return sInstance; }

	/// Return the validator associated with @a dictionary.
	/// NOTE(review): presumably validators are created on demand and kept in
	/// mValidators, guarded by mMutex — confirm against the definition.
	const Validator &operator[](std::string_view dictionary);

  private:
	ValidatorFactory();

	static ValidatorFactory sInstance;

	std::mutex mMutex;
	std::list<Validator> mValidators;
};
} // namespace cif
......@@ -109,12 +109,12 @@ class Atom
float occupancy() const;
template <typename T>
T property(const std::string &name) const;
T property(const std::string_view name) const;
void property(const std::string &name, const std::string &value);
void property(const std::string_view name, const std::string &value);
// Overload for arithmetic values (enabled only when std::is_arithmetic_v<T>):
// converts value with std::to_string and forwards to the string-valued
// property() overload.
// NOTE(review): the two signature lines are the old (const std::string &) and
// new (std::string_view) variants shown together in this diff view.
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
void property(const std::string &name, const T &value)
void property(const std::string_view name, const T &value)
{
property(name, std::to_string(value));
}
......@@ -404,7 +404,7 @@ class File : public std::enable_shared_from_this<File>
File(const File &) = delete;
File &operator=(const File &) = delete;
cif::Datablock& createDatablock(const std::string &name);
cif::Datablock& createDatablock(const std::string_view name);
void load(const std::filesystem::path &path);
void save(const std::filesystem::path &path);
......
......@@ -37,6 +37,11 @@ namespace mmcif
// --------------------------------------------------------------------
/// Selects how a space group string passed to GetSpacegroupNumber() is to be
/// interpreted.
enum class SpacegroupName
{
	full, ///< presumably the full space group name — confirm
	xHM,  ///< presumably the extended Hermann-Mauguin symbol — confirm
	Hall  ///< presumably the Hall symbol — confirm
};
struct Spacegroup
{
const char* name;
......@@ -133,6 +138,7 @@ CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
// --------------------------------------------------------------------
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code, using SpacegroupName::full
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type); // alternative for clipper's parsing code
}
......@@ -33,6 +33,7 @@
#include <stack>
#include <tuple>
#include <unordered_map>
#include <shared_mutex>
#include <filesystem>
......@@ -351,7 +352,7 @@ namespace detail
// --------------------------------------------------------------------
// Datablock implementation
Datablock::Datablock(const std::string &name)
Datablock::Datablock(const std::string_view name)
: mName(name)
, mValidator(nullptr)
, mNext(nullptr)
......@@ -363,43 +364,42 @@ Datablock::~Datablock()
delete mNext;
}
// NOTE(review): this hunk interleaves three things without diff markers: the
// removed Datablock::firstItem, the previous find_if-based emplace, and the new
// move-to-front emplace. Read with care.
//
// The move-to-front emplace walks the categories looking for one whose name
// matches `name` (compared with iequals). A hit that is not already first is
// spliced to the front of mCategories. On a miss, a new Category is constructed
// at the front and updateLinks() is called on every category. The result is
// {begin(), isNew}.
std::string Datablock::firstItem(const std::string &tag) const
auto Datablock::emplace(std::string_view name) -> std::tuple<iterator, bool>
{
std::string result;
// LRU code
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(tag);
// NOTE(review): only a shared lock is taken here, yet splice() and emplace()
// below modify mCategories. A shared lock does not exclude other shared-lock
// holders — confirm whether exclusive locking was intended. Also be aware that
// Category::updateLinks() calls Datablock::get(), which locks mLock again on
// the same thread.
std::shared_lock lock(mLock);
for (auto &cat : mCategories)
bool isNew = true;
auto i = begin();
while (i != end())
{
if (iequals(cat.name(), catName))
if (iequals(name, i->name()))
{
for (auto row : cat)
isNew = false;
if (i != begin())
{
result = row[itemName].as<std::string>();
break;
// Move the single element [i, n) to the front of the list.
auto n = std::next(i);
mCategories.splice(begin(), mCategories, i, n);
}
break;
}
}
return result;
}
auto Datablock::emplace(std::string_view name) -> std::tuple<iterator, bool>
{
bool isNew = false;
iterator i = find_if(begin(), end(), [name](const Category &cat) -> bool
{ return iequals(cat.name(), name); });
++i;
}
if (i == end())
if (isNew)
{
isNew = true;
i = mCategories.emplace(end(), *this, std::string(name), mValidator);
mCategories.emplace(begin(), *this, std::string(name), mValidator);
// A new category may be the parent or child of existing ones, so every
// category rebuilds its link lists.
for (auto &cat : mCategories)
cat.updateLinks();
}
return std::make_tuple(i, isNew);
return std::make_tuple(begin(), isNew);
}
Category &Datablock::operator[](std::string_view name)
......@@ -411,22 +411,34 @@ Category &Datablock::operator[](std::string_view name)
// Returns a pointer to the category whose name matches `name` (compared with
// iequals), or nullptr when there is none.
// NOTE(review): the find_if statement and the conditional return belong to the
// previous implementation; the shared_lock plus range-for loop is the new one.
// Both are shown together in this diff view.
Category *Datablock::get(std::string_view name)
{
auto i = find_if(begin(), end(), [name](const Category &cat) -> bool
{ return iequals(cat.name(), name); });
// Hold a shared lock on mLock while scanning mCategories.
std::shared_lock lock(mLock);
return i == end() ? nullptr : &*i;
for (auto &cat : mCategories)
{
if (iequals(cat.name(), name))
return &cat;
}
return nullptr;
}
// Const overload: returns a pointer to the category whose name matches `name`
// (compared with iequals), or nullptr when there is none.
// NOTE(review): the find_if statement and the conditional return belong to the
// previous implementation; the shared_lock plus range-for loop is the new one.
// Both are shown together in this diff view.
const Category *Datablock::get(std::string_view name) const
{
auto i = find_if(begin(), end(), [name](const Category &cat) -> bool
{ return iequals(cat.name(), name); });
// mLock is declared mutable, so it can be locked from this const method.
std::shared_lock lock(mLock);
return i == end() ? nullptr : &*i;
for (auto &cat : mCategories)
{
if (iequals(cat.name(), name))
return &cat;
}
return nullptr;
}
bool Datablock::isValid()
{
std::shared_lock lock(mLock);
if (mValidator == nullptr)
throw std::runtime_error("Validator not specified");
......@@ -438,20 +450,26 @@ bool Datablock::isValid()
// Calls validateLinks() on every category of this datablock while holding a
// shared lock on mLock.
void Datablock::validateLinks() const
{
	std::shared_lock guard(mLock);

	for (auto &category : *this)
	{
		category.validateLinks();
	}
}
// Stores v as this datablock's validator and passes it on to every category.
// NOTE(review): the two signature lines are the old (Validator *) and new
// (const Validator *) variants shown together in this diff view.
void Datablock::setValidator(Validator *v)
void Datablock::setValidator(const Validator *v)
{
// NOTE(review): mValidator is written while only a shared lock is held —
// confirm whether exclusive locking was intended. Category::setValidator()
// calls updateLinks(), which calls Datablock::get() and locks mLock again.
std::shared_lock lock(mLock);
mValidator = v;
for (auto &cat : *this)
cat.setValidator(v);
}
void Datablock::add_software(const std::string &name, const std::string &classification, const std::string &versionNr, const std::string &versionDate)
void Datablock::add_software(const std::string_view name, const std::string &classification, const std::string &versionNr, const std::string &versionDate)
{
std::shared_lock lock(mLock);
Category &cat = operator[]("software");
auto ordNr = cat.size() + 1;
// TODO: should we check this ordinal number???
......@@ -465,12 +483,16 @@ void Datablock::add_software(const std::string &name, const std::string &classif
// Passes `tags` to getTagOrder() of every category in this datablock, in
// iteration order, while holding a shared lock on mLock.
void Datablock::getTagOrder(std::vector<std::string> &tags) const
{
	std::shared_lock guard(mLock);

	for (auto &category : *this)
	{
		category.getTagOrder(tags);
	}
}
void Datablock::write(std::ostream &os)
{
std::shared_lock lock(mLock);
os << "data_" << mName << std::endl
<< "# " << std::endl;
......@@ -505,6 +527,8 @@ void Datablock::write(std::ostream &os)
void Datablock::write(std::ostream &os, const std::vector<std::string> &order)
{
std::shared_lock lock(mLock);
os << "data_" << mName << std::endl
<< "# " << std::endl;
......@@ -580,6 +604,9 @@ void Datablock::write(std::ostream &os, const std::vector<std::string> &order)
bool operator==(const cif::Datablock &dbA, const cif::Datablock &dbB)
{
std::shared_lock lockA(dbA.mLock);
std::shared_lock lockB(dbB.mLock);
std::vector<std::string> catA, catB;
for (auto &cat : dbA)
......@@ -1311,7 +1338,7 @@ RowSet &RowSet::orderBy(std::initializer_list<std::string> items)
// --------------------------------------------------------------------
Category::Category(Datablock &db, const std::string &name, Validator *Validator)
Category::Category(Datablock &db, const std::string_view name, const Validator *Validator)
: mDb(db)
, mName(name)
, mValidator(Validator)
......@@ -1346,7 +1373,7 @@ Category::~Category()
delete mIndex;
}
void Category::setValidator(Validator *v)
void Category::setValidator(const Validator *v)
{
mValidator = v;
......@@ -1371,6 +1398,33 @@ void Category::setValidator(Validator *v)
}
else
mCatValidator = nullptr;
updateLinks();
}
// Rebuilds mChildLinks and mParentLinks from the current validator.
//
// Both lists are emptied first. Without a validator they stay empty. Otherwise
// each link in which this category is the parent (resp. the child) is paired
// with the category on the other side, looked up in the owning datablock;
// links whose other category does not exist there are left out.
void Category::updateLinks()
{
	mChildLinks.clear();
	mParentLinks.clear();

	if (mValidator == nullptr)
		return;

	for (const auto *childLink : mValidator->getLinksForParent(mName))
	{
		if (auto *child = mDb.get(childLink->mChildCategory))
			mChildLinks.push_back({child, childLink});
	}

	for (const auto *parentLink : mValidator->getLinksForChild(mName))
	{
		if (auto *parent = mDb.get(parentLink->mParentCategory))
			mParentLinks.push_back({parent, parentLink});
	}
}
bool Category::hasColumn(std::string_view name) const
......@@ -1816,12 +1870,8 @@ auto Category::erase(iterator pos) -> iterator
if (mValidator != nullptr)
{
for (auto &link : mValidator->getLinksForParent(mName))
for (auto &&[childCat, link] : mChildLinks)
{
auto childCat = mDb.get(link->mChildCategory);
if (childCat == nullptr)
continue;
Condition cond;
for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
......@@ -1959,12 +2009,8 @@ bool Category::isOrphan(Row r)
return false;
bool isOrphan = true;
for (auto &link : mValidator->getLinksForChild(mName))
for (auto &&[parentCat, link] : mParentLinks)
{
auto parentCat = mDb.get(link->mParentCategory);
if (parentCat == nullptr)
continue;
Condition cond;
for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
{
......@@ -1995,12 +2041,8 @@ bool Category::hasChildren(Row r) const
bool result = false;
for (auto &link : mValidator->getLinksForParent(mName))
for (auto &&[childCat, link] : mChildLinks)
{
auto childCat = mDb.get(link->mChildCategory);
if (childCat == nullptr)
continue;
Condition cond;
for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
......@@ -2026,12 +2068,8 @@ bool Category::hasParents(Row r) const
bool result = false;
for (auto &link : mValidator->getLinksForChild(mName))
for (auto &&[parentCat, link] : mParentLinks)
{
auto parentCat = mDb.get(link->mParentCategory);
if (parentCat == nullptr)
continue;
Condition cond;
for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
......@@ -2240,23 +2278,17 @@ bool Category::isValid()
void Category::validateLinks() const
{
auto &validator = getValidator();
for (auto linkValidator : validator.getLinksForChild(mName))
for (auto &&[parentCat, link] : mParentLinks)
{
auto parent = mDb.get(linkValidator->mParentCategory);
if (parent == nullptr)
continue;
size_t missing = 0;
for (auto r : *this)
if (not hasParent(r, *parent, *linkValidator))
if (not hasParent(r, *parentCat, *link))
++missing;
if (missing)
{
std::cerr << "Links for " << linkValidator->mLinkGroupLabel << " are incomplete" << std::endl
<< " There are " << missing << " items in " << mName << " that don't have matching parent items in " << parent->mName << std::endl;
std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
<< " There are " << missing << " items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl;
}
}
}
......@@ -2697,17 +2729,10 @@ void Category::update_value(RowSet &&rows, const std::string &tag, const std::st
row.assign(colIx, value, true);
// see if we need to update any child categories that depend on this value
auto &validator = getValidator();
auto &db = mDb;
for (auto parent : rows)
{
for (auto linked : validator.getLinksForParent(mName))
for (auto &&[childCat, linked] : mChildLinks)
{
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
if (std::find(linked->mParentKeys.begin(), linked->mParentKeys.end(), tag) == linked->mParentKeys.end())
continue;
......@@ -2864,18 +2889,8 @@ void Row::assign(const std::vector<Item> &values)
// auto iv = col.mValidator;
if (mCascade)
{
auto &validator = cat->getValidator();
auto &db = cat->db();
for (auto linked : validator.getLinksForParent(cat->mName))
for (auto &&[childCat, linked] : cat->mChildLinks)
{
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
// if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
// continue;
Condition cond;
std::string childTag;
......@@ -3016,15 +3031,8 @@ void Row::assign(size_t column, const std::string &value, bool skipUpdateLinked)
auto iv = col.mValidator;
if (not skipUpdateLinked and iv != nullptr and mCascade)
{
auto &validator = cat->getValidator();
auto &db = cat->db();
for (auto linked : validator.getLinksForParent(cat->mName))
for (auto &&[childCat, linked] : cat->mChildLinks)
{
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
continue;
......@@ -3203,18 +3211,13 @@ void Row::swap(size_t cix, ItemRow *a, ItemRow *b)
auto parentColName = cat->getColumnName(cix);
// see if we need to update any child categories that depend on these values
auto &validator = cat->getValidator();
auto parentCatValidator = cat->getCatValidator();
for (auto &link : validator.getLinksForParent(cat->mName))
for (auto &&[childCat, link] : cat->mChildLinks)
{
if (find(link->mParentKeys.begin(), link->mParentKeys.end(), parentColName) == link->mParentKeys.end())
continue;
auto childCat = cat->db().get(link->mChildCategory);
if (childCat == nullptr or childCat->empty())
continue;
auto childCatValidator = childCat->getCatValidator();
if (childCatValidator == nullptr)
continue;
......@@ -3426,7 +3429,6 @@ File::File(File &&rhs)
// Destructor: deletes mHead. Datablock's destructor deletes its mNext, so this
// presumably releases the whole chain of datablocks — confirm.
// NOTE(review): the hunk header shows this region shrinking by one line
// (7 -> 6). Presumably `delete mValidator;` is the removed line, now that
// mValidator is a const Validator * obtained from ValidatorFactory — confirm.
File::~File()
{
delete mHead;
delete mValidator;
}
void File::append(Datablock *e)
......@@ -3503,7 +3505,7 @@ void File::save(const std::filesystem::path &p)
void File::load(std::istream &is)
{
Validator *saved = mValidator;
auto saved = mValidator;
setValidator(nullptr);
Parser p(is, *this);
......@@ -3518,7 +3520,7 @@ void File::load(std::istream &is)
void File::load(std::istream &is, const std::string &datablock)
{
Validator *saved = mValidator;
auto saved = mValidator;
setValidator(nullptr);
Parser p(is, *this);
......@@ -3607,67 +3609,10 @@ void File::loadDictionary()
// Loads the dictionary named `dict`.
//
// Lookup order:
//  1. loadResource(dict); if that yields nothing and the name does not end in
//     ".dic", loadResource() is tried again with ".dic" appended.
//  2. Otherwise a gzip-compressed file on disk: "<name>.gz" when the name ends
//     in ".dic", else "<name>.dic.gz". When CACHE_DIR and DATA_DIR are both
//     defined and the path does not exist as given, it is also tried relative
//     to each of those directories, in that order.
// Throws std::runtime_error when the file cannot be opened or nothing is found.
// NOTE(review): the hunk header shows this region shrinking from 67 to 10
// lines, so presumably most of this body is removed by the commit — confirm.
void File::loadDictionary(const char *dict)
{
fs::path dict_name(dict);
auto data = loadResource(dict);
if (not data and dict_name.extension().string() != ".dic")
data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));
if (data)
loadDictionary(*data);
else
{
// might be a compressed dictionary on disk
fs::path p = dict;
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
if (not fs::exists(p))
{
// first existing candidate wins
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2))
{
swap(p, p2);
break;
}
}
}
#endif
if (fs::exists(p))
{
std::ifstream file(p, std::ios::binary);
if (not file.is_open())
throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
// decompress on the fly and parse from the filtered stream
io::filtering_stream<io::input> in;
in.push(io::gzip_decompressor());
in.push(file);
loadDictionary(in);
}
else
throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");
}
}
// Stream overload: parses a dictionary from `is` into a freshly allocated
// Validator using DictParser, then installs it with setValidator().
// NOTE(review): old and new lines are shown together here. The final statement
// uses `dict`, which is not a parameter of this signature; presumably it is the
// new body of loadDictionary(const char *dict) and the unique_ptr-based lines
// are removed by the commit — confirm.
void File::loadDictionary(std::istream &is)
{
std::unique_ptr<Validator> v(new Validator());
DictParser p(*v, is);
p.loadDictionary();
// ownership of the Validator is released to setValidator()'s raw pointer
setValidator(v.release());
setValidator(&ValidatorFactory::instance()[dict]);
}
void File::setValidator(Validator *v)
void File::setValidator(const Validator *v)
{
mValidator = v;
......
......@@ -42,25 +42,25 @@ namespace cif
const uint32_t kMaxLineLength = 132;
// Per-character trait bits (see CharTraitsMask), indexed by (ch - 0x20) by the
// is* helpers such as isAnyPrint. Each row covers 16 characters; the trailing
// comment on a row is the high nibble of the character code and the header
// line gives the low nibble. One copy of the table lists 96 values
// (0x20..0x7f); the remaining elements of the 128-element array are
// zero-initialised.
// NOTE(review): header and rows appear twice below; the copies differ only in
// whitespace (old and new formatting in this diff view).
const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7
};
// --------------------------------------------------------------------
// Builds the exception text as "parse error at line <lineNr>: <message>".
// NOTE(review): the two signature lines differ only in the placement of '&'
// (old and new formatting in this diff view).
CifParserError::CifParserError(uint32_t lineNr, const std::string& message)
CifParserError::CifParserError(uint32_t lineNr, const std::string &message)
: std::runtime_error("parse error at line " + std::to_string(lineNr) + ": " + message)
{
}
// --------------------------------------------------------------------
const char* SacParser::kTokenName[] = {
const char *SacParser::kTokenName[] = {
"unknown",
"EOF",
"DATA",
......@@ -69,22 +69,20 @@ const char* SacParser::kTokenName[] = {
"SAVE",
"STOP",
"Tag",
"Value"
};
"Value"};
// Printable names for value types.
// NOTE(review): presumably listed in the order of the CIFValueType enumerators
// (whose last entry is eCIFValueUnknown) so the enum value can be used as an
// index — confirm.
// NOTE(review): the declaration line and the closing of the initialiser appear
// in both old and new formatting in this diff view.
const char* SacParser::kValueName[] = {
const char *SacParser::kValueName[] = {
"Int",
"Float",
"Numeric",
"String",
"TextField",
"Inapplicable",
"Unknown"
};
"Unknown"};
// --------------------------------------------------------------------
SacParser::SacParser(std::istream& is, bool init)
SacParser::SacParser(std::istream &is, bool init)
: mData(is)
{
mValidate = true;
......@@ -95,7 +93,7 @@ SacParser::SacParser(std::istream& is, bool init)
mLookahead = getNextToken();
}
// Reports a parse error by throwing CifParserError with the current line
// number (mLineNr) and msg. Never returns normally.
// NOTE(review): the two signature lines differ only in the placement of '&'
// (old and new formatting in this diff view).
void SacParser::error(const std::string& msg)
void SacParser::error(const std::string &msg)
{
throw CifParserError(mLineNr, msg);
}
......@@ -114,7 +112,7 @@ int SacParser::getNextChar()
result = mBuffer.top();
mBuffer.pop();
}
// very simple CR/LF translation into LF
if (result == '\r')
{
......@@ -123,12 +121,12 @@ int SacParser::getNextChar()
mBuffer.push(lookahead);
result = '\n';
}
mTokenValue += static_cast<char>(result);
if (result == '\n')
++mLineNr;
if (VERBOSE >= 6)
{
std::cerr << "getNextChar => ";
......@@ -137,7 +135,7 @@ int SacParser::getNextChar()
else
std::cerr << char(result) << std::endl;
}
return result;
}
......@@ -148,7 +146,7 @@ void SacParser::retract()
char ch = mTokenValue.back();
if (ch == '\n')
--mLineNr;
mBuffer.push(ch);
mTokenValue.pop_back();
}
......@@ -157,25 +155,25 @@ void SacParser::restart()
{
while (not mTokenValue.empty())
retract();
switch (mStart)
{
case eStateStart:
mState = mStart = eStateFloat;
break;
case eStateFloat:
mState = mStart = eStateInt;
break;
case eStateInt:
mState = mStart = eStateValue;
break;
default:
error("Invalid state in SacParser");
}
mBol = false;
}
......@@ -183,26 +181,26 @@ void SacParser::match(SacParser::CIFToken t)
{
if (mLookahead != t)
error(std::string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[mLookahead]);
mLookahead = getNextToken();
}
SacParser::CIFToken SacParser::getNextToken()
{
const auto kEOF = std::char_traits<char>::eof();
CIFToken result = eCIFTokenUnknown;
int quoteChar = 0;
mState = mStart = eStateStart;
mBol = false;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
while (result == eCIFTokenUnknown)
{
auto ch = getNextChar();
switch (mState)
{
case eStateStart:
......@@ -233,7 +231,7 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
case eStateWhite:
if (ch == kEOF)
result = eCIFTokenEOF;
......@@ -246,7 +244,7 @@ SacParser::CIFToken SacParser::getNextToken()
else
mBol = (ch == '\n');
break;
case eStateComment:
if (ch == '\n')
{
......@@ -259,7 +257,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (not isAnyPrint(ch))
error("invalid character in comment");
break;
case eStateQuestionMark:
if (isNonBlank(ch))
mState = eStateValue;
......@@ -291,10 +289,10 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == kEOF)
error("unterminated textfield");
else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std::cerr << "invalid character in text field '" << std::string({ static_cast<char>(ch) }) << "' (" << ch << ") line: " << mLineNr << std::endl;
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
break;
case eStateTextField + 1:
if (isTextLead(ch) or ch == ' ' or ch == '\t')
mState = eStateTextField;
......@@ -310,7 +308,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch != '\n')
error("invalid character in text field");
break;
case eStateQuotedString:
if (ch == kEOF)
error("unterminated quoted string");
......@@ -319,14 +317,14 @@ SacParser::CIFToken SacParser::getNextToken()
else if (not isAnyPrint(ch))
error("invalid character in quoted string");
break;
case eStateQuotedStringQuote:
if (isWhite(ch))
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueString;
assert(mTokenValue.length() >= 3);
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
}
......@@ -339,7 +337,7 @@ SacParser::CIFToken SacParser::getNextToken()
else
error("invalid character in quoted string");
break;
case eStateTag:
if (not isNonBlank(ch))
{
......@@ -347,7 +345,7 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenTag;
}
break;
case eStateFloat:
if (ch == '+' or ch == '-')
{
......@@ -358,11 +356,11 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
case eStateFloat + 1:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if (ch == '.')
mState = eStateFloat + 2;
else if (tolower(ch) == 'e')
......@@ -376,12 +374,12 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
// parsed '.'
case eStateFloat + 2:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if (tolower(ch) == 'e')
mState = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
......@@ -393,7 +391,7 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
// parsed 'e'
case eStateFloat + 3:
if (ch == '-' or ch == '+')
......@@ -410,11 +408,11 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
case eStateFloat + 5:
// if (ch == '(')
// mState = eStateNumericSuffix;
// else
// if (ch == '(')
// mState = eStateNumericSuffix;
// else
if (isWhite(ch) or ch == kEOF)
{
retract();
......@@ -424,14 +422,14 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-')
mState = eStateInt + 1;
else
restart();
break;
case eStateInt + 1:
if (isWhite(ch) or ch == kEOF)
{
......@@ -442,36 +440,36 @@ SacParser::CIFToken SacParser::getNextToken()
else
restart();
break;
// case eStateNumericSuffix:
// if (isdigit(ch))
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
//
// case eStateNumericSuffix + 1:
// if (ch == ')')
// {
// result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
// case eStateNumericSuffix:
// if (isdigit(ch))
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
//
// case eStateNumericSuffix + 1:
// if (ch == ')')
// {
// result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
case eStateValue:
if (isNonBlank(ch))
mState = eStateValue + 1;
else
error("invalid character at this position");
break;
case eStateValue + 1:
if (ch == '_') // first _, check for keywords
if (ch == '_') // first _, check for keywords
{
std::string s = toLowerCopy(mTokenValue);
if (s == "global_")
result = eCIFTokenGLOBAL;
else if (s == "stop_")
......@@ -493,16 +491,16 @@ SacParser::CIFToken SacParser::getNextToken()
if (not isNonBlank(ch))
{
retract();
if (tolower(mTokenValue[0]) == 'd')
result = eCIFTokenDATA;
else
result = eCIFTokenSAVE;
mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
}
break;
default:
assert(false);
error("Invalid state in getNextToken");
......@@ -519,7 +517,7 @@ SacParser::CIFToken SacParser::getNextToken()
std::cerr << " '" << mTokenValue << '\'';
std::cerr << std::endl;
}
return result;
}
......@@ -530,8 +528,15 @@ DatablockIndex SacParser::indexDatablocks()
// first locate the start, as fast as we can
auto &sb = *mData.rdbuf();
enum {
start, comment, string, string_quote, qstring, data, data_name
enum
{
start,
comment,
string,
string_quote,
qstring,
data,
data_name
} state = start;
int quote = 0;
......@@ -547,7 +552,7 @@ DatablockIndex SacParser::indexDatablocks()
case start:
switch (ch)
{
case '#': state = comment; break;
case '#': state = comment; break;
case 'd':
case 'D':
state = data;
......@@ -564,7 +569,7 @@ DatablockIndex SacParser::indexDatablocks()
break;
}
break;
case comment:
if (ch == '\n')
state = start;
......@@ -574,29 +579,29 @@ DatablockIndex SacParser::indexDatablocks()
if (ch == quote)
state = string_quote;
break;
case string_quote:
if (std::isspace(ch))
state = start;
else
state = string;
break;
case qstring:
if (ch == ';' and bol)
state = start;
break;
case data:
if (dblk[si] == 0 and isNonBlank(ch))
{
datablock = { static_cast<char>(ch) };
datablock = {static_cast<char>(ch)};
state = data_name;
}
else if (dblk[si++] != ch)
state = start;
break;
case data_name:
if (isNonBlank(ch))
datablock.insert(datablock.end(), char(ch));
......@@ -604,7 +609,7 @@ DatablockIndex SacParser::indexDatablocks()
{
if (not datablock.empty())
index[datablock] = mData.tellg();
state = start;
}
else
......@@ -618,13 +623,19 @@ DatablockIndex SacParser::indexDatablocks()
return index;
}
bool SacParser::parseSingleDatablock(const std::string& datablock)
bool SacParser::parseSingleDatablock(const std::string &datablock)
{
// first locate the start, as fast as we can
auto &sb = *mData.rdbuf();
enum {
start, comment, string, string_quote, qstring, data
enum
{
start,
comment,
string,
string_quote,
qstring,
data
} state = start;
int quote = 0;
......@@ -640,7 +651,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
case start:
switch (ch)
{
case '#': state = comment; break;
case '#': state = comment; break;
case 'd':
case 'D':
state = data;
......@@ -657,7 +668,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
break;
}
break;
case comment:
if (ch == '\n')
state = start;
......@@ -667,19 +678,19 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
if (ch == quote)
state = string_quote;
break;
case string_quote:
if (std::isspace(ch))
state = start;
else
state = string;
break;
case qstring:
if (ch == ';' and bol)
state = start;
break;
case data:
if (isspace(ch) and dblk[si] == 0)
found = true;
......@@ -701,7 +712,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
return found;
}
bool SacParser::parseSingleDatablock(const std::string& datablock, const DatablockIndex &index)
bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{
bool result = false;
......@@ -729,14 +740,14 @@ void SacParser::parseFile()
case eCIFTokenGLOBAL:
parseGlobal();
break;
case eCIFTokenDATA:
produceDatablock(mTokenValue);
match(eCIFTokenDATA);
parseDataBlock();
break;
default:
error("This file does not seem to be an mmCIF file");
break;
......@@ -757,24 +768,24 @@ void SacParser::parseGlobal()
void SacParser::parseDataBlock()
{
std::string cat;
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE)
{
switch (mLookahead)
{
case eCIFTokenLOOP:
{
cat.clear(); // should start a new category
cat.clear(); // should start a new category
match(eCIFTokenLOOP);
std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag)
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat.empty())
{
produceCategory(catName);
......@@ -782,27 +793,27 @@ void SacParser::parseDataBlock()
}
else if (not iequals(cat, catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (mLookahead == eCIFTokenValue)
{
produceRow();
for (auto tag: tags)
for (auto tag : tags)
{
produceItem(cat, tag, mTokenValue);
match(eCIFTokenValue);
}
}
cat.clear();
break;
}
case eCIFTokenTag:
{
std::string catName, itemName;
......@@ -816,17 +827,17 @@ void SacParser::parseDataBlock()
}
match(eCIFTokenTag);
produceItem(cat, itemName, mTokenValue);
match(eCIFTokenValue);
break;
}
case eCIFTokenSAVE:
parseSaveFrame();
break;
default:
assert(false);
break;
......@@ -841,18 +852,20 @@ void SacParser::parseSaveFrame()
// --------------------------------------------------------------------
Parser::Parser(std::istream& is, File& f, bool init)
: SacParser(is, init), mFile(f), mDataBlock(nullptr)
Parser::Parser(std::istream &is, File &f, bool init)
: SacParser(is, init)
, mFile(f)
, mDataBlock(nullptr)
{
}
void Parser::produceDatablock(const std::string& name)
void Parser::produceDatablock(const std::string &name)
{
mDataBlock = new Datablock(name);
mFile.append(mDataBlock);
}
void Parser::produceCategory(const std::string& name)
void Parser::produceCategory(const std::string &name)
{
if (VERBOSE >= 4)
std::cerr << "producing category " << name << std::endl;
......@@ -870,7 +883,7 @@ void Parser::produceRow()
mRow.lineNr(mLineNr);
}
void Parser::produceItem(const std::string& category, const std::string& item, const std::string& value)
void Parser::produceItem(const std::string &category, const std::string &item, const std::string &value)
{
if (VERBOSE >= 4)
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
......@@ -886,13 +899,15 @@ void Parser::produceItem(const std::string& category, const std::string& item, c
struct DictParserDataImpl
{
// temporary values for constructing dictionaries
std::vector<ValidateCategory> mCategoryValidators;
std::map<std::string,std::vector<ValidateItem>> mItemValidators;
std::set<std::tuple<std::string,std::string>> mLinkedItems;
std::vector<ValidateCategory> mCategoryValidators;
std::map<std::string, std::vector<ValidateItem>> mItemValidators;
std::set<std::tuple<std::string, std::string>> mLinkedItems;
};
DictParser::DictParser(Validator& validator, std::istream& is)
: Parser(is, mFile), mValidator(validator), mImpl(new DictParserDataImpl)
DictParser::DictParser(Validator &validator, std::istream &is)
: Parser(is, mFile)
, mValidator(validator)
, mImpl(new DictParserDataImpl)
{
}
......@@ -910,9 +925,9 @@ void DictParser::parseSaveFrame()
if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here");
bool isCategorySaveFrame = mTokenValue[0] != '_';
Datablock dict(mTokenValue);
Datablock::iterator cat = dict.end();
......@@ -921,37 +936,37 @@ void DictParser::parseSaveFrame()
{
if (mLookahead == eCIFTokenLOOP)
{
cat = dict.end(); // should start a new category
cat = dict.end(); // should start a new category
match(eCIFTokenLOOP);
std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag)
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (mLookahead == eCIFTokenValue)
{
cat->emplace({});
auto row = cat->back();
for (auto tag: tags)
for (auto tag : tags)
{
row[tag] = mTokenValue;
match(eCIFTokenValue);
}
}
cat = dict.end();
}
else
......@@ -963,75 +978,78 @@ void DictParser::parseSaveFrame()
std::tie(cat, std::ignore) = dict.emplace(catName);
match(eCIFTokenTag);
if (cat->empty())
cat->emplace({});
cat->back()[itemName] = mTokenValue;
match(eCIFTokenValue);
}
}
match(eCIFTokenSAVE);
if (isCategorySaveFrame)
{
std::string category = dict.firstItem("_category.id");
std::string category;
cif::tie(category) = dict["category"].front().get("id");
std::vector<std::string> keys;
for (auto k: dict["category_key"])
for (auto k : dict["category_key"])
keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>())));
iset groups;
for (auto g: dict["category_group"])
for (auto g : dict["category_group"])
groups.insert(g["id"].as<std::string>());
mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
}
else
{
// if the type code is missing, this must be a pointer, just skip it
std::string typeCode = dict.firstItem("_item_type.code");
std::string typeCode;
cif::tie(typeCode) = dict["item_type"].front().get("code");
const ValidateType* tv = nullptr;
if (not (typeCode.empty() or typeCode == "?"))
const ValidateType *tv = nullptr;
if (not(typeCode.empty() or typeCode == "?"))
tv = mValidator.getValidatorForType(typeCode);
iset ess;
for (auto e: dict["item_enumeration"])
for (auto e : dict["item_enumeration"])
ess.insert(e["value"].as<std::string>());
std::string defaultValue = dict.firstItem("_item_default.value");
std::string defaultValue;
cif::tie(defaultValue) = dict["item_default"].front().get("value");
bool defaultIsNull = false;
if (defaultValue.empty())
{
for (auto& r: dict["_item_default"])
for (auto &r : dict["_item_default"])
{
defaultIsNull = r["value"].is_null();
break;
}
}
// collect the dict from our dataBlock and construct validators
for (auto i: dict["item"])
for (auto i : dict["item"])
{
std::string tagName, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(tagName);
if (catName.empty() or itemName.empty())
error("Invalid tag name in _item.name " + tagName);
if (not iequals(category, catName) and not (category.empty() or category == "?"))
if (not iequals(category, catName) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
else
category = catName;
auto& ivs = mImpl->mItemValidators[category];
auto &ivs = mImpl->mItemValidators[category];
auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
if (vi == ivs.end())
ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull});
......@@ -1043,7 +1061,7 @@ void DictParser::parseSaveFrame()
if (VERBOSE > 2)
{
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
if (iequals(tagName, saveFrameName))
std::cerr << "choosing " << mandatory << std::endl;
else
......@@ -1060,7 +1078,7 @@ void DictParser::parseSaveFrame()
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
}
// vi->mMandatory = (iequals(mandatory, "yes"));
// vi->mMandatory = (iequals(mandatory, "yes"));
if (vi->mType == nullptr)
vi->mType = tv;
......@@ -1070,14 +1088,14 @@ void DictParser::parseSaveFrame()
// ...
}
}
// collect the dict from our dataBlock and construct validators
for (auto i: dict["item_linked"])
for (auto i : dict["item_linked"])
{
std::string childTagName, parentTagName;
cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name");
mImpl->mLinkedItems.emplace(childTagName, parentTagName);
}
}
......@@ -1088,20 +1106,20 @@ void DictParser::linkItems()
if (not mDataBlock)
error("no datablock");
auto& dict = *mDataBlock;
auto &dict = *mDataBlock;
// links are identified by a parent category, a child category and a group ID
using key_type = std::tuple<std::string,std::string,int>;
using key_type = std::tuple<std::string, std::string, int>;
std::map<key_type,size_t> linkIndex;
std::map<key_type, size_t> linkIndex;
// Each link group consists of a set of keys
std::vector<std::tuple<std::vector<std::string>,std::vector<std::string>>> linkKeys;
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
auto addLink = [&](size_t ix, const std::string& pk, const std::string& ck)
auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
{
auto&& [pkeys, ckeys] = linkKeys.at(ix);
auto &&[pkeys, ckeys] = linkKeys.at(ix);
bool found = false;
for (size_t i = 0; i < pkeys.size(); ++i)
......@@ -1120,29 +1138,29 @@ void DictParser::linkItems()
}
};
auto& linkedGroupList = dict["pdbx_item_linked_group_list"];
auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
for (auto gl: linkedGroupList)
for (auto gl : linkedGroupList)
{
std::string child, parent;
int link_group_id;
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->mCategory->mName, civ->mCategory->mName, link_group_id };
key_type key{piv->mCategory->mName, civ->mCategory->mName, link_group_id};
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag);
}
......@@ -1151,35 +1169,35 @@ void DictParser::linkItems()
if (linkedGroupList.empty())
{
// for links recorded in categories but not in pdbx_item_linked_group_list
for (auto li: mImpl->mLinkedItems)
for (auto li : mImpl->mLinkedItems)
{
std::string child, parent;
std::tie(child, parent) = li;
auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->mCategory->mName, civ->mCategory->mName, 0 };
key_type key{piv->mCategory->mName, civ->mCategory->mName, 0};
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag);
}
}
auto& linkedGroup = dict["pdbx_item_linked_group"];
auto &linkedGroup = dict["pdbx_item_linked_group"];
// now store the links in the validator
for (auto& kv: linkIndex)
for (auto &kv : linkIndex)
{
ValidateLink link = {};
std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = kv.first;
......@@ -1187,7 +1205,7 @@ void DictParser::linkItems()
std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[kv.second];
// look up the label
for (auto r: linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
for (auto r : linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
{
link.mLinkGroupLabel = r["label"].as<std::string>();
break;
......@@ -1197,22 +1215,22 @@ void DictParser::linkItems()
}
// now make sure the itemType is specified for all itemValidators
for (auto& cv: mValidator.mCategoryValidators)
for (auto &cv : mValidator.mCategoryValidators)
{
for (auto& iv: cv.mItemValidators)
for (auto &iv : cv.mItemValidators)
{
if (iv.mType == nullptr)
std::cerr << "Missing item_type for " << iv.mTag << std::endl;
}
}
}
}
void DictParser::loadDictionary()
{
std::unique_ptr<Datablock> dict;
Datablock* savedDatablock = mDataBlock;
Datablock *savedDatablock = mDataBlock;
try
{
while (mLookahead != eCIFTokenEOF)
......@@ -1222,12 +1240,12 @@ void DictParser::loadDictionary()
case eCIFTokenGLOBAL:
parseGlobal();
break;
default:
{
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
mDataBlock = dict.get();
match(eCIFTokenDATA);
parseDataBlock();
break;
......@@ -1235,29 +1253,29 @@ void DictParser::loadDictionary()
}
}
}
catch (const std::exception&)
catch (const std::exception &)
{
std::cerr << "Error parsing dictionary" << std::endl;
throw;
}
// store all validators
for (auto& ic: mImpl->mCategoryValidators)
for (auto &ic : mImpl->mCategoryValidators)
mValidator.addCategoryValidator(std::move(ic));
mImpl->mCategoryValidators.clear();
for (auto& iv: mImpl->mItemValidators)
for (auto &iv : mImpl->mItemValidators)
{
auto cv = mValidator.getValidatorForCategory(iv.first);
if (cv == nullptr)
error("Undefined category '" + iv.first);
for (auto& v: iv.second)
const_cast<ValidateCategory*>(cv)->addItemValidator(std::move(v));
for (auto &v : iv.second)
const_cast<ValidateCategory *>(cv)->addItemValidator(std::move(v));
}
// check all item validators for having a typeValidator
if (dict)
linkItems();
......@@ -1280,47 +1298,45 @@ void DictParser::loadDictionary()
bool DictParser::collectItemTypes()
{
bool result = false;
if (not mDataBlock)
error("no datablock");
auto& dict = *mDataBlock;
for (auto& t: dict["item_type_list"])
auto &dict = *mDataBlock;
for (auto &t : dict["item_type_list"])
{
std::string code, primitiveCode, construct;
cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
ba::replace_all(construct, "\\n", "\n");
ba::replace_all(construct, "\\t", "\t");
ba::replace_all(construct, "\\\n", "");
try
{
ValidateType v = {
code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize)
};
code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize)};
mValidator.addTypeValidator(std::move(v));
}
catch (const std::exception&)
catch (const std::exception &)
{
throw_with_nested(CifParserError(t.lineNr(), "error in regular expression"));
}
// Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-)
// if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v);
// Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-)
// if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v);
if (VERBOSE >= 5)
std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
result = true;
}
return result;
}
}
} // namespace cif
......@@ -123,19 +123,12 @@ const uint8_t kCharToLowerMap[256] =
// --------------------------------------------------------------------
/// Case-insensitive equality test for two strings.
///
/// @param a  first string
/// @param b  second string
/// @return   true when both strings have the same length and every pair of
///           corresponding characters is equal after lower-casing
bool iequals(const std::string &a, const std::string &b)
{
	// Strings of different length can never be equal; this also means a
	// single index bounds both strings in the loop below.
	if (a.length() != b.length())
		return false;

	for (std::string::size_type i = 0; i < a.length(); ++i)
	{
		// Widen through unsigned char first, so a negative plain char is
		// never handed to a <cctype>-style tolower.
		const auto la = tolower(static_cast<unsigned char>(a[i]));
		const auto lb = tolower(static_cast<unsigned char>(b[i]));

		if (la != lb)
			return false;
	}

	return true;
}
/// Case-insensitive equality test for two string views.
///
/// Characters are compared after mapping each byte through kCharToLowerMap.
///
/// @param a  first string
/// @param b  second string
/// @return   true when both views have the same length and every pair of
///           corresponding bytes maps to the same kCharToLowerMap entry
bool iequals(std::string_view a, std::string_view b)
{
	// Differing lengths decide the result up front. When the lengths match,
	// reaching a.end() implies reaching b.end(), so only one iterator needs
	// to be checked in the loop condition.
	bool result = a.length() == b.length();

	for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
		result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];

	return result;
}
......@@ -148,25 +141,6 @@ bool iequals(const char *a, const char *b)
return result and *a == *b;
}
/// Case-insensitive three-way comparison of two strings.
///
/// @param a  left-hand string
/// @param b  right-hand string
/// @return   0 when the strings are equal ignoring case; otherwise the
///           difference of the first pair of lower-cased characters that
///           differ, or 1 / -1 when one string is a proper prefix of the
///           other (1 if @a a is the longer one, -1 if @a b is)
int icompare(const std::string &a, const std::string &b)
{
	auto ai = a.begin(), bi = b.begin();

	for (; ai != a.end() and bi != b.end(); ++ai, ++bi)
	{
		// Widen through unsigned char first, so a negative plain char is
		// never handed to a <cctype>-style tolower.
		const int d = tolower(static_cast<unsigned char>(*ai)) - tolower(static_cast<unsigned char>(*bi));
		if (d != 0)
			return d;
	}

	// Common prefix is identical: the longer string sorts last.
	if (ai != a.end())
		return 1;
	if (bi != b.end())
		return -1;

	return 0;
}
int icompare(std::string_view a, std::string_view b)
{
int d = 0;
......
......@@ -24,13 +24,20 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fstream>
#include <filesystem>
#include <boost/algorithm/string.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "cif++/Cif++.hpp"
#include "cif++/CifParser.hpp"
#include "cif++/CifValidator.hpp"
namespace ba = boost::algorithm;
namespace fs = std::filesystem;
namespace io = boost::iostreams;
extern int VERBOSE;
......@@ -219,8 +226,11 @@ const ValidateItem *ValidateCategory::getValidatorForItem(std::string_view tag)
// --------------------------------------------------------------------
Validator::Validator()
Validator::Validator(std::string_view name, std::istream &is)
: mName(name)
{
DictParser p(*this, is);
p.loadDictionary();
}
Validator::~Validator()
......@@ -340,7 +350,7 @@ std::vector<const ValidateLink *> Validator::getLinksForChild(std::string_view c
return result;
}
void Validator::reportError(const std::string &msg, bool fatal)
void Validator::reportError(const std::string &msg, bool fatal) const
{
if (mStrict or fatal)
throw ValidationError(msg);
......@@ -348,4 +358,78 @@ void Validator::reportError(const std::string &msg, bool fatal)
std::cerr << msg << std::endl;
}
// --------------------------------------------------------------------
// Definition of the static ValidatorFactory member sInstance.
ValidatorFactory ValidatorFactory::sInstance;
// Default constructor: performs no work of its own, so every member is
// default-initialised.
ValidatorFactory::ValidatorFactory()
{
}
/// Return the validator registered under @a dictionary, loading it first when
/// it is not yet known.
///
/// Lookup order for a dictionary that is not yet in mValidators:
///   1. loadResource() with the name as given;
///   2. loadResource() with ".dic" appended, unless the name already has
///      that extension;
///   3. a gzip-compressed file on disk named "<name>.gz" (when the name ends
///      in ".dic") or "<name>.dic.gz" (otherwise), also looked for below
///      CACHE_DIR and DATA_DIR when both macros are defined.
///
/// @param dictionary  name of the dictionary, compared ignoring case
/// @return            reference to the validator stored in mValidators
/// @throws std::runtime_error when no dictionary can be found, or when the
///         compressed file exists but cannot be opened
const Validator &ValidatorFactory::operator[](std::string_view dictionary)
{
	// Held for the whole call, including any dictionary loading below.
	std::lock_guard lock(mMutex);

	// Reuse a validator that was registered earlier under this name.
	for (auto &known : mValidators)
	{
		if (not iequals(known.mName, dictionary))
			continue;
		return known;
	}

	// not found, add it
	fs::path dict_name(dictionary);

	auto data = loadResource(dictionary);
	if (not data and dict_name.extension().string() != ".dic")
		data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));

	if (not data)
	{
		// might be a compressed dictionary on disk
		fs::path gzPath = dictionary;
		const char *suffix = (gzPath.extension() == ".dic") ? ".gz" : ".dic.gz";
		gzPath = gzPath.parent_path() / (gzPath.filename().string() + suffix);

#if defined(CACHE_DIR) and defined(DATA_DIR)
		// Not found relative to the current location: try the configured
		// directories in order and keep the first candidate that exists.
		if (not fs::exists(gzPath))
		{
			for (const char *dir : {CACHE_DIR, DATA_DIR})
			{
				auto candidate = fs::path(dir) / gzPath;
				if (not fs::exists(candidate))
					continue;

				swap(gzPath, candidate);
				break;
			}
		}
#endif

		if (not fs::exists(gzPath))
			throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");

		std::ifstream file(gzPath, std::ios::binary);
		if (not file.is_open())
			throw std::runtime_error("Could not open dictionary (" + gzPath.string() + ")");

		// Decompress on the fly while the validator parses the dictionary.
		io::filtering_stream<io::input> in;
		in.push(io::gzip_decompressor());
		in.push(file);

		mValidators.emplace_back(dictionary, in);
	}
	else
		mValidators.emplace_back(dictionary, *data);

	assert(iequals(mValidators.back().mName, dictionary));
	return mValidators.back();
}
} // namespace cif
......@@ -468,14 +468,14 @@ Point CenterPoints(std::vector<Point>& Points)
return t;
}
Point Centroid(std::vector<Point>& Points)
Point Centroid(const std::vector<Point>& pts)
{
Point result;
for (Point& pt : Points)
for (auto &pt : pts)
result += pt;
result /= static_cast<float>(Points.size());
result /= static_cast<float>(pts.size());
return result;
}
......
......@@ -216,9 +216,9 @@ struct AtomImpl
, mLocation(i.mLocation)
, mRefcount(1)
, mRow(i.mRow)
, mCachedRefs(i.mCachedRefs)
, mCompound(i.mCompound)
, mRadius(i.mRadius)
, mCachedProperties(i.mCachedProperties)
, mSymmetryCopy(i.mSymmetryCopy)
, mClone(true)
// , mRTop(i.mRTop), mD(i.mD)
......@@ -270,9 +270,9 @@ struct AtomImpl
, mLocation(loc)
, mRefcount(1)
, mRow(impl.mRow)
, mCachedRefs(impl.mCachedRefs)
, mCompound(impl.mCompound)
, mRadius(impl.mRadius)
, mCachedProperties(impl.mCachedProperties)
, mSymmetryCopy(true)
, mSymmetryOperator(sym_op)
{
......@@ -317,13 +317,15 @@ struct AtomImpl
auto cat = mDb.get("atom_site_anisotrop");
if (cat)
{
auto r = cat->find1(cif::Key("id") == mID);
if (not r.empty())
try
{
result = true;
auto r = cat->find1(cif::Key("id") == mID);
cif::tie(anisou[0], anisou[1], anisou[2], anisou[3], anisou[4], anisou[5]) =
r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");
result = true;
}
catch(const std::exception& e)
{
}
}
......@@ -338,9 +340,9 @@ struct AtomImpl
if (not mClone)
{
mRow["Cartn_x"] = p.getX();
mRow["Cartn_y"] = p.getY();
mRow["Cartn_z"] = p.getZ();
property("Cartn_x", std::to_string(p.getX()));
property("Cartn_y", std::to_string(p.getY()));
property("Cartn_z", std::to_string(p.getZ()));
}
// boost::format kPosFmt("%.3f");
......@@ -382,26 +384,31 @@ struct AtomImpl
return mRadius;
}
const std::string &property(const std::string &name) const
const std::string property(const std::string_view name) const
{
static std::string kEmptyString;
auto i = mCachedProperties.find(name);
if (i == mCachedProperties.end())
for (auto &&[tag, ref] : mCachedRefs)
{
auto v = mRow[name];
if (v.empty())
return kEmptyString;
return mCachedProperties[name] = v.as<std::string>();
if (tag == name)
return ref.as<std::string>();
}
else
return i->second;
mCachedRefs.emplace_back(name, mRow[name]);
return std::get<1>(mCachedRefs.back()).as<std::string>();
}
void property(const std::string &name, const std::string &value)
void property(const std::string_view name, const std::string &value)
{
mRow[name] = value;
for (auto &&[tag, ref] : mCachedRefs)
{
if (tag != name)
continue;
ref = value;
return;
}
mCachedRefs.emplace_back(name, mRow[name]);
std::get<1>(mCachedRefs.back()) = value;
}
int compare(const AtomImpl &b) const
......@@ -432,9 +439,11 @@ struct AtomImpl
Point mLocation;
int mRefcount;
cif::Row mRow;
mutable std::vector<std::tuple<std::string,cif::detail::ItemReference>> mCachedRefs;
mutable const Compound *mCompound = nullptr;
float mRadius = std::nanf("4");
mutable std::map<std::string, std::string> mCachedProperties;
bool mSymmetryCopy = false;
bool mClone = false;
......@@ -533,25 +542,25 @@ const cif::Row Atom::getRowAniso() const
}
template <>
std::string Atom::property<std::string>(const std::string &name) const
std::string Atom::property<std::string>(const std::string_view name) const
{
return impl()->property(name);
}
template <>
int Atom::property<int>(const std::string &name) const
int Atom::property<int>(const std::string_view name) const
{
auto v = impl()->property(name);
return v.empty() ? 0 : stoi(v);
}
template <>
float Atom::property<float>(const std::string &name) const
float Atom::property<float>(const std::string_view name) const
{
return stof(impl()->property(name));
}
void Atom::property(const std::string &name, const std::string &value)
void Atom::property(const std::string_view name, const std::string &value)
{
impl()->property(name, value);
}
......@@ -1736,7 +1745,7 @@ File::~File()
delete mImpl;
}
cif::Datablock& File::createDatablock(const std::string &name)
cif::Datablock& File::createDatablock(const std::string_view name)
{
auto db = new cif::Datablock(name);
......@@ -1807,9 +1816,9 @@ Structure::Structure(File &f, size_t modelNr, StructureOpenOptions options)
}
if (mAtoms.empty())
throw std::runtime_error("No atoms loaded, refuse to continue");
loadData();
std::cerr << "Warning: no atoms loaded" << std::endl;
else
loadData();
}
void Structure::loadAtomsForModel(StructureOpenOptions options)
......
......@@ -90,4 +90,66 @@ int GetSpacegroupNumber(std::string spacegroup)
return result;
}
// --------------------------------------------------------------------
/// Look up the number of a spacegroup by one of its names.
///
/// @param spacegroup  the name to look for; "P 21 21 2 A" is treated as
///                    "P 21 21 2 (a)"
/// @param type        which name to match: SpacegroupName::full uses a binary
///                    search on the `name` field of kSpaceGroups,
///                    SpacegroupName::xHM scans the `xHM` field, and any other
///                    value scans the `Hall` field
/// @return            the `nr` field of the matching kSpaceGroups entry
/// @throws std::runtime_error when @a spacegroup is empty or no entry with a
///         non-zero number matches
int GetSpacegroupNumber(std::string spacegroup, SpacegroupName type)
{
	if (spacegroup.empty())
		throw std::runtime_error("No spacegroup, cannot continue");

	if (spacegroup == "P 21 21 2 A")
		spacegroup = "P 21 21 2 (a)";

	// Linear scan over kSpaceGroups; `field` picks the name to compare.
	// Yields 0 when nothing matches.
	auto scan = [&spacegroup](auto field) -> int
	{
		for (auto &sg : kSpaceGroups)
		{
			if (field(sg) == spacegroup)
				return sg.nr;
		}
		return 0;
	};

	int result = 0;

	if (type == SpacegroupName::xHM)
		result = scan([](const auto &sg) -> const auto & { return sg.xHM; });
	else if (not(type == SpacegroupName::full))
		result = scan([](const auto &sg) -> const auto & { return sg.Hall; });
	else
	{
		// Binary search over the closed index range [lo, hi].
		const size_t N = kNrOfSpaceGroups;
		int32_t lo = 0, hi = static_cast<int32_t>(N - 1);

		while (lo <= hi)
		{
			const int32_t mid = (lo + hi) / 2;
			const int order = spacegroup.compare(kSpaceGroups[mid].name);

			if (order == 0)
			{
				result = kSpaceGroups[mid].nr;
				break;
			}

			if (order > 0)
				lo = mid + 1;
			else
				hi = mid - 1;
		}
	}

	// A zero result means no entry matched: report the name that failed.
	if (result == 0)
		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

	return result;
}
}
......@@ -18,7 +18,6 @@ int main(int argc, char* argv[])
desc.add_options()
("input,i", po::value<std::string>(), "Input file")
("help,h", "Display help message")
("version", "Print version")
("verbose,v", "Verbose output")
("debug,d", po::value<int>(), "Debug level (for even more verbose output)");
......@@ -29,12 +28,6 @@ int main(int argc, char* argv[])
po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
po::notify(vm);
if (vm.count("version"))
{
std::cout << argv[0] << " version " PACKAGE_VERSION << std::endl;
exit(0);
}
if (vm.count("help") or vm.count("input") == 0)
{
std::cerr << desc << std::endl;
......
......@@ -33,6 +33,7 @@
// #include "cif++/DistanceMap.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/BondMap.hpp"
#include "cif++/CifValidator.hpp"
std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
......@@ -259,8 +260,10 @@ save__cat_2.desc
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......@@ -387,8 +390,10 @@ save__cat_1.c
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......@@ -535,8 +540,10 @@ save__cat_2.desc
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......@@ -741,8 +748,10 @@ save__cat_2.parent_id3
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......@@ -963,8 +972,10 @@ cat_2 3 cat_2:cat_1:3
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......@@ -1389,9 +1400,10 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
} buffer(const_cast<char*>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f;
f.loadDictionary(is_dict);
f.setValidator(&validator);
// --------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment