Commit 7f39d401 by Maarten L. Hekkelman

Optimised assigning data

parent af412c28
...@@ -244,7 +244,7 @@ class Datablock ...@@ -244,7 +244,7 @@ class Datablock
bool isValid(); bool isValid();
void validateLinks() const; void validateLinks() const;
void setValidator(Validator *v); void setValidator(const Validator *v);
// this one only looks up a Category, returns nullptr if it does not exist // this one only looks up a Category, returns nullptr if it does not exist
const Category *get(std::string_view name) const; const Category *get(std::string_view name) const;
...@@ -266,7 +266,7 @@ class Datablock ...@@ -266,7 +266,7 @@ class Datablock
CategoryList mCategories; // LRU CategoryList mCategories; // LRU
mutable std::shared_mutex mLock; mutable std::shared_mutex mLock;
std::string mName; std::string mName;
Validator *mValidator; const Validator *mValidator;
Datablock *mNext; Datablock *mNext;
}; };
...@@ -1816,7 +1816,7 @@ class Category ...@@ -1816,7 +1816,7 @@ class Category
friend class Row; friend class Row;
friend class detail::ItemReference; friend class detail::ItemReference;
Category(Datablock &db, const std::string_view name, Validator *Validator); Category(Datablock &db, const std::string_view name, const Validator *Validator);
Category(const Category &) = delete; Category(const Category &) = delete;
Category &operator=(const Category &) = delete; Category &operator=(const Category &) = delete;
~Category(); ~Category();
...@@ -2064,7 +2064,7 @@ class Category ...@@ -2064,7 +2064,7 @@ class Category
Datablock &db() { return mDb; } Datablock &db() { return mDb; }
void setValidator(Validator *v); void setValidator(const Validator *v);
iset fields() const; iset fields() const;
iset mandatoryFields() const; iset mandatoryFields() const;
...@@ -2121,14 +2121,24 @@ class Category ...@@ -2121,14 +2121,24 @@ class Category
size_t addColumn(std::string_view name); size_t addColumn(std::string_view name);
// One resolved link between this category and another category in the same
// datablock. Entries are created by Category::updateLinks() and stored in
// mParentLinks / mChildLinks so that link lookups do not have to be repeated.
//
// Both members are non-owning. They default to nullptr so that a
// default-constructed Linked never carries indeterminate pointers; the type
// remains an aggregate, so `{ category, link }` initialisation and structured
// bindings keep working.
struct Linked
{
	Category *linked = nullptr;      // the category at the other end of the link
	const ValidateLink *v = nullptr; // the link definition taken from the validator
};
void updateLinks();
Datablock &mDb; Datablock &mDb;
std::string mName; std::string mName;
Validator *mValidator; const Validator *mValidator;
const ValidateCategory *mCatValidator = nullptr; const ValidateCategory *mCatValidator = nullptr;
std::vector<ItemColumn> mColumns; std::vector<ItemColumn> mColumns;
ItemRow *mHead; ItemRow *mHead;
ItemRow *mTail; ItemRow *mTail;
class CatIndex *mIndex; class CatIndex *mIndex;
std::vector<Linked> mParentLinks, mChildLinks;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -2162,7 +2172,8 @@ class File ...@@ -2162,7 +2172,8 @@ class File
void loadDictionary(); // load the default dictionary, that is mmcifDdl in this case void loadDictionary(); // load the default dictionary, that is mmcifDdl in this case
void loadDictionary(const char *dict); // load one of the compiled in dictionaries void loadDictionary(const char *dict); // load one of the compiled in dictionaries
void loadDictionary(std::istream &is); // load dictionary from input stream
void setValidator(const Validator *v);
bool isValid(); bool isValid();
void validateLinks() const; void validateLinks() const;
...@@ -2226,10 +2237,8 @@ class File ...@@ -2226,10 +2237,8 @@ class File
void getTagOrder(std::vector<std::string> &tags) const; void getTagOrder(std::vector<std::string> &tags) const;
private: private:
void setValidator(Validator *v);
Datablock *mHead; Datablock *mHead;
Validator *mValidator; const Validator *mValidator;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -28,8 +28,8 @@ ...@@ -28,8 +28,8 @@
#include "cif++/Cif++.hpp" #include "cif++/Cif++.hpp"
#include <stack>
#include <map> #include <map>
#include <stack>
namespace cif namespace cif
{ {
...@@ -39,7 +39,7 @@ namespace cif ...@@ -39,7 +39,7 @@ namespace cif
class CifParserError : public std::runtime_error class CifParserError : public std::runtime_error
{ {
public: public:
CifParserError(uint32_t lineNr, const std::string& message); CifParserError(uint32_t lineNr, const std::string &message);
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -48,7 +48,8 @@ extern const uint32_t kMaxLineLength; ...@@ -48,7 +48,8 @@ extern const uint32_t kMaxLineLength;
extern const uint8_t kCharTraitsTable[128]; extern const uint8_t kCharTraitsTable[128];
enum CharTraitsMask: uint8_t { enum CharTraitsMask : uint8_t
{
kOrdinaryMask = 1 << 0, kOrdinaryMask = 1 << 0,
kNonBlankMask = 1 << 1, kNonBlankMask = 1 << 1,
kTextLeadMask = 1 << 2, kTextLeadMask = 1 << 2,
...@@ -75,13 +76,13 @@ inline bool isTextLead(int ch) ...@@ -75,13 +76,13 @@ inline bool isTextLead(int ch)
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0; return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
} }
inline bool isAnyPrint(int ch) inline bool isAnyPrint(int ch)
{ {
return ch == '\t' or return ch == '\t' or
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0); (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
} }
inline bool isUnquotedString(const char* s) inline bool isUnquotedString(const char *s)
{ {
bool result = isOrdinary(*s++); bool result = isOrdinary(*s++);
while (result and *s != 0) while (result and *s != 0)
...@@ -94,7 +95,7 @@ inline bool isUnquotedString(const char* s) ...@@ -94,7 +95,7 @@ inline bool isUnquotedString(const char* s)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
using DatablockIndex = std::map<std::string,std::size_t>; using DatablockIndex = std::map<std::string, std::size_t>;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// sac Parser, analogous to SAX Parser (simple api for xml) // sac Parser, analogous to SAX Parser (simple api for xml)
...@@ -102,15 +103,15 @@ using DatablockIndex = std::map<std::string,std::size_t>; ...@@ -102,15 +103,15 @@ using DatablockIndex = std::map<std::string,std::size_t>;
class SacParser class SacParser
{ {
public: public:
SacParser(std::istream& is, bool init = true); SacParser(std::istream &is, bool init = true);
virtual ~SacParser() {} virtual ~SacParser() {}
enum CIFToken enum CIFToken
{ {
eCIFTokenUnknown, eCIFTokenUnknown,
eCIFTokenEOF, eCIFTokenEOF,
eCIFTokenDATA, eCIFTokenDATA,
eCIFTokenLOOP, eCIFTokenLOOP,
eCIFTokenGLOBAL, eCIFTokenGLOBAL,
...@@ -120,7 +121,7 @@ class SacParser ...@@ -120,7 +121,7 @@ class SacParser
eCIFTokenValue, eCIFTokenValue,
}; };
static const char* kTokenName[]; static const char *kTokenName[];
enum CIFValueType enum CIFValueType
{ {
...@@ -133,40 +134,39 @@ class SacParser ...@@ -133,40 +134,39 @@ class SacParser
eCIFValueUnknown eCIFValueUnknown
}; };
static const char* kValueName[]; static const char *kValueName[];
int getNextChar(); int getNextChar();
void retract(); void retract();
void restart(); void restart();
CIFToken getNextToken(); CIFToken getNextToken();
void match(CIFToken token); void match(CIFToken token);
bool parseSingleDatablock(const std::string& datablock); bool parseSingleDatablock(const std::string &datablock);
DatablockIndex indexDatablocks(); DatablockIndex indexDatablocks();
bool parseSingleDatablock(const std::string& datablock, const DatablockIndex &index); bool parseSingleDatablock(const std::string &datablock, const DatablockIndex &index);
void parseFile(); void parseFile();
void parseGlobal(); void parseGlobal();
void parseDataBlock(); void parseDataBlock();
virtual void parseSaveFrame(); virtual void parseSaveFrame();
void parseDictionary(); void parseDictionary();
void error(const std::string& msg); void error(const std::string &msg);
// production methods, these are pure virtual here // production methods, these are pure virtual here
virtual void produceDatablock(const std::string& name) = 0; virtual void produceDatablock(const std::string &name) = 0;
virtual void produceCategory(const std::string& name) = 0; virtual void produceCategory(const std::string &name) = 0;
virtual void produceRow() = 0; virtual void produceRow() = 0;
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0; virtual void produceItem(const std::string &category, const std::string &item, const std::string &value) = 0;
protected: protected:
enum State enum State
{ {
eStateStart, eStateStart,
...@@ -181,21 +181,21 @@ class SacParser ...@@ -181,21 +181,21 @@ class SacParser
eStateTextField, eStateTextField,
eStateFloat = 100, eStateFloat = 100,
eStateInt = 110, eStateInt = 110,
// eStateNumericSuffix = 200, // eStateNumericSuffix = 200,
eStateValue = 300 eStateValue = 300
}; };
std::istream& mData; std::istream &mData;
// Parser state // Parser state
bool mValidate; bool mValidate;
uint32_t mLineNr; uint32_t mLineNr;
bool mBol; bool mBol;
int mState, mStart; int mState, mStart;
CIFToken mLookahead; CIFToken mLookahead;
std::string mTokenValue; std::string mTokenValue;
CIFValueType mTokenType; CIFValueType mTokenType;
std::stack<int> mBuffer; std::stack<int> mBuffer;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -203,18 +203,18 @@ class SacParser ...@@ -203,18 +203,18 @@ class SacParser
class Parser : public SacParser class Parser : public SacParser
{ {
public: public:
Parser(std::istream& is, File& f, bool init = true); Parser(std::istream &is, File &f, bool init = true);
virtual void produceDatablock(const std::string& name); virtual void produceDatablock(const std::string &name);
virtual void produceCategory(const std::string& name); virtual void produceCategory(const std::string &name);
virtual void produceRow(); virtual void produceRow();
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value); virtual void produceItem(const std::string &category, const std::string &item, const std::string &value);
protected: protected:
File& mFile; File &mFile;
Datablock* mDataBlock; Datablock *mDataBlock;
Datablock::iterator mCat; Datablock::iterator mCat;
Row mRow; Row mRow;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -222,23 +222,21 @@ class Parser : public SacParser ...@@ -222,23 +222,21 @@ class Parser : public SacParser
class DictParser : public Parser class DictParser : public Parser
{ {
public: public:
DictParser(Validator &validator, std::istream &is);
DictParser(Validator& validator, std::istream& is);
~DictParser(); ~DictParser();
void loadDictionary(); void loadDictionary();
private:
private:
virtual void parseSaveFrame(); virtual void parseSaveFrame();
bool collectItemTypes(); bool collectItemTypes();
void linkItems(); void linkItems();
Validator& mValidator; Validator &mValidator;
File mFile; File mFile;
struct DictParserDataImpl* mImpl; struct DictParserDataImpl *mImpl;
bool mCollectedItemTypes = false; bool mCollectedItemTypes = false;
}; };
} } // namespace cif
...@@ -38,6 +38,7 @@ namespace cif ...@@ -38,6 +38,7 @@ namespace cif
{ {
struct ValidateCategory; struct ValidateCategory;
class ValidatorFactory;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -154,9 +155,8 @@ struct ValidateLink ...@@ -154,9 +155,8 @@ struct ValidateLink
class Validator class Validator
{ {
public: public:
friend class DictParser;
Validator(); Validator(std::string_view name, std::istream &is);
~Validator(); ~Validator();
Validator(const Validator &rhs) = delete; Validator(const Validator &rhs) = delete;
...@@ -165,6 +165,9 @@ class Validator ...@@ -165,6 +165,9 @@ class Validator
Validator(Validator &&rhs); Validator(Validator &&rhs);
Validator &operator=(Validator &&rhs); Validator &operator=(Validator &&rhs);
friend class DictParser;
friend class ValidatorFactory;
void addTypeValidator(ValidateType &&v); void addTypeValidator(ValidateType &&v);
const ValidateType *getValidatorForType(std::string_view typeCode) const; const ValidateType *getValidatorForType(std::string_view typeCode) const;
...@@ -175,7 +178,7 @@ class Validator ...@@ -175,7 +178,7 @@ class Validator
std::vector<const ValidateLink *> getLinksForParent(std::string_view category) const; std::vector<const ValidateLink *> getLinksForParent(std::string_view category) const;
std::vector<const ValidateLink *> getLinksForChild(std::string_view category) const; std::vector<const ValidateLink *> getLinksForChild(std::string_view category) const;
void reportError(const std::string &msg, bool fatal); void reportError(const std::string &msg, bool fatal) const;
std::string dictName() const { return mName; } std::string dictName() const { return mName; }
void dictName(const std::string &name) { mName = name; } void dictName(const std::string &name) { mName = name; }
...@@ -184,6 +187,7 @@ class Validator ...@@ -184,6 +187,7 @@ class Validator
void dictVersion(const std::string &version) { mVersion = version; } void dictVersion(const std::string &version) { mVersion = version; }
private: private:
// name is fully qualified here: // name is fully qualified here:
ValidateItem *getValidatorForItem(std::string_view name) const; ValidateItem *getValidatorForItem(std::string_view name) const;
...@@ -196,4 +200,27 @@ class Validator ...@@ -196,4 +200,27 @@ class Validator
std::vector<ValidateLink> mLinkValidators; std::vector<ValidateLink> mLinkValidators;
}; };
// --------------------------------------------------------------------
// Hands out Validator objects by dictionary name.
//
// Access goes through the single static instance returned by instance();
// the constructor is private, so no other ValidatorFactory can be created
// from outside the class.
class ValidatorFactory
{
public:
// Returns a reference to the one static factory object (sInstance).
static ValidatorFactory &instance()
{
return sInstance;
}
// Returns the validator for the dictionary named by `dictionary`.
// The definition is not visible here; presumably the validator is created on
// first use and kept in mValidators -- TODO confirm against the implementation.
// NOTE(review): File::loadDictionary(const char *) stores the address of the
// returned object, so the reference must remain valid after this call returns.
const Validator &operator[](std::string_view dictionary);
private:
// The single instance handed out by instance(); defined out of line.
static ValidatorFactory sInstance;
// Private: construction is restricted to the class itself.
ValidatorFactory();
// NOTE(review): presumably serialises access to mValidators -- confirm in operator[].
std::mutex mMutex;
// Validators held by the factory. std::list does not relocate existing
// elements when new ones are inserted.
std::list<Validator> mValidators;
};
} // namespace cif } // namespace cif
...@@ -392,8 +392,13 @@ auto Datablock::emplace(std::string_view name) -> std::tuple<iterator, bool> ...@@ -392,8 +392,13 @@ auto Datablock::emplace(std::string_view name) -> std::tuple<iterator, bool>
} }
if (isNew) if (isNew)
{
mCategories.emplace(begin(), *this, std::string(name), mValidator); mCategories.emplace(begin(), *this, std::string(name), mValidator);
for (auto &cat : mCategories)
cat.updateLinks();
}
return std::make_tuple(begin(), isNew); return std::make_tuple(begin(), isNew);
} }
...@@ -406,17 +411,28 @@ Category &Datablock::operator[](std::string_view name) ...@@ -406,17 +411,28 @@ Category &Datablock::operator[](std::string_view name)
Category *Datablock::get(std::string_view name) Category *Datablock::get(std::string_view name)
{ {
return &operator[](name); std::shared_lock lock(mLock);
for (auto &cat : mCategories)
{
if (iequals(cat.name(), name))
return &cat;
}
return nullptr;
} }
const Category *Datablock::get(std::string_view name) const const Category *Datablock::get(std::string_view name) const
{ {
std::shared_lock lock(mLock); std::shared_lock lock(mLock);
auto i = find_if(begin(), end(), [name](const Category &cat) -> bool for (auto &cat : mCategories)
{ return iequals(cat.name(), name); }); {
if (iequals(cat.name(), name))
return &cat;
}
return i == end() ? nullptr : &*i; return nullptr;
} }
bool Datablock::isValid() bool Datablock::isValid()
...@@ -440,7 +456,7 @@ void Datablock::validateLinks() const ...@@ -440,7 +456,7 @@ void Datablock::validateLinks() const
cat.validateLinks(); cat.validateLinks();
} }
void Datablock::setValidator(Validator *v) void Datablock::setValidator(const Validator *v)
{ {
std::shared_lock lock(mLock); std::shared_lock lock(mLock);
...@@ -1322,7 +1338,7 @@ RowSet &RowSet::orderBy(std::initializer_list<std::string> items) ...@@ -1322,7 +1338,7 @@ RowSet &RowSet::orderBy(std::initializer_list<std::string> items)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
Category::Category(Datablock &db, const std::string_view name, Validator *Validator) Category::Category(Datablock &db, const std::string_view name, const Validator *Validator)
: mDb(db) : mDb(db)
, mName(name) , mName(name)
, mValidator(Validator) , mValidator(Validator)
...@@ -1357,7 +1373,7 @@ Category::~Category() ...@@ -1357,7 +1373,7 @@ Category::~Category()
delete mIndex; delete mIndex;
} }
void Category::setValidator(Validator *v) void Category::setValidator(const Validator *v)
{ {
mValidator = v; mValidator = v;
...@@ -1382,6 +1398,33 @@ void Category::setValidator(Validator *v) ...@@ -1382,6 +1398,33 @@ void Category::setValidator(Validator *v)
} }
else else
mCatValidator = nullptr; mCatValidator = nullptr;
updateLinks();
}
void Category::updateLinks()
{
mChildLinks.clear();
mParentLinks.clear();
if (mValidator != nullptr)
{
for (auto link : mValidator->getLinksForParent(mName))
{
auto childCat = mDb.get(link->mChildCategory);
if (childCat == nullptr)
continue;
mChildLinks.push_back({ childCat, link });
}
for (auto link : mValidator->getLinksForChild(mName))
{
auto parentCat = mDb.get(link->mParentCategory);
if (parentCat == nullptr)
continue;
mParentLinks.push_back({ parentCat, link });
}
}
} }
bool Category::hasColumn(std::string_view name) const bool Category::hasColumn(std::string_view name) const
...@@ -1827,12 +1870,8 @@ auto Category::erase(iterator pos) -> iterator ...@@ -1827,12 +1870,8 @@ auto Category::erase(iterator pos) -> iterator
if (mValidator != nullptr) if (mValidator != nullptr)
{ {
for (auto &link : mValidator->getLinksForParent(mName)) for (auto &&[childCat, link] : mChildLinks)
{ {
auto childCat = mDb.get(link->mChildCategory);
if (childCat == nullptr)
continue;
Condition cond; Condition cond;
for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix) for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
...@@ -1970,12 +2009,8 @@ bool Category::isOrphan(Row r) ...@@ -1970,12 +2009,8 @@ bool Category::isOrphan(Row r)
return false; return false;
bool isOrphan = true; bool isOrphan = true;
for (auto &link : mValidator->getLinksForChild(mName)) for (auto &&[parentCat, link] : mParentLinks)
{ {
auto parentCat = mDb.get(link->mParentCategory);
if (parentCat == nullptr)
continue;
Condition cond; Condition cond;
for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix) for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
{ {
...@@ -2006,12 +2041,8 @@ bool Category::hasChildren(Row r) const ...@@ -2006,12 +2041,8 @@ bool Category::hasChildren(Row r) const
bool result = false; bool result = false;
for (auto &link : mValidator->getLinksForParent(mName)) for (auto &&[childCat, link] : mChildLinks)
{ {
auto childCat = mDb.get(link->mChildCategory);
if (childCat == nullptr)
continue;
Condition cond; Condition cond;
for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix) for (size_t ix = 0; ix < link->mParentKeys.size(); ++ix)
...@@ -2037,12 +2068,8 @@ bool Category::hasParents(Row r) const ...@@ -2037,12 +2068,8 @@ bool Category::hasParents(Row r) const
bool result = false; bool result = false;
for (auto &link : mValidator->getLinksForChild(mName)) for (auto &&[parentCat, link] : mParentLinks)
{ {
auto parentCat = mDb.get(link->mParentCategory);
if (parentCat == nullptr)
continue;
Condition cond; Condition cond;
for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix) for (size_t ix = 0; ix < link->mChildKeys.size(); ++ix)
...@@ -2251,23 +2278,17 @@ bool Category::isValid() ...@@ -2251,23 +2278,17 @@ bool Category::isValid()
void Category::validateLinks() const void Category::validateLinks() const
{ {
auto &validator = getValidator(); for (auto &&[parentCat, link] : mParentLinks)
for (auto linkValidator : validator.getLinksForChild(mName))
{ {
auto parent = mDb.get(linkValidator->mParentCategory);
if (parent == nullptr)
continue;
size_t missing = 0; size_t missing = 0;
for (auto r : *this) for (auto r : *this)
if (not hasParent(r, *parent, *linkValidator)) if (not hasParent(r, *parentCat, *link))
++missing; ++missing;
if (missing) if (missing)
{ {
std::cerr << "Links for " << linkValidator->mLinkGroupLabel << " are incomplete" << std::endl std::cerr << "Links for " << link->mLinkGroupLabel << " are incomplete" << std::endl
<< " There are " << missing << " items in " << mName << " that don't have matching parent items in " << parent->mName << std::endl; << " There are " << missing << " items in " << mName << " that don't have matching parent items in " << parentCat->mName << std::endl;
} }
} }
} }
...@@ -2708,17 +2729,10 @@ void Category::update_value(RowSet &&rows, const std::string &tag, const std::st ...@@ -2708,17 +2729,10 @@ void Category::update_value(RowSet &&rows, const std::string &tag, const std::st
row.assign(colIx, value, true); row.assign(colIx, value, true);
// see if we need to update any child categories that depend on this value // see if we need to update any child categories that depend on this value
auto &validator = getValidator();
auto &db = mDb;
for (auto parent : rows) for (auto parent : rows)
{ {
for (auto linked : validator.getLinksForParent(mName)) for (auto &&[childCat, linked] : mChildLinks)
{ {
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
if (std::find(linked->mParentKeys.begin(), linked->mParentKeys.end(), tag) == linked->mParentKeys.end()) if (std::find(linked->mParentKeys.begin(), linked->mParentKeys.end(), tag) == linked->mParentKeys.end())
continue; continue;
...@@ -2875,18 +2889,8 @@ void Row::assign(const std::vector<Item> &values) ...@@ -2875,18 +2889,8 @@ void Row::assign(const std::vector<Item> &values)
// auto iv = col.mValidator; // auto iv = col.mValidator;
if (mCascade) if (mCascade)
{ {
auto &validator = cat->getValidator(); for (auto &&[childCat, linked] : cat->mChildLinks)
auto &db = cat->db();
for (auto linked : validator.getLinksForParent(cat->mName))
{ {
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
// if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
// continue;
Condition cond; Condition cond;
std::string childTag; std::string childTag;
...@@ -3027,15 +3031,8 @@ void Row::assign(size_t column, const std::string &value, bool skipUpdateLinked) ...@@ -3027,15 +3031,8 @@ void Row::assign(size_t column, const std::string &value, bool skipUpdateLinked)
auto iv = col.mValidator; auto iv = col.mValidator;
if (not skipUpdateLinked and iv != nullptr and mCascade) if (not skipUpdateLinked and iv != nullptr and mCascade)
{ {
auto &validator = cat->getValidator(); for (auto &&[childCat, linked] : cat->mChildLinks)
auto &db = cat->db();
for (auto linked : validator.getLinksForParent(cat->mName))
{ {
auto childCat = db.get(linked->mChildCategory);
if (childCat == nullptr)
continue;
if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end()) if (find(linked->mParentKeys.begin(), linked->mParentKeys.end(), iv->mTag) == linked->mParentKeys.end())
continue; continue;
...@@ -3214,18 +3211,13 @@ void Row::swap(size_t cix, ItemRow *a, ItemRow *b) ...@@ -3214,18 +3211,13 @@ void Row::swap(size_t cix, ItemRow *a, ItemRow *b)
auto parentColName = cat->getColumnName(cix); auto parentColName = cat->getColumnName(cix);
// see if we need to update any child categories that depend on these values // see if we need to update any child categories that depend on these values
auto &validator = cat->getValidator();
auto parentCatValidator = cat->getCatValidator(); auto parentCatValidator = cat->getCatValidator();
for (auto &link : validator.getLinksForParent(cat->mName)) for (auto &&[childCat, link] : cat->mChildLinks)
{ {
if (find(link->mParentKeys.begin(), link->mParentKeys.end(), parentColName) == link->mParentKeys.end()) if (find(link->mParentKeys.begin(), link->mParentKeys.end(), parentColName) == link->mParentKeys.end())
continue; continue;
auto childCat = cat->db().get(link->mChildCategory);
if (childCat == nullptr or childCat->empty())
continue;
auto childCatValidator = childCat->getCatValidator(); auto childCatValidator = childCat->getCatValidator();
if (childCatValidator == nullptr) if (childCatValidator == nullptr)
continue; continue;
...@@ -3437,7 +3429,6 @@ File::File(File &&rhs) ...@@ -3437,7 +3429,6 @@ File::File(File &&rhs)
File::~File() File::~File()
{ {
delete mHead; delete mHead;
delete mValidator;
} }
void File::append(Datablock *e) void File::append(Datablock *e)
...@@ -3514,7 +3505,7 @@ void File::save(const std::filesystem::path &p) ...@@ -3514,7 +3505,7 @@ void File::save(const std::filesystem::path &p)
void File::load(std::istream &is) void File::load(std::istream &is)
{ {
Validator *saved = mValidator; auto saved = mValidator;
setValidator(nullptr); setValidator(nullptr);
Parser p(is, *this); Parser p(is, *this);
...@@ -3529,7 +3520,7 @@ void File::load(std::istream &is) ...@@ -3529,7 +3520,7 @@ void File::load(std::istream &is)
void File::load(std::istream &is, const std::string &datablock) void File::load(std::istream &is, const std::string &datablock)
{ {
Validator *saved = mValidator; auto saved = mValidator;
setValidator(nullptr); setValidator(nullptr);
Parser p(is, *this); Parser p(is, *this);
...@@ -3618,67 +3609,10 @@ void File::loadDictionary() ...@@ -3618,67 +3609,10 @@ void File::loadDictionary()
void File::loadDictionary(const char *dict) void File::loadDictionary(const char *dict)
{ {
fs::path dict_name(dict); setValidator(&ValidatorFactory::instance()[dict]);
auto data = loadResource(dict);
if (not data and dict_name.extension().string() != ".dic")
data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));
if (data)
loadDictionary(*data);
else
{
// might be a compressed dictionary on disk
fs::path p = dict;
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
if (not fs::exists(p))
{
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2))
{
swap(p, p2);
break;
}
}
}
#endif
if (fs::exists(p))
{
std::ifstream file(p, std::ios::binary);
if (not file.is_open())
throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
io::filtering_stream<io::input> in;
in.push(io::gzip_decompressor());
in.push(file);
loadDictionary(in);
}
else
throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");
}
}
void File::loadDictionary(std::istream &is)
{
std::unique_ptr<Validator> v(new Validator());
DictParser p(*v, is);
p.loadDictionary();
setValidator(v.release());
} }
void File::setValidator(Validator *v) void File::setValidator(const Validator *v)
{ {
mValidator = v; mValidator = v;
......
...@@ -42,25 +42,25 @@ namespace cif ...@@ -42,25 +42,25 @@ namespace cif
const uint32_t kMaxLineLength = 132; const uint32_t kMaxLineLength = 132;
const uint8_t kCharTraitsTable[128] = { const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f // 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2 14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
CifParserError::CifParserError(uint32_t lineNr, const std::string& message) CifParserError::CifParserError(uint32_t lineNr, const std::string &message)
: std::runtime_error("parse error at line " + std::to_string(lineNr) + ": " + message) : std::runtime_error("parse error at line " + std::to_string(lineNr) + ": " + message)
{ {
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
const char* SacParser::kTokenName[] = { const char *SacParser::kTokenName[] = {
"unknown", "unknown",
"EOF", "EOF",
"DATA", "DATA",
...@@ -69,22 +69,20 @@ const char* SacParser::kTokenName[] = { ...@@ -69,22 +69,20 @@ const char* SacParser::kTokenName[] = {
"SAVE", "SAVE",
"STOP", "STOP",
"Tag", "Tag",
"Value" "Value"};
};
const char* SacParser::kValueName[] = { const char *SacParser::kValueName[] = {
"Int", "Int",
"Float", "Float",
"Numeric", "Numeric",
"String", "String",
"TextField", "TextField",
"Inapplicable", "Inapplicable",
"Unknown" "Unknown"};
};
// -------------------------------------------------------------------- // --------------------------------------------------------------------
SacParser::SacParser(std::istream& is, bool init) SacParser::SacParser(std::istream &is, bool init)
: mData(is) : mData(is)
{ {
mValidate = true; mValidate = true;
...@@ -95,7 +93,7 @@ SacParser::SacParser(std::istream& is, bool init) ...@@ -95,7 +93,7 @@ SacParser::SacParser(std::istream& is, bool init)
mLookahead = getNextToken(); mLookahead = getNextToken();
} }
void SacParser::error(const std::string& msg) void SacParser::error(const std::string &msg)
{ {
throw CifParserError(mLineNr, msg); throw CifParserError(mLineNr, msg);
} }
...@@ -114,7 +112,7 @@ int SacParser::getNextChar() ...@@ -114,7 +112,7 @@ int SacParser::getNextChar()
result = mBuffer.top(); result = mBuffer.top();
mBuffer.pop(); mBuffer.pop();
} }
// very simple CR/LF translation into LF // very simple CR/LF translation into LF
if (result == '\r') if (result == '\r')
{ {
...@@ -123,12 +121,12 @@ int SacParser::getNextChar() ...@@ -123,12 +121,12 @@ int SacParser::getNextChar()
mBuffer.push(lookahead); mBuffer.push(lookahead);
result = '\n'; result = '\n';
} }
mTokenValue += static_cast<char>(result); mTokenValue += static_cast<char>(result);
if (result == '\n') if (result == '\n')
++mLineNr; ++mLineNr;
if (VERBOSE >= 6) if (VERBOSE >= 6)
{ {
std::cerr << "getNextChar => "; std::cerr << "getNextChar => ";
...@@ -137,7 +135,7 @@ int SacParser::getNextChar() ...@@ -137,7 +135,7 @@ int SacParser::getNextChar()
else else
std::cerr << char(result) << std::endl; std::cerr << char(result) << std::endl;
} }
return result; return result;
} }
...@@ -148,7 +146,7 @@ void SacParser::retract() ...@@ -148,7 +146,7 @@ void SacParser::retract()
char ch = mTokenValue.back(); char ch = mTokenValue.back();
if (ch == '\n') if (ch == '\n')
--mLineNr; --mLineNr;
mBuffer.push(ch); mBuffer.push(ch);
mTokenValue.pop_back(); mTokenValue.pop_back();
} }
...@@ -157,25 +155,25 @@ void SacParser::restart() ...@@ -157,25 +155,25 @@ void SacParser::restart()
{ {
while (not mTokenValue.empty()) while (not mTokenValue.empty())
retract(); retract();
switch (mStart) switch (mStart)
{ {
case eStateStart: case eStateStart:
mState = mStart = eStateFloat; mState = mStart = eStateFloat;
break; break;
case eStateFloat: case eStateFloat:
mState = mStart = eStateInt; mState = mStart = eStateInt;
break; break;
case eStateInt: case eStateInt:
mState = mStart = eStateValue; mState = mStart = eStateValue;
break; break;
default: default:
error("Invalid state in SacParser"); error("Invalid state in SacParser");
} }
mBol = false; mBol = false;
} }
...@@ -183,26 +181,26 @@ void SacParser::match(SacParser::CIFToken t) ...@@ -183,26 +181,26 @@ void SacParser::match(SacParser::CIFToken t)
{ {
if (mLookahead != t) if (mLookahead != t)
error(std::string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[mLookahead]); error(std::string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[mLookahead]);
mLookahead = getNextToken(); mLookahead = getNextToken();
} }
SacParser::CIFToken SacParser::getNextToken() SacParser::CIFToken SacParser::getNextToken()
{ {
const auto kEOF = std::char_traits<char>::eof(); const auto kEOF = std::char_traits<char>::eof();
CIFToken result = eCIFTokenUnknown; CIFToken result = eCIFTokenUnknown;
int quoteChar = 0; int quoteChar = 0;
mState = mStart = eStateStart; mState = mStart = eStateStart;
mBol = false; mBol = false;
mTokenValue.clear(); mTokenValue.clear();
mTokenType = eCIFValueUnknown; mTokenType = eCIFValueUnknown;
while (result == eCIFTokenUnknown) while (result == eCIFTokenUnknown)
{ {
auto ch = getNextChar(); auto ch = getNextChar();
switch (mState) switch (mState)
{ {
case eStateStart: case eStateStart:
...@@ -233,7 +231,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -233,7 +231,7 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
case eStateWhite: case eStateWhite:
if (ch == kEOF) if (ch == kEOF)
result = eCIFTokenEOF; result = eCIFTokenEOF;
...@@ -246,7 +244,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -246,7 +244,7 @@ SacParser::CIFToken SacParser::getNextToken()
else else
mBol = (ch == '\n'); mBol = (ch == '\n');
break; break;
case eStateComment: case eStateComment:
if (ch == '\n') if (ch == '\n')
{ {
...@@ -259,7 +257,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -259,7 +257,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (not isAnyPrint(ch)) else if (not isAnyPrint(ch))
error("invalid character in comment"); error("invalid character in comment");
break; break;
case eStateQuestionMark: case eStateQuestionMark:
if (isNonBlank(ch)) if (isNonBlank(ch))
mState = eStateValue; mState = eStateValue;
...@@ -291,10 +289,10 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -291,10 +289,10 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated textfield"); error("unterminated textfield");
else if (not isAnyPrint(ch)) else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")"); // error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std::cerr << "invalid character in text field '" << std::string({ static_cast<char>(ch) }) << "' (" << ch << ") line: " << mLineNr << std::endl; std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
break; break;
case eStateTextField + 1: case eStateTextField + 1:
if (isTextLead(ch) or ch == ' ' or ch == '\t') if (isTextLead(ch) or ch == ' ' or ch == '\t')
mState = eStateTextField; mState = eStateTextField;
...@@ -310,7 +308,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -310,7 +308,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch != '\n') else if (ch != '\n')
error("invalid character in text field"); error("invalid character in text field");
break; break;
case eStateQuotedString: case eStateQuotedString:
if (ch == kEOF) if (ch == kEOF)
error("unterminated quoted string"); error("unterminated quoted string");
...@@ -319,14 +317,14 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -319,14 +317,14 @@ SacParser::CIFToken SacParser::getNextToken()
else if (not isAnyPrint(ch)) else if (not isAnyPrint(ch))
error("invalid character in quoted string"); error("invalid character in quoted string");
break; break;
case eStateQuotedStringQuote: case eStateQuotedStringQuote:
if (isWhite(ch)) if (isWhite(ch))
{ {
retract(); retract();
result = eCIFTokenValue; result = eCIFTokenValue;
mTokenType = eCIFValueString; mTokenType = eCIFValueString;
assert(mTokenValue.length() >= 3); assert(mTokenValue.length() >= 3);
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2); mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
} }
...@@ -339,7 +337,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -339,7 +337,7 @@ SacParser::CIFToken SacParser::getNextToken()
else else
error("invalid character in quoted string"); error("invalid character in quoted string");
break; break;
case eStateTag: case eStateTag:
if (not isNonBlank(ch)) if (not isNonBlank(ch))
{ {
...@@ -347,7 +345,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -347,7 +345,7 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenTag; result = eCIFTokenTag;
} }
break; break;
case eStateFloat: case eStateFloat:
if (ch == '+' or ch == '-') if (ch == '+' or ch == '-')
{ {
...@@ -358,11 +356,11 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -358,11 +356,11 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
case eStateFloat + 1: case eStateFloat + 1:
// if (ch == '(') // numeric??? // if (ch == '(') // numeric???
// mState = eStateNumericSuffix; // mState = eStateNumericSuffix;
// else // else
if (ch == '.') if (ch == '.')
mState = eStateFloat + 2; mState = eStateFloat + 2;
else if (tolower(ch) == 'e') else if (tolower(ch) == 'e')
...@@ -376,12 +374,12 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -376,12 +374,12 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
// parsed '.' // parsed '.'
case eStateFloat + 2: case eStateFloat + 2:
// if (ch == '(') // numeric??? // if (ch == '(') // numeric???
// mState = eStateNumericSuffix; // mState = eStateNumericSuffix;
// else // else
if (tolower(ch) == 'e') if (tolower(ch) == 'e')
mState = eStateFloat + 3; mState = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF) else if (isWhite(ch) or ch == kEOF)
...@@ -393,7 +391,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -393,7 +391,7 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
// parsed 'e' // parsed 'e'
case eStateFloat + 3: case eStateFloat + 3:
if (ch == '-' or ch == '+') if (ch == '-' or ch == '+')
...@@ -410,11 +408,11 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -410,11 +408,11 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
case eStateFloat + 5: case eStateFloat + 5:
// if (ch == '(') // if (ch == '(')
// mState = eStateNumericSuffix; // mState = eStateNumericSuffix;
// else // else
if (isWhite(ch) or ch == kEOF) if (isWhite(ch) or ch == kEOF)
{ {
retract(); retract();
...@@ -424,14 +422,14 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -424,14 +422,14 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
case eStateInt: case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-') if (isdigit(ch) or ch == '+' or ch == '-')
mState = eStateInt + 1; mState = eStateInt + 1;
else else
restart(); restart();
break; break;
case eStateInt + 1: case eStateInt + 1:
if (isWhite(ch) or ch == kEOF) if (isWhite(ch) or ch == kEOF)
{ {
...@@ -442,36 +440,36 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -442,36 +440,36 @@ SacParser::CIFToken SacParser::getNextToken()
else else
restart(); restart();
break; break;
// case eStateNumericSuffix: // case eStateNumericSuffix:
// if (isdigit(ch)) // if (isdigit(ch))
// mState = eStateNumericSuffix + 1; // mState = eStateNumericSuffix + 1;
// else // else
// restart(); // restart();
// break; // break;
// //
// case eStateNumericSuffix + 1: // case eStateNumericSuffix + 1:
// if (ch == ')') // if (ch == ')')
// { // {
// result = eCIFTokenValue; // result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric; // mTokenType = eCIFValueNumeric;
// } // }
// else if (not isdigit(ch)) // else if (not isdigit(ch))
// restart(); // restart();
// break; // break;
case eStateValue: case eStateValue:
if (isNonBlank(ch)) if (isNonBlank(ch))
mState = eStateValue + 1; mState = eStateValue + 1;
else else
error("invalid character at this position"); error("invalid character at this position");
break; break;
case eStateValue + 1: case eStateValue + 1:
if (ch == '_') // first _, check for keywords if (ch == '_') // first _, check for keywords
{ {
std::string s = toLowerCopy(mTokenValue); std::string s = toLowerCopy(mTokenValue);
if (s == "global_") if (s == "global_")
result = eCIFTokenGLOBAL; result = eCIFTokenGLOBAL;
else if (s == "stop_") else if (s == "stop_")
...@@ -493,16 +491,16 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -493,16 +491,16 @@ SacParser::CIFToken SacParser::getNextToken()
if (not isNonBlank(ch)) if (not isNonBlank(ch))
{ {
retract(); retract();
if (tolower(mTokenValue[0]) == 'd') if (tolower(mTokenValue[0]) == 'd')
result = eCIFTokenDATA; result = eCIFTokenDATA;
else else
result = eCIFTokenSAVE; result = eCIFTokenSAVE;
mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5); mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
} }
break; break;
default: default:
assert(false); assert(false);
error("Invalid state in getNextToken"); error("Invalid state in getNextToken");
...@@ -519,7 +517,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -519,7 +517,7 @@ SacParser::CIFToken SacParser::getNextToken()
std::cerr << " '" << mTokenValue << '\''; std::cerr << " '" << mTokenValue << '\'';
std::cerr << std::endl; std::cerr << std::endl;
} }
return result; return result;
} }
...@@ -530,8 +528,15 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -530,8 +528,15 @@ DatablockIndex SacParser::indexDatablocks()
// first locate the start, as fast as we can // first locate the start, as fast as we can
auto &sb = *mData.rdbuf(); auto &sb = *mData.rdbuf();
enum { enum
start, comment, string, string_quote, qstring, data, data_name {
start,
comment,
string,
string_quote,
qstring,
data,
data_name
} state = start; } state = start;
int quote = 0; int quote = 0;
...@@ -547,7 +552,7 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -547,7 +552,7 @@ DatablockIndex SacParser::indexDatablocks()
case start: case start:
switch (ch) switch (ch)
{ {
case '#': state = comment; break; case '#': state = comment; break;
case 'd': case 'd':
case 'D': case 'D':
state = data; state = data;
...@@ -564,7 +569,7 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -564,7 +569,7 @@ DatablockIndex SacParser::indexDatablocks()
break; break;
} }
break; break;
case comment: case comment:
if (ch == '\n') if (ch == '\n')
state = start; state = start;
...@@ -574,29 +579,29 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -574,29 +579,29 @@ DatablockIndex SacParser::indexDatablocks()
if (ch == quote) if (ch == quote)
state = string_quote; state = string_quote;
break; break;
case string_quote: case string_quote:
if (std::isspace(ch)) if (std::isspace(ch))
state = start; state = start;
else else
state = string; state = string;
break; break;
case qstring: case qstring:
if (ch == ';' and bol) if (ch == ';' and bol)
state = start; state = start;
break; break;
case data: case data:
if (dblk[si] == 0 and isNonBlank(ch)) if (dblk[si] == 0 and isNonBlank(ch))
{ {
datablock = { static_cast<char>(ch) }; datablock = {static_cast<char>(ch)};
state = data_name; state = data_name;
} }
else if (dblk[si++] != ch) else if (dblk[si++] != ch)
state = start; state = start;
break; break;
case data_name: case data_name:
if (isNonBlank(ch)) if (isNonBlank(ch))
datablock.insert(datablock.end(), char(ch)); datablock.insert(datablock.end(), char(ch));
...@@ -604,7 +609,7 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -604,7 +609,7 @@ DatablockIndex SacParser::indexDatablocks()
{ {
if (not datablock.empty()) if (not datablock.empty())
index[datablock] = mData.tellg(); index[datablock] = mData.tellg();
state = start; state = start;
} }
else else
...@@ -618,13 +623,19 @@ DatablockIndex SacParser::indexDatablocks() ...@@ -618,13 +623,19 @@ DatablockIndex SacParser::indexDatablocks()
return index; return index;
} }
bool SacParser::parseSingleDatablock(const std::string& datablock) bool SacParser::parseSingleDatablock(const std::string &datablock)
{ {
// first locate the start, as fast as we can // first locate the start, as fast as we can
auto &sb = *mData.rdbuf(); auto &sb = *mData.rdbuf();
enum { enum
start, comment, string, string_quote, qstring, data {
start,
comment,
string,
string_quote,
qstring,
data
} state = start; } state = start;
int quote = 0; int quote = 0;
...@@ -640,7 +651,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock) ...@@ -640,7 +651,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
case start: case start:
switch (ch) switch (ch)
{ {
case '#': state = comment; break; case '#': state = comment; break;
case 'd': case 'd':
case 'D': case 'D':
state = data; state = data;
...@@ -657,7 +668,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock) ...@@ -657,7 +668,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
break; break;
} }
break; break;
case comment: case comment:
if (ch == '\n') if (ch == '\n')
state = start; state = start;
...@@ -667,19 +678,19 @@ bool SacParser::parseSingleDatablock(const std::string& datablock) ...@@ -667,19 +678,19 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
if (ch == quote) if (ch == quote)
state = string_quote; state = string_quote;
break; break;
case string_quote: case string_quote:
if (std::isspace(ch)) if (std::isspace(ch))
state = start; state = start;
else else
state = string; state = string;
break; break;
case qstring: case qstring:
if (ch == ';' and bol) if (ch == ';' and bol)
state = start; state = start;
break; break;
case data: case data:
if (isspace(ch) and dblk[si] == 0) if (isspace(ch) and dblk[si] == 0)
found = true; found = true;
...@@ -701,7 +712,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock) ...@@ -701,7 +712,7 @@ bool SacParser::parseSingleDatablock(const std::string& datablock)
return found; return found;
} }
bool SacParser::parseSingleDatablock(const std::string& datablock, const DatablockIndex &index) bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{ {
bool result = false; bool result = false;
...@@ -729,14 +740,14 @@ void SacParser::parseFile() ...@@ -729,14 +740,14 @@ void SacParser::parseFile()
case eCIFTokenGLOBAL: case eCIFTokenGLOBAL:
parseGlobal(); parseGlobal();
break; break;
case eCIFTokenDATA: case eCIFTokenDATA:
produceDatablock(mTokenValue); produceDatablock(mTokenValue);
match(eCIFTokenDATA); match(eCIFTokenDATA);
parseDataBlock(); parseDataBlock();
break; break;
default: default:
error("This file does not seem to be an mmCIF file"); error("This file does not seem to be an mmCIF file");
break; break;
...@@ -757,24 +768,24 @@ void SacParser::parseGlobal() ...@@ -757,24 +768,24 @@ void SacParser::parseGlobal()
void SacParser::parseDataBlock() void SacParser::parseDataBlock()
{ {
std::string cat; std::string cat;
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE) while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE)
{ {
switch (mLookahead) switch (mLookahead)
{ {
case eCIFTokenLOOP: case eCIFTokenLOOP:
{ {
cat.clear(); // should start a new category cat.clear(); // should start a new category
match(eCIFTokenLOOP); match(eCIFTokenLOOP);
std::vector<std::string> tags; std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag) while (mLookahead == eCIFTokenTag)
{ {
std::string catName, itemName; std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue); std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat.empty()) if (cat.empty())
{ {
produceCategory(catName); produceCategory(catName);
...@@ -782,27 +793,27 @@ void SacParser::parseDataBlock() ...@@ -782,27 +793,27 @@ void SacParser::parseDataBlock()
} }
else if (not iequals(cat, catName)) else if (not iequals(cat, catName))
error("inconsistent categories in loop_"); error("inconsistent categories in loop_");
tags.push_back(itemName); tags.push_back(itemName);
match(eCIFTokenTag); match(eCIFTokenTag);
} }
while (mLookahead == eCIFTokenValue) while (mLookahead == eCIFTokenValue)
{ {
produceRow(); produceRow();
for (auto tag: tags) for (auto tag : tags)
{ {
produceItem(cat, tag, mTokenValue); produceItem(cat, tag, mTokenValue);
match(eCIFTokenValue); match(eCIFTokenValue);
} }
} }
cat.clear(); cat.clear();
break; break;
} }
case eCIFTokenTag: case eCIFTokenTag:
{ {
std::string catName, itemName; std::string catName, itemName;
...@@ -816,17 +827,17 @@ void SacParser::parseDataBlock() ...@@ -816,17 +827,17 @@ void SacParser::parseDataBlock()
} }
match(eCIFTokenTag); match(eCIFTokenTag);
produceItem(cat, itemName, mTokenValue); produceItem(cat, itemName, mTokenValue);
match(eCIFTokenValue); match(eCIFTokenValue);
break; break;
} }
case eCIFTokenSAVE: case eCIFTokenSAVE:
parseSaveFrame(); parseSaveFrame();
break; break;
default: default:
assert(false); assert(false);
break; break;
...@@ -841,18 +852,20 @@ void SacParser::parseSaveFrame() ...@@ -841,18 +852,20 @@ void SacParser::parseSaveFrame()
// -------------------------------------------------------------------- // --------------------------------------------------------------------
Parser::Parser(std::istream& is, File& f, bool init) Parser::Parser(std::istream &is, File &f, bool init)
: SacParser(is, init), mFile(f), mDataBlock(nullptr) : SacParser(is, init)
, mFile(f)
, mDataBlock(nullptr)
{ {
} }
void Parser::produceDatablock(const std::string& name) void Parser::produceDatablock(const std::string &name)
{ {
mDataBlock = new Datablock(name); mDataBlock = new Datablock(name);
mFile.append(mDataBlock); mFile.append(mDataBlock);
} }
void Parser::produceCategory(const std::string& name) void Parser::produceCategory(const std::string &name)
{ {
if (VERBOSE >= 4) if (VERBOSE >= 4)
std::cerr << "producing category " << name << std::endl; std::cerr << "producing category " << name << std::endl;
...@@ -870,7 +883,7 @@ void Parser::produceRow() ...@@ -870,7 +883,7 @@ void Parser::produceRow()
mRow.lineNr(mLineNr); mRow.lineNr(mLineNr);
} }
void Parser::produceItem(const std::string& category, const std::string& item, const std::string& value) void Parser::produceItem(const std::string &category, const std::string &item, const std::string &value)
{ {
if (VERBOSE >= 4) if (VERBOSE >= 4)
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl; std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
...@@ -886,13 +899,15 @@ void Parser::produceItem(const std::string& category, const std::string& item, c ...@@ -886,13 +899,15 @@ void Parser::produceItem(const std::string& category, const std::string& item, c
struct DictParserDataImpl struct DictParserDataImpl
{ {
// temporary values for constructing dictionaries // temporary values for constructing dictionaries
std::vector<ValidateCategory> mCategoryValidators; std::vector<ValidateCategory> mCategoryValidators;
std::map<std::string,std::vector<ValidateItem>> mItemValidators; std::map<std::string, std::vector<ValidateItem>> mItemValidators;
std::set<std::tuple<std::string,std::string>> mLinkedItems; std::set<std::tuple<std::string, std::string>> mLinkedItems;
}; };
DictParser::DictParser(Validator& validator, std::istream& is) DictParser::DictParser(Validator &validator, std::istream &is)
: Parser(is, mFile), mValidator(validator), mImpl(new DictParserDataImpl) : Parser(is, mFile)
, mValidator(validator)
, mImpl(new DictParserDataImpl)
{ {
} }
...@@ -910,9 +925,9 @@ void DictParser::parseSaveFrame() ...@@ -910,9 +925,9 @@ void DictParser::parseSaveFrame()
if (saveFrameName.empty()) if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here"); error("Invalid save frame, should contain more than just 'save_' here");
bool isCategorySaveFrame = mTokenValue[0] != '_'; bool isCategorySaveFrame = mTokenValue[0] != '_';
Datablock dict(mTokenValue); Datablock dict(mTokenValue);
Datablock::iterator cat = dict.end(); Datablock::iterator cat = dict.end();
...@@ -921,37 +936,37 @@ void DictParser::parseSaveFrame() ...@@ -921,37 +936,37 @@ void DictParser::parseSaveFrame()
{ {
if (mLookahead == eCIFTokenLOOP) if (mLookahead == eCIFTokenLOOP)
{ {
cat = dict.end(); // should start a new category cat = dict.end(); // should start a new category
match(eCIFTokenLOOP); match(eCIFTokenLOOP);
std::vector<std::string> tags; std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag) while (mLookahead == eCIFTokenTag)
{ {
std::string catName, itemName; std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue); std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end()) if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName); std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName)) else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_"); error("inconsistent categories in loop_");
tags.push_back(itemName); tags.push_back(itemName);
match(eCIFTokenTag); match(eCIFTokenTag);
} }
while (mLookahead == eCIFTokenValue) while (mLookahead == eCIFTokenValue)
{ {
cat->emplace({}); cat->emplace({});
auto row = cat->back(); auto row = cat->back();
for (auto tag: tags) for (auto tag : tags)
{ {
row[tag] = mTokenValue; row[tag] = mTokenValue;
match(eCIFTokenValue); match(eCIFTokenValue);
} }
} }
cat = dict.end(); cat = dict.end();
} }
else else
...@@ -963,30 +978,30 @@ void DictParser::parseSaveFrame() ...@@ -963,30 +978,30 @@ void DictParser::parseSaveFrame()
std::tie(cat, std::ignore) = dict.emplace(catName); std::tie(cat, std::ignore) = dict.emplace(catName);
match(eCIFTokenTag); match(eCIFTokenTag);
if (cat->empty()) if (cat->empty())
cat->emplace({}); cat->emplace({});
cat->back()[itemName] = mTokenValue; cat->back()[itemName] = mTokenValue;
match(eCIFTokenValue); match(eCIFTokenValue);
} }
} }
match(eCIFTokenSAVE); match(eCIFTokenSAVE);
if (isCategorySaveFrame) if (isCategorySaveFrame)
{ {
std::string category; std::string category;
cif::tie(category) = dict["category"].front().get("id"); cif::tie(category) = dict["category"].front().get("id");
std::vector<std::string> keys; std::vector<std::string> keys;
for (auto k: dict["category_key"]) for (auto k : dict["category_key"])
keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>()))); keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>())));
iset groups; iset groups;
for (auto g: dict["category_group"]) for (auto g : dict["category_group"])
groups.insert(g["id"].as<std::string>()); groups.insert(g["id"].as<std::string>());
mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups}); mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
} }
else else
...@@ -995,46 +1010,46 @@ void DictParser::parseSaveFrame() ...@@ -995,46 +1010,46 @@ void DictParser::parseSaveFrame()
std::string typeCode; std::string typeCode;
cif::tie(typeCode) = dict["item_type"].front().get("code"); cif::tie(typeCode) = dict["item_type"].front().get("code");
const ValidateType* tv = nullptr; const ValidateType *tv = nullptr;
if (not (typeCode.empty() or typeCode == "?")) if (not(typeCode.empty() or typeCode == "?"))
tv = mValidator.getValidatorForType(typeCode); tv = mValidator.getValidatorForType(typeCode);
iset ess; iset ess;
for (auto e: dict["item_enumeration"]) for (auto e : dict["item_enumeration"])
ess.insert(e["value"].as<std::string>()); ess.insert(e["value"].as<std::string>());
std::string defaultValue; std::string defaultValue;
cif::tie(defaultValue) = dict["item_default"].front().get("value"); cif::tie(defaultValue) = dict["item_default"].front().get("value");
bool defaultIsNull = false; bool defaultIsNull = false;
if (defaultValue.empty()) if (defaultValue.empty())
{ {
for (auto& r: dict["_item_default"]) for (auto &r : dict["_item_default"])
{ {
defaultIsNull = r["value"].is_null(); defaultIsNull = r["value"].is_null();
break; break;
} }
} }
// collect the dict from our dataBlock and construct validators // collect the dict from our dataBlock and construct validators
for (auto i: dict["item"]) for (auto i : dict["item"])
{ {
std::string tagName, category, mandatory; std::string tagName, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code"); cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string catName, itemName; std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(tagName); std::tie(catName, itemName) = splitTagName(tagName);
if (catName.empty() or itemName.empty()) if (catName.empty() or itemName.empty())
error("Invalid tag name in _item.name " + tagName); error("Invalid tag name in _item.name " + tagName);
if (not iequals(category, catName) and not (category.empty() or category == "?")) if (not iequals(category, catName) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\''); error("specified category id does match the implicit category name for tag '" + tagName + '\'');
else else
category = catName; category = catName;
auto& ivs = mImpl->mItemValidators[category]; auto &ivs = mImpl->mItemValidators[category];
auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName}); auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
if (vi == ivs.end()) if (vi == ivs.end())
ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull}); ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull});
...@@ -1046,7 +1061,7 @@ void DictParser::parseSaveFrame() ...@@ -1046,7 +1061,7 @@ void DictParser::parseSaveFrame()
if (VERBOSE > 2) if (VERBOSE > 2)
{ {
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl; std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
if (iequals(tagName, saveFrameName)) if (iequals(tagName, saveFrameName))
std::cerr << "choosing " << mandatory << std::endl; std::cerr << "choosing " << mandatory << std::endl;
else else
...@@ -1063,7 +1078,7 @@ void DictParser::parseSaveFrame() ...@@ -1063,7 +1078,7 @@ void DictParser::parseSaveFrame()
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl; std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
} }
// vi->mMandatory = (iequals(mandatory, "yes")); // vi->mMandatory = (iequals(mandatory, "yes"));
if (vi->mType == nullptr) if (vi->mType == nullptr)
vi->mType = tv; vi->mType = tv;
...@@ -1073,14 +1088,14 @@ void DictParser::parseSaveFrame() ...@@ -1073,14 +1088,14 @@ void DictParser::parseSaveFrame()
// ... // ...
} }
} }
// collect the dict from our dataBlock and construct validators // collect the dict from our dataBlock and construct validators
for (auto i: dict["item_linked"]) for (auto i : dict["item_linked"])
{ {
std::string childTagName, parentTagName; std::string childTagName, parentTagName;
cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name"); cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name");
mImpl->mLinkedItems.emplace(childTagName, parentTagName); mImpl->mLinkedItems.emplace(childTagName, parentTagName);
} }
} }
...@@ -1091,20 +1106,20 @@ void DictParser::linkItems() ...@@ -1091,20 +1106,20 @@ void DictParser::linkItems()
if (not mDataBlock) if (not mDataBlock)
error("no datablock"); error("no datablock");
auto& dict = *mDataBlock; auto &dict = *mDataBlock;
// links are identified by a parent category, a child category and a group ID // links are identified by a parent category, a child category and a group ID
using key_type = std::tuple<std::string,std::string,int>; using key_type = std::tuple<std::string, std::string, int>;
std::map<key_type,size_t> linkIndex; std::map<key_type, size_t> linkIndex;
// Each link group consists of a set of keys // Each link group consists of a set of keys
std::vector<std::tuple<std::vector<std::string>,std::vector<std::string>>> linkKeys; std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
auto addLink = [&](size_t ix, const std::string& pk, const std::string& ck) auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
{ {
auto&& [pkeys, ckeys] = linkKeys.at(ix); auto &&[pkeys, ckeys] = linkKeys.at(ix);
bool found = false; bool found = false;
for (size_t i = 0; i < pkeys.size(); ++i) for (size_t i = 0; i < pkeys.size(); ++i)
...@@ -1123,29 +1138,29 @@ void DictParser::linkItems() ...@@ -1123,29 +1138,29 @@ void DictParser::linkItems()
} }
}; };
auto& linkedGroupList = dict["pdbx_item_linked_group_list"]; auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
for (auto gl: linkedGroupList) for (auto gl : linkedGroupList)
{ {
std::string child, parent; std::string child, parent;
int link_group_id; int link_group_id;
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id"); cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
auto civ = mValidator.getValidatorForItem(child); auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr) if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified"); error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent); auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr) if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified"); error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->mCategory->mName, civ->mCategory->mName, link_group_id }; key_type key{piv->mCategory->mName, civ->mCategory->mName, link_group_id};
if (not linkIndex.count(key)) if (not linkIndex.count(key))
{ {
linkIndex[key] = linkKeys.size(); linkIndex[key] = linkKeys.size();
linkKeys.push_back({}); linkKeys.push_back({});
} }
size_t ix = linkIndex.at(key); size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag); addLink(ix, piv->mTag, civ->mTag);
} }
...@@ -1154,35 +1169,35 @@ void DictParser::linkItems() ...@@ -1154,35 +1169,35 @@ void DictParser::linkItems()
if (linkedGroupList.empty()) if (linkedGroupList.empty())
{ {
// for links recorded in categories but not in pdbx_item_linked_group_list // for links recorded in categories but not in pdbx_item_linked_group_list
for (auto li: mImpl->mLinkedItems) for (auto li : mImpl->mLinkedItems)
{ {
std::string child, parent; std::string child, parent;
std::tie(child, parent) = li; std::tie(child, parent) = li;
auto civ = mValidator.getValidatorForItem(child); auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr) if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified"); error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent); auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr) if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified"); error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->mCategory->mName, civ->mCategory->mName, 0 }; key_type key{piv->mCategory->mName, civ->mCategory->mName, 0};
if (not linkIndex.count(key)) if (not linkIndex.count(key))
{ {
linkIndex[key] = linkKeys.size(); linkIndex[key] = linkKeys.size();
linkKeys.push_back({}); linkKeys.push_back({});
} }
size_t ix = linkIndex.at(key); size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag); addLink(ix, piv->mTag, civ->mTag);
} }
} }
auto& linkedGroup = dict["pdbx_item_linked_group"]; auto &linkedGroup = dict["pdbx_item_linked_group"];
// now store the links in the validator // now store the links in the validator
for (auto& kv: linkIndex) for (auto &kv : linkIndex)
{ {
ValidateLink link = {}; ValidateLink link = {};
std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = kv.first; std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = kv.first;
...@@ -1190,7 +1205,7 @@ void DictParser::linkItems() ...@@ -1190,7 +1205,7 @@ void DictParser::linkItems()
std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[kv.second]; std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[kv.second];
// look up the label // look up the label
for (auto r: linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID)) for (auto r : linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
{ {
link.mLinkGroupLabel = r["label"].as<std::string>(); link.mLinkGroupLabel = r["label"].as<std::string>();
break; break;
...@@ -1200,22 +1215,22 @@ void DictParser::linkItems() ...@@ -1200,22 +1215,22 @@ void DictParser::linkItems()
} }
// now make sure the itemType is specified for all itemValidators // now make sure the itemType is specified for all itemValidators
for (auto& cv: mValidator.mCategoryValidators) for (auto &cv : mValidator.mCategoryValidators)
{ {
for (auto& iv: cv.mItemValidators) for (auto &iv : cv.mItemValidators)
{ {
if (iv.mType == nullptr) if (iv.mType == nullptr)
std::cerr << "Missing item_type for " << iv.mTag << std::endl; std::cerr << "Missing item_type for " << iv.mTag << std::endl;
} }
} }
} }
void DictParser::loadDictionary() void DictParser::loadDictionary()
{ {
std::unique_ptr<Datablock> dict; std::unique_ptr<Datablock> dict;
Datablock* savedDatablock = mDataBlock; Datablock *savedDatablock = mDataBlock;
try try
{ {
while (mLookahead != eCIFTokenEOF) while (mLookahead != eCIFTokenEOF)
...@@ -1225,12 +1240,12 @@ void DictParser::loadDictionary() ...@@ -1225,12 +1240,12 @@ void DictParser::loadDictionary()
case eCIFTokenGLOBAL: case eCIFTokenGLOBAL:
parseGlobal(); parseGlobal();
break; break;
default: default:
{ {
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
mDataBlock = dict.get(); mDataBlock = dict.get();
match(eCIFTokenDATA); match(eCIFTokenDATA);
parseDataBlock(); parseDataBlock();
break; break;
...@@ -1238,29 +1253,29 @@ void DictParser::loadDictionary() ...@@ -1238,29 +1253,29 @@ void DictParser::loadDictionary()
} }
} }
} }
catch (const std::exception&) catch (const std::exception &)
{ {
std::cerr << "Error parsing dictionary" << std::endl; std::cerr << "Error parsing dictionary" << std::endl;
throw; throw;
} }
// store all validators // store all validators
for (auto& ic: mImpl->mCategoryValidators) for (auto &ic : mImpl->mCategoryValidators)
mValidator.addCategoryValidator(std::move(ic)); mValidator.addCategoryValidator(std::move(ic));
mImpl->mCategoryValidators.clear(); mImpl->mCategoryValidators.clear();
for (auto& iv: mImpl->mItemValidators) for (auto &iv : mImpl->mItemValidators)
{ {
auto cv = mValidator.getValidatorForCategory(iv.first); auto cv = mValidator.getValidatorForCategory(iv.first);
if (cv == nullptr) if (cv == nullptr)
error("Undefined category '" + iv.first); error("Undefined category '" + iv.first);
for (auto& v: iv.second) for (auto &v : iv.second)
const_cast<ValidateCategory*>(cv)->addItemValidator(std::move(v)); const_cast<ValidateCategory *>(cv)->addItemValidator(std::move(v));
} }
// check all item validators for having a typeValidator // check all item validators for having a typeValidator
if (dict) if (dict)
linkItems(); linkItems();
...@@ -1283,47 +1298,45 @@ void DictParser::loadDictionary() ...@@ -1283,47 +1298,45 @@ void DictParser::loadDictionary()
bool DictParser::collectItemTypes() bool DictParser::collectItemTypes()
{ {
bool result = false; bool result = false;
if (not mDataBlock) if (not mDataBlock)
error("no datablock"); error("no datablock");
auto& dict = *mDataBlock; auto &dict = *mDataBlock;
for (auto& t: dict["item_type_list"]) for (auto &t : dict["item_type_list"])
{ {
std::string code, primitiveCode, construct; std::string code, primitiveCode, construct;
cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct"); cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
ba::replace_all(construct, "\\n", "\n"); ba::replace_all(construct, "\\n", "\n");
ba::replace_all(construct, "\\t", "\t"); ba::replace_all(construct, "\\t", "\t");
ba::replace_all(construct, "\\\n", ""); ba::replace_all(construct, "\\\n", "");
try try
{ {
ValidateType v = { ValidateType v = {
code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize) code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize)};
};
mValidator.addTypeValidator(std::move(v)); mValidator.addTypeValidator(std::move(v));
} }
catch (const std::exception&) catch (const std::exception &)
{ {
throw_with_nested(CifParserError(t.lineNr(), "error in regular expression")); throw_with_nested(CifParserError(t.lineNr(), "error in regular expression"));
} }
// Do not replace an already defined type validator, this won't work with pdbx_v40 // Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-) // as it has a name that is too strict for its own names :-)
// if (mFileImpl.mTypeValidators.count(v)) // if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v); // mFileImpl.mTypeValidators.erase(v);
if (VERBOSE >= 5) if (VERBOSE >= 5)
std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl; std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
result = true; result = true;
} }
return result; return result;
} }
} // namespace cif
}
...@@ -126,8 +126,9 @@ const uint8_t kCharToLowerMap[256] = ...@@ -126,8 +126,9 @@ const uint8_t kCharToLowerMap[256] =
bool iequals(std::string_view a, std::string_view b) bool iequals(std::string_view a, std::string_view b)
{ {
bool result = a.length() == b.length(); bool result = a.length() == b.length();
for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end() and bi != b.end(); ++ai, ++bi) for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
result = tolower(*ai) == tolower(*bi); result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];
// result = tolower(*ai) == tolower(*bi);
return result; return result;
} }
......
...@@ -24,13 +24,20 @@ ...@@ -24,13 +24,20 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <fstream>
#include <filesystem>
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "cif++/Cif++.hpp" #include "cif++/Cif++.hpp"
#include "cif++/CifParser.hpp" #include "cif++/CifParser.hpp"
#include "cif++/CifValidator.hpp" #include "cif++/CifValidator.hpp"
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
namespace fs = std::filesystem;
namespace io = boost::iostreams;
extern int VERBOSE; extern int VERBOSE;
...@@ -219,8 +226,11 @@ const ValidateItem *ValidateCategory::getValidatorForItem(std::string_view tag) ...@@ -219,8 +226,11 @@ const ValidateItem *ValidateCategory::getValidatorForItem(std::string_view tag)
// -------------------------------------------------------------------- // --------------------------------------------------------------------
Validator::Validator() Validator::Validator(std::string_view name, std::istream &is)
: mName(name)
{ {
DictParser p(*this, is);
p.loadDictionary();
} }
Validator::~Validator() Validator::~Validator()
...@@ -340,7 +350,7 @@ std::vector<const ValidateLink *> Validator::getLinksForChild(std::string_view c ...@@ -340,7 +350,7 @@ std::vector<const ValidateLink *> Validator::getLinksForChild(std::string_view c
return result; return result;
} }
void Validator::reportError(const std::string &msg, bool fatal) void Validator::reportError(const std::string &msg, bool fatal) const
{ {
if (mStrict or fatal) if (mStrict or fatal)
throw ValidationError(msg); throw ValidationError(msg);
...@@ -348,4 +358,78 @@ void Validator::reportError(const std::string &msg, bool fatal) ...@@ -348,4 +358,78 @@ void Validator::reportError(const std::string &msg, bool fatal)
std::cerr << msg << std::endl; std::cerr << msg << std::endl;
} }
// --------------------------------------------------------------------
// Out-of-class definition of the static member ValidatorFactory::sInstance.
ValidatorFactory ValidatorFactory::sInstance;
// Nothing to set up here: the constructor body is empty, members are
// default-initialised.
ValidatorFactory::ValidatorFactory() = default;
/// Return the validator registered under the name @a dictionary, constructing
/// and storing it first when no validator with that name exists yet.
///
/// Names are compared with iequals(). The whole call runs while holding mMutex.
///
/// @param dictionary  Name (or path) of the dictionary to use.
/// @return            Reference to the matching element of mValidators.
/// @throws std::runtime_error  when the dictionary cannot be located, or when
///                             the compressed file exists but cannot be opened.
const Validator &ValidatorFactory::operator[](std::string_view dictionary)
{
    std::lock_guard lock(mMutex);

    // Reuse a validator that was constructed by an earlier call.
    for (auto &known : mValidators)
    {
        if (iequals(known.mName, dictionary))
            return known;
    }

    // Not registered yet: ask loadResource() first, retrying with a ".dic"
    // suffix appended when the given name does not already carry one.
    const fs::path dict_name(dictionary);

    auto data = loadResource(dictionary);
    if (not data and dict_name.extension().string() != ".dic")
        data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));

    if (data)
    {
        mValidators.emplace_back(dictionary, *data);
    }
    else
    {
        // might be a compressed dictionary on disk
        fs::path gz = dictionary;
        const char *suffix = (gz.extension() == ".dic") ? ".gz" : ".dic.gz";
        gz = gz.parent_path() / (gz.filename().string() + suffix);

#if defined(CACHE_DIR) and defined(DATA_DIR)
        // Fall back to the configured directories, first hit wins.
        if (not fs::exists(gz))
        {
            for (const char *dir : {CACHE_DIR, DATA_DIR})
            {
                const auto candidate = fs::path(dir) / gz;
                if (fs::exists(candidate))
                {
                    gz = candidate;
                    break;
                }
            }
        }
#endif

        if (not fs::exists(gz))
            throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");

        std::ifstream file(gz, std::ios::binary);
        if (not file.is_open())
            throw std::runtime_error("Could not open dictionary (" + gz.string() + ")");

        // Decompress on the fly while the Validator constructor reads the stream.
        io::filtering_stream<io::input> in;
        in.push(io::gzip_decompressor());
        in.push(file);

        mValidators.emplace_back(dictionary, in);
    }

    // The entry just appended must be the one that was asked for.
    assert(iequals(mValidators.back().mName, dictionary));
    return mValidators.back();
}
} // namespace cif } // namespace cif
...@@ -18,7 +18,6 @@ int main(int argc, char* argv[]) ...@@ -18,7 +18,6 @@ int main(int argc, char* argv[])
desc.add_options() desc.add_options()
("input,i", po::value<std::string>(), "Input file") ("input,i", po::value<std::string>(), "Input file")
("help,h", "Display help message") ("help,h", "Display help message")
("version", "Print version")
("verbose,v", "Verbose output") ("verbose,v", "Verbose output")
("debug,d", po::value<int>(), "Debug level (for even more verbose output)"); ("debug,d", po::value<int>(), "Debug level (for even more verbose output)");
...@@ -29,12 +28,6 @@ int main(int argc, char* argv[]) ...@@ -29,12 +28,6 @@ int main(int argc, char* argv[])
po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm); po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
po::notify(vm); po::notify(vm);
if (vm.count("version"))
{
std::cout << argv[0] << " version " PACKAGE_VERSION << std::endl;
exit(0);
}
if (vm.count("help") or vm.count("input") == 0) if (vm.count("help") or vm.count("input") == 0)
{ {
std::cerr << desc << std::endl; std::cerr << desc << std::endl;
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
// #include "cif++/DistanceMap.hpp" // #include "cif++/DistanceMap.hpp"
#include "cif++/Cif++.hpp" #include "cif++/Cif++.hpp"
#include "cif++/BondMap.hpp" #include "cif++/BondMap.hpp"
#include "cif++/CifValidator.hpp"
std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
...@@ -259,8 +260,10 @@ save__cat_2.desc ...@@ -259,8 +260,10 @@ save__cat_2.desc
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -387,8 +390,10 @@ save__cat_1.c ...@@ -387,8 +390,10 @@ save__cat_1.c
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -535,8 +540,10 @@ save__cat_2.desc ...@@ -535,8 +540,10 @@ save__cat_2.desc
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -741,8 +748,10 @@ save__cat_2.parent_id3 ...@@ -741,8 +748,10 @@ save__cat_2.parent_id3
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -963,8 +972,10 @@ cat_2 3 cat_2:cat_1:3 ...@@ -963,8 +972,10 @@ cat_2 3 cat_2:cat_1:3
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -1389,9 +1400,10 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3 ...@@ -1389,9 +1400,10 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char*>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict);
cif::File f; cif::File f;
f.loadDictionary(is_dict); f.setValidator(&validator);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment