Commit 24078771 by Maarten L. Hekkelman

Reordering all files

parent 5fde0507
...@@ -193,50 +193,63 @@ endif() ...@@ -193,50 +193,63 @@ endif()
# Sources # Sources
set(project_sources set(project_sources
${PROJECT_SOURCE_DIR}/src/AtomType.cpp ${PROJECT_SOURCE_DIR}/src/cif/category.cpp
${PROJECT_SOURCE_DIR}/src/BondMap.cpp ${PROJECT_SOURCE_DIR}/src/cif/condition.cpp
${PROJECT_SOURCE_DIR}/src/Cif++.cpp ${PROJECT_SOURCE_DIR}/src/cif/datablock.cpp
${PROJECT_SOURCE_DIR}/src/Cif2PDB.cpp ${PROJECT_SOURCE_DIR}/src/cif/dictionary_parser.cpp
${PROJECT_SOURCE_DIR}/src/CifParser.cpp ${PROJECT_SOURCE_DIR}/src/cif/item.cpp
${PROJECT_SOURCE_DIR}/src/CifUtils.cpp ${PROJECT_SOURCE_DIR}/src/cif/parser.cpp
${PROJECT_SOURCE_DIR}/src/CifValidator.cpp ${PROJECT_SOURCE_DIR}/src/cif/row.cpp
${PROJECT_SOURCE_DIR}/src/Compound.cpp ${PROJECT_SOURCE_DIR}/src/cif/validate.cpp
${PROJECT_SOURCE_DIR}/src/PDB2Cif.cpp
${PROJECT_SOURCE_DIR}/src/PDB2CifRemark3.cpp # ${PROJECT_SOURCE_DIR}/src/pdb/Cif2PDB.cpp
${PROJECT_SOURCE_DIR}/src/Point.cpp # ${PROJECT_SOURCE_DIR}/src/pdb/PDB2Cif.cpp
${PROJECT_SOURCE_DIR}/src/Secondary.cpp # ${PROJECT_SOURCE_DIR}/src/pdb/PDB2CifRemark3.cpp
${PROJECT_SOURCE_DIR}/src/Structure.cpp
${PROJECT_SOURCE_DIR}/src/Symmetry.cpp # ${PROJECT_SOURCE_DIR}/src/structure/AtomType.cpp
${PROJECT_SOURCE_DIR}/src/TlsParser.cpp # ${PROJECT_SOURCE_DIR}/src/structure/BondMap.cpp
# ${PROJECT_SOURCE_DIR}/src/structure/Compound.cpp
${PROJECT_SOURCE_DIR}/src/v2/category.cpp # ${PROJECT_SOURCE_DIR}/src/structure/Secondary.cpp
${PROJECT_SOURCE_DIR}/src/v2/condition.cpp # ${PROJECT_SOURCE_DIR}/src/structure/Structure.cpp
${PROJECT_SOURCE_DIR}/src/v2/dictionary_parser.cpp # ${PROJECT_SOURCE_DIR}/src/structure/Symmetry.cpp
${PROJECT_SOURCE_DIR}/src/v2/item.cpp # ${PROJECT_SOURCE_DIR}/src/structure/TlsParser.cpp
${PROJECT_SOURCE_DIR}/src/v2/parser.cpp
${PROJECT_SOURCE_DIR}/src/v2/row.cpp ${PROJECT_SOURCE_DIR}/src/utilities.cpp
${PROJECT_SOURCE_DIR}/src/v2/validate.cpp ${PROJECT_SOURCE_DIR}/src/point.cpp
) )
set(project_headers set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/AtomType.hpp
${PROJECT_SOURCE_DIR}/include/cif++/BondMap.hpp ${PROJECT_SOURCE_DIR}/include/cif++/cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/item.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/datablock.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/file.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/writer.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/validate.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/list.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/iterator.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/forward_decl.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/dictionary_parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/condition.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/category.hpp
${PROJECT_SOURCE_DIR}/include/cif++/cif/row.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/AtomType.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/BondMap.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/TlsParser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Symmetry.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Structure.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Secondary.hpp
${PROJECT_SOURCE_DIR}/include/cif++/structure/Compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2Cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/PDB2CifRemark3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/Cif2PDB.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp ${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Cif2PDB.hpp ${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifParser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifUtils.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifValidator.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/PDB2Cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/PDB2CifRemark3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Secondary.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Structure.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Symmetry.hpp
${PROJECT_SOURCE_DIR}/include/cif++/TlsParser.hpp
) )
# The post-commit add_library() takes SymOpTable_data.hpp from PROJECT_SOURCE_DIR, like every other listed file, so the path also resolves when cifpp is built as a subproject.
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp) add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/structure/SymOpTable_data.hpp)
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(cifpp target_include_directories(cifpp
...@@ -391,10 +404,10 @@ if(CIFPP_BUILD_TESTS) ...@@ -391,10 +404,10 @@ if(CIFPP_BUILD_TESTS)
list(APPEND CIFPP_tests list(APPEND CIFPP_tests
# pdb2cif # pdb2cif
rename-compound # rename-compound
structure # structure
sugar # sugar
unit # unit
unit-v2) unit-v2)
foreach(CIFPP_TEST IN LISTS CIFPP_tests) foreach(CIFPP_TEST IN LISTS CIFPP_tests)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/Cif++.hpp>
#include <map>
#include <stack>
namespace cif
{
// --------------------------------------------------------------------
// Exception type used for CIF parse failures; derives from std::runtime_error.
// The constructor receives a line number and a message. How the two are
// combined into the text returned by what() is decided by the out-of-line
// definition, which is not part of this header.
class CifParserError : public std::runtime_error
{
public:
// lineNr: presumably the input line the error refers to — confirm against the definition.
// message: description of the problem.
CifParserError(uint32_t lineNr, const std::string &message);
};
// --------------------------------------------------------------------
// Line length limit; the value is defined out of line.
extern const uint32_t kMaxLineLength;

// Per-character trait bits, defined out of line. The is*() classifiers in
// this header index the table with (ch - 0x20) for ch in 0x20..0x7f.
extern const uint8_t kCharTraitsTable[128];

// Bit flags that can be tested against an entry of kCharTraitsTable.
enum CharTraitsMask : uint8_t
{
    kOrdinaryMask = 0x01,
    kNonBlankMask = 0x02,
    kTextLeadMask = 0x04,
    kAnyPrintMask = 0x08
};
// Returns true when ch is whitespace according to std::isspace, or when it
// is the '#' character, which this predicate also counts as white.
inline bool isWhite(int ch)
{
    if (ch == '#')
        return true;

    return std::isspace(ch) != 0;
}
// Returns true when ch lies in 0x20..0x7f and its kCharTraitsTable entry has
// the kOrdinaryMask bit set.
inline bool isOrdinary(int ch)
{
    // Anything outside the table's character range is never ordinary.
    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
}
// Returns true when ch lies in 0x21..0x7f and its kCharTraitsTable entry has
// the kNonBlankMask bit set. The space character (0x20) is rejected up front.
inline bool isNonBlank(int ch)
{
    if (ch <= 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
}
// Returns true when ch lies in 0x20..0x7f and its kCharTraitsTable entry has
// the kTextLeadMask bit set.
inline bool isTextLead(int ch)
{
    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
}
// Returns true for a tab, or for a character in 0x20..0x7f whose
// kCharTraitsTable entry has the kAnyPrintMask bit set.
inline bool isAnyPrint(int ch)
{
    // A tab is accepted without consulting the table.
    if (ch == '\t')
        return true;

    if (ch < 0x20 or ch > 0x7f)
        return false;

    return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
}
// Declared here, defined out of line.
// NOTE(review): presumably reports whether the C string s can be written
// without quotes — confirm against the definition.
bool isUnquotedString(const char *s);
// --------------------------------------------------------------------
// Map from a string key to a std::size_t. It is the return type of
// SacParser::indexDatablocks() and is accepted by the two-argument
// SacParser::parseSingleDatablock().
// NOTE(review): presumably datablock name -> position in the input — confirm.
using DatablockIndex = std::map<std::string, std::size_t>;
// --------------------------------------------------------------------
// SAC parser, analogous to a SAX parser (Simple API for XML).
// Abstract base class: it declares the character/token level interface and the
// parse*() entry points, and leaves the four produce*() callbacks pure virtual
// so that a derived class decides what to do with what is reported.
// All member functions except the destructor are defined out of line.
class SacParser
{
public:
// NOTE(review): the effect of init is decided by the out-of-line definition — confirm.
SacParser(std::istream &is, bool init = true);
// Virtual so that derived parsers can be destroyed through a SacParser pointer.
virtual ~SacParser() {}
// Token kinds; getNextToken() returns one and match() takes one.
enum CIFToken
{
eCIFTokenUnknown,
eCIFTokenEOF,
eCIFTokenDATA,
eCIFTokenLOOP,
eCIFTokenGLOBAL,
eCIFTokenSAVE,
eCIFTokenSTOP,
eCIFTokenTag,
eCIFTokenValue,
};
// Array of C strings defined out of line.
// NOTE(review): presumably indexed by CIFToken — confirm.
static const char *kTokenName[];
// Classification of a value; stored in mTokenType.
enum CIFValueType
{
eCIFValueInt,
eCIFValueFloat,
eCIFValueNumeric,
eCIFValueString,
eCIFValueTextField,
eCIFValueInapplicable,
eCIFValueUnknown
};
// Array of C strings defined out of line.
// NOTE(review): presumably indexed by CIFValueType — confirm.
static const char *kValueName[];
// Character and token level interface.
int getNextChar();
void retract();
int restart(int start);
CIFToken getNextToken();
void match(CIFToken token);
// Two overloads: one takes only the datablock name, the other additionally
// takes a DatablockIndex such as the one returned by indexDatablocks().
bool parseSingleDatablock(const std::string &datablock);
DatablockIndex indexDatablocks();
bool parseSingleDatablock(const std::string &datablock, const DatablockIndex &index);
// Parse entry points. parseSaveFrame() is the only virtual one, so derived
// classes can replace it.
void parseFile();
void parseGlobal();
void parseDataBlock();
virtual void parseSaveFrame();
void parseDictionary();
// NOTE(review): presumably reports msg as a CifParserError — confirm.
void error(const std::string &msg);
// production methods, these are pure virtual here
virtual void produceDatablock(const std::string &name) = 0;
virtual void produceCategory(const std::string &name) = 0;
virtual void produceRow() = 0;
virtual void produceItem(const std::string &category, const std::string &item, const std::string &value) = 0;
protected:
// Scanner states. eStateFloat, eStateInt and eStateValue carry explicit
// values (100, 110, 300); eStateDATA and eStateSAVE follow eStateValue and
// therefore are 301 and 302.
// NOTE(review): the gaps presumably leave room for numbered sub-states — confirm.
enum State
{
eStateStart,
eStateWhite,
eStateComment,
eStateQuestionMark,
eStateDot,
eStateQuotedString,
eStateQuotedStringQuote,
eStateUnquotedString,
eStateTag,
eStateTextField,
eStateFloat = 100,
eStateInt = 110,
eStateValue = 300,
eStateDATA,
eStateSAVE
};
// Reference member: the stream it refers to must outlive the parser.
std::istream &mData;
// Parser state
bool mValidate;
uint32_t mLineNr;
bool mBol;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;
// NOTE(review): presumably holds characters pushed back by retract() — confirm.
std::stack<int> mBuffer;
};
// --------------------------------------------------------------------
// Concrete SacParser: declares implementations of the four pure virtual
// produce*() callbacks of SacParser (the signatures match those in the base
// class). The implementations themselves are defined out of line.
class Parser : public SacParser
{
public:
// f is taken by reference; mFile below is a reference member, so the File
// must outlive the parser.
Parser(std::istream &is, File &f, bool init = true);
// Overrides of the SacParser production callbacks.
virtual void produceDatablock(const std::string &name);
virtual void produceCategory(const std::string &name);
virtual void produceRow();
virtual void produceItem(const std::string &category, const std::string &item, const std::string &value);
protected:
File &mFile;
// NOTE(review): presumably the datablock, category and row currently being
// filled by the produce*() callbacks — confirm against the definitions.
Datablock *mDataBlock;
Datablock::iterator mCat;
Row mRow;
};
// --------------------------------------------------------------------
// Parser specialisation that works with a Validator. It replaces the virtual
// SacParser::parseSaveFrame() and adds loadDictionary(). All member functions
// are defined out of line.
// NOTE(review): the class has a user-declared destructor and a raw mImpl
// pointer but does not delete its copy constructor — confirm that instances
// are never copied, or that mImpl is not owned.
class DictParser : public Parser
{
public:
DictParser(Validator &validator, std::istream &is);
~DictParser();
void loadDictionary();
private:
// Overrides SacParser::parseSaveFrame().
virtual void parseSaveFrame();
bool collectItemTypes();
void linkItems();
// Reference member: the Validator must outlive this parser.
Validator &mValidator;
// Note: this member has the same name as the File & member of Parser and
// therefore hides it inside DictParser.
File mFile;
// Pointer to an implementation struct that is only forward declared here.
struct DictParserDataImpl *mImpl;
bool mCollectedItemTypes = false;
};
} // namespace cif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/Cif++.hpp>
// std::regex is not used here because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164; boost::regex is included instead.
// #include <regex>
#include <boost/regex.hpp>
#include <set>
namespace cif
{
struct ValidateCategory;
class ValidatorFactory;
// --------------------------------------------------------------------
// Exception thrown for validation problems. The message is kept in the
// public member mMsg and exposed through what().
class ValidationError : public std::exception
{
  public:
    // Both constructors are defined out of line; the second one additionally
    // receives a category and an item name.
    ValidationError(const std::string &msg);
    ValidationError(const std::string &cat, const std::string &item,
        const std::string &msg);

    // Returns the stored message; the pointer stays valid as long as mMsg is
    // not modified or destroyed.
    const char *what() const noexcept override
    {
        return mMsg.c_str();
    }

    std::string mMsg;
};
// --------------------------------------------------------------------
// Primitive type class of a ValidateType (see ValidateType::mPrimitiveType).
// NOTE(review): presumably mirrors the DDL primitive codes "char", "uchar"
// and "numb" — confirm against mapToPrimitiveType().
enum class DDL_PrimitiveType
{
Char,
UChar,
Numb
};
// Converts the textual form s into a DDL_PrimitiveType. Defined out of line.
// NOTE(review): the behaviour for unrecognised input is not visible here — confirm.
DDL_PrimitiveType mapToPrimitiveType(std::string_view s);
// Description of one type: its name, its primitive type class and a regular
// expression.
struct ValidateType
{
    std::string mName;
    DDL_PrimitiveType mPrimitiveType;
    // boost::regex rather than std::regex; see the note next to the
    // <boost/regex.hpp> include.
    boost::regex mRx;

    // Orders types by name using icompare(), which makes ValidateType usable
    // as the element type of a std::set.
    bool operator<(const ValidateType &rhs) const
    {
        const auto order = icompare(mName, rhs.mName);
        return order < 0;
    }

    // Compares two values based on this type. Defined out of line.
    // NOTE(review): presumably follows the usual negative/zero/positive
    // convention — confirm.
    int compare(const char *a, const char *b) const;
};
// Validation data for a single item (tag).
struct ValidateItem
{
    std::string mTag;
    bool mMandatory;
    const ValidateType *mType;
    cif::iset mEnums;
    std::string mDefault;
    bool mDefaultIsNull;
    ValidateCategory *mCategory = nullptr;

    // ItemLinked is used for non-key links
    struct ItemLinked
    {
        ValidateItem *mParent;
        std::string mParentItem;
        std::string mChildItem;
    };

    std::vector<ItemLinked> mLinked;

    // Orders items by tag using icompare().
    bool operator<(const ValidateItem &rhs) const
    {
        const auto order = icompare(mTag, rhs.mTag);
        return order < 0;
    }

    // Two items are equal when iequals() says their tags are.
    bool operator==(const ValidateItem &rhs) const
    {
        return iequals(mTag, rhs.mTag);
    }

    // Call operator taking a value by copy. Defined out of line.
    // NOTE(review): presumably validates value and throws ValidationError on
    // failure — confirm.
    void operator()(std::string value) const;
};
// Validation data for one category: its name, key names, groups, mandatory
// field names and the set of item validators that belong to it.
struct ValidateCategory
{
    std::string mName;
    std::vector<std::string> mKeys;
    cif::iset mGroups;
    cif::iset mMandatoryFields;
    std::set<ValidateItem> mItemValidators;

    // Orders categories by name using icompare().
    bool operator<(const ValidateCategory &rhs) const
    {
        const auto order = icompare(mName, rhs.mName);
        return order < 0;
    }

    // Both defined out of line.
    void addItemValidator(ValidateItem &&v);
    const ValidateItem *getValidatorForItem(std::string_view tag) const;

    // Read-only access to the item validators.
    const std::set<ValidateItem> &itemValidators() const
    {
        return mItemValidators;
    }
};
// Plain record describing a link between a parent and a child category.
// NOTE(review): mParentKeys and mChildKeys presumably pair up by position —
// confirm against the code that fills them.
struct ValidateLink
{
// Numeric id and textual label of the link group.
int mLinkGroupID;
std::string mParentCategory;
std::vector<std::string> mParentKeys;
std::string mChildCategory;
std::vector<std::string> mChildKeys;
std::string mLinkGroupLabel;
};
// --------------------------------------------------------------------
// Holds the type, category and link validators together with the name and
// version of the dictionary. Copying is disabled; moving is declared and
// defined out of line.
class Validator
{
public:
// NOTE(review): presumably reads the dictionary called name from is — confirm.
Validator(std::string_view name, std::istream &is);
~Validator();
// Not copyable.
Validator(const Validator &rhs) = delete;
Validator &operator=(const Validator &rhs) = delete;
// Movable.
Validator(Validator &&rhs);
Validator &operator=(Validator &&rhs);
// These two classes get access to the private members below.
friend class DictParser;
friend class ValidatorFactory;
// Registration and lookup of type validators.
void addTypeValidator(ValidateType &&v);
const ValidateType *getValidatorForType(std::string_view typeCode) const;
// Registration and lookup of category validators.
void addCategoryValidator(ValidateCategory &&v);
const ValidateCategory *getValidatorForCategory(std::string_view category) const;
// Registration and lookup of link validators, by parent or by child category.
void addLinkValidator(ValidateLink &&v);
std::vector<const ValidateLink *> getLinksForParent(std::string_view category) const;
std::vector<const ValidateLink *> getLinksForChild(std::string_view category) const;
// NOTE(review): how fatal changes the reporting is decided out of line — confirm.
void reportError(const std::string &msg, bool fatal) const;
// Getter/setter pairs; the getters return a copy of the stored string.
std::string dictName() const { return mName; }
void dictName(const std::string &name) { mName = name; }
std::string dictVersion() const { return mVersion; }
void dictVersion(const std::string &version) { mVersion = version; }
private:
// name is fully qualified here:
// Note that this const member returns a pointer to a non-const ValidateItem.
ValidateItem *getValidatorForItem(std::string_view name) const;
std::string mName;
std::string mVersion;
bool mStrict = false;
// std::set<uint32_t> mSubCategories;
std::set<ValidateType> mTypeValidators;
std::set<ValidateCategory> mCategoryValidators;
std::vector<ValidateLink> mLinkValidators;
};
// --------------------------------------------------------------------
// Singleton that hands out Validator objects by dictionary name. The
// constructor is private and the only instance is the static member
// sInstance, which instance() returns by reference.
class ValidatorFactory
{
public:
static ValidatorFactory &instance()
{
return sInstance;
}
// Returns the Validator for dictionary. Defined out of line.
// NOTE(review): presumably loads and caches the dictionary on first use,
// guarded by mMutex — confirm against the definition.
const Validator &operator[](std::string_view dictionary);
private:
static ValidatorFactory sInstance;
ValidatorFactory();
std::mutex mMutex;
std::list<Validator> mValidators;
};
} // namespace cif
...@@ -26,18 +26,6 @@ ...@@ -26,18 +26,6 @@
#pragma once #pragma once
#include <filesystem> #include <cif++/utilities.hpp>
#include <forward_list> #include <cif++/cif/file.hpp>
#include <list> #include <cif++/cif/parser.hpp>
#include <map>
#include <scoped_allocator>
#include <string>
#include <cif++/CifUtils.hpp>
#include <cif++/v2/file.hpp>
namespace cif::v2
{
} // namespace cif::v2
...@@ -26,19 +26,18 @@ ...@@ -26,19 +26,18 @@
#pragma once #pragma once
#include <cif++/v2/forward_decl.hpp> #include <cif++/cif/forward_decl.hpp>
#include <cif++/v2/condition.hpp> #include <cif++/cif/condition.hpp>
#include <cif++/v2/iterator.hpp> #include <cif++/cif/iterator.hpp>
#include <cif++/v2/row.hpp> #include <cif++/cif/row.hpp>
#include <cif++/v2/validate.hpp> #include <cif++/cif/validate.hpp>
// TODO: implement all of: // TODO: implement all of:
// https://en.cppreference.com/w/cpp/named_req/Container // https://en.cppreference.com/w/cpp/named_req/Container
// https://en.cppreference.com/w/cpp/named_req/SequenceContainer // https://en.cppreference.com/w/cpp/named_req/SequenceContainer
// and more? // and more?
namespace cif::v2 namespace cif::v2
{ {
...@@ -475,6 +474,23 @@ class category ...@@ -475,6 +474,23 @@ class category
return result; return result;
} }
// --------------------------------------------------------------------
std::vector<std::string> get_tag_order() const;
void write(std::ostream &os) const;
void write(std::ostream &os, const std::vector<std::string> &order);
private:
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
public:
friend std::ostream &operator<<(std::ostream &os, const category &cat)
{
cat.write(os);
return os;
}
private: private:
void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true); void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true);
...@@ -631,6 +647,8 @@ class category ...@@ -631,6 +647,8 @@ class category
iterator insert_impl(const_iterator pos, row *n); iterator insert_impl(const_iterator pos, row *n);
iterator erase_impl(const_iterator pos); iterator erase_impl(const_iterator pos);
// --------------------------------------------------------------------
std::string m_name; std::string m_name;
std::vector<item_column> m_columns; std::vector<item_column> m_columns;
const validator *m_validator = nullptr; const validator *m_validator = nullptr;
...@@ -638,7 +656,7 @@ class category ...@@ -638,7 +656,7 @@ class category
std::vector<link> m_parent_links, m_child_links; std::vector<link> m_parent_links, m_child_links;
bool m_cascade = true; bool m_cascade = true;
uint32_t m_last_unique_num = 0; uint32_t m_last_unique_num = 0;
class category_index* m_index = nullptr; class category_index *m_index = nullptr;
row *m_head = nullptr, *m_tail = nullptr; row *m_head = nullptr, *m_tail = nullptr;
}; };
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <iostream> #include <iostream>
#include <regex> #include <regex>
#include <cif++/v2/row.hpp> #include <cif++/cif/row.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/cif/forward_decl.hpp>
#include <cif++/cif/category.hpp>
namespace cif::v2
{
// --------------------------------------------------------------------
// A datablock is a std::list of category objects that additionally carries a
// name and a validator pointer. Copy and move operations are defaulted.
class datablock : public std::list<category>
{
public:
datablock() = default;
// Not explicit, so a std::string_view converts implicitly to a datablock.
datablock(std::string_view name)
: m_name(name)
{
}
datablock(const datablock &) = default;
datablock(datablock &&) = default;
datablock &operator=(const datablock &) = default;
datablock &operator=(datablock &&) = default;
// --------------------------------------------------------------------
const std::string &name() const { return m_name; }
// Validator access and validity check; all three are defined out of line.
void set_validator(const validator *v);
const validator *get_validator() const;
bool is_valid() const;
// --------------------------------------------------------------------
// Category lookup by name: operator[] returns a reference, get() a pointer.
// NOTE(review): behaviour for a name that is not present is decided out of
// line — confirm (get() presumably returns nullptr).
category &operator[](std::string_view name);
const category &operator[](std::string_view name) const;
category *get(std::string_view name);
const category *get(std::string_view name) const;
// Declaring emplace here hides std::list::emplace for calls made on a
// datablock. Returns an iterator together with a bool.
// NOTE(review): the bool presumably tells whether a new category was created — confirm.
std::tuple<iterator, bool> emplace(std::string_view name);
std::vector<std::string> get_tag_order() const;
void write(std::ostream &os) const;
// Stream insertion forwards to write().
friend std::ostream &operator<<(std::ostream &os, const datablock &db)
{
db.write(os);
return os;
}
private:
std::string m_name;
// Raw pointer that is copied as-is by the defaulted copy operations.
// NOTE(review): presumably non-owning — confirm.
const validator *m_validator = nullptr;
};
} // namespace cif::v2
\ No newline at end of file
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/v2/validate.hpp> #include <cif++/cif/validate.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -26,10 +26,10 @@ ...@@ -26,10 +26,10 @@
#pragma once #pragma once
#include <cif++/v2/forward_decl.hpp> #include <cif++/cif/forward_decl.hpp>
#include <cif++/v2/datablock.hpp> #include <cif++/cif/datablock.hpp>
#include <cif++/v2/parser.hpp> #include <cif++/cif/parser.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -34,14 +34,9 @@ ...@@ -34,14 +34,9 @@
#include <memory> #include <memory>
#include <optional> #include <optional>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/v2/forward_decl.hpp> #include <cif++/cif/forward_decl.hpp>
namespace cif
{
extern int VERBOSE;
}
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/v2/row.hpp> #include <cif++/cif/row.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/v2/row.hpp> #include <cif++/cif/row.hpp>
namespace cif::v2 namespace cif::v2
{ {
...@@ -87,12 +87,12 @@ class sac_parser ...@@ -87,12 +87,12 @@ class sac_parser
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0); (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
} }
static bool is_unquoted_string(const char *s) static bool is_unquoted_string(std::string_view text)
{ {
auto ss = s; auto s = text.begin();
bool result = is_ordinary(*s++); bool result = is_ordinary(*s++);
while (result and *s != 0) while (result and s != text.end())
{ {
result = is_non_blank(*s); result = is_non_blank(*s);
++s; ++s;
...@@ -102,7 +102,7 @@ class sac_parser ...@@ -102,7 +102,7 @@ class sac_parser
if (result) if (result)
{ {
static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase); static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
result = not std::regex_match(ss, reservedRx); result = not std::regex_match(text.begin(), text.end(), reservedRx);
} }
return result; return result;
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#pragma once #pragma once
#include <cif++/v2/item.hpp> #include <cif++/cif/item.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
// #include <regex> // #include <regex>
#include <boost/regex.hpp> #include <boost/regex.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -34,8 +34,8 @@ ...@@ -34,8 +34,8 @@
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include <cif++/AtomType.hpp> #include <cif++/cif.hpp>
#include <cif++/Cif++.hpp> #include <cif++/structure/AtomType.hpp>
namespace mmcif namespace mmcif
{ {
...@@ -130,8 +130,8 @@ class Compound ...@@ -130,8 +130,8 @@ class Compound
friend class CCDCompoundFactoryImpl; friend class CCDCompoundFactoryImpl;
friend class CCP4CompoundFactoryImpl; friend class CCP4CompoundFactoryImpl;
Compound(cif::Datablock &db); Compound(cif::v2::datablock &db);
Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group); Compound(cif::v2::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
std::string mID; std::string mID;
std::string mName; std::string mName;
......
...@@ -28,10 +28,10 @@ ...@@ -28,10 +28,10 @@
#include <numeric> #include <numeric>
#include <cif++/AtomType.hpp> #include <cif++/cif.hpp>
#include <cif++/Cif++.hpp> #include <cif++/structure/AtomType.hpp>
#include <cif++/Compound.hpp> #include <cif++/structure/Compound.hpp>
#include <cif++/Point.hpp> #include <cif++/point.hpp>
/* /*
To modify a structure, you will have to use actions. To modify a structure, you will have to use actions.
...@@ -63,7 +63,7 @@ class Atom ...@@ -63,7 +63,7 @@ class Atom
private: private:
struct AtomImpl : public std::enable_shared_from_this<AtomImpl> struct AtomImpl : public std::enable_shared_from_this<AtomImpl>
{ {
AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row); AtomImpl(cif::v2::datablock &db, const std::string &id, cif::v2::row_handle row);
// constructor for a symmetry copy of an atom // constructor for a symmetry copy of an atom
AtomImpl(const AtomImpl &impl, const Point &loc, const std::string &sym_op); AtomImpl(const AtomImpl &impl, const Point &loc, const std::string &sym_op);
...@@ -85,7 +85,7 @@ class Atom ...@@ -85,7 +85,7 @@ class Atom
const std::string get_property(const std::string_view name) const; const std::string get_property(const std::string_view name) const;
void set_property(const std::string_view name, const std::string &value); void set_property(const std::string_view name, const std::string &value);
const cif::Datablock &mDb; const cif::v2::datablock &mDb;
std::string mID; std::string mID;
AtomType mType; AtomType mType;
...@@ -98,9 +98,9 @@ class Atom ...@@ -98,9 +98,9 @@ class Atom
Point mLocation; Point mLocation;
int mRefcount; int mRefcount;
cif::Row mRow; cif::v2::row_handle mRow;
mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs; // mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs;
mutable const Compound *mCompound = nullptr; mutable const Compound *mCompound = nullptr;
...@@ -123,7 +123,7 @@ class Atom ...@@ -123,7 +123,7 @@ class Atom
{ {
} }
Atom(cif::Datablock &db, cif::Row &row); Atom(cif::v2::datablock &db, cif::v2::row_handle &row);
// a special constructor to create symmetry copies // a special constructor to create symmetry copies
Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation); Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);
...@@ -180,8 +180,8 @@ class Atom ...@@ -180,8 +180,8 @@ class Atom
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2); void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
// for direct access to underlying data, be careful! // for direct access to underlying data, be careful!
const cif::Row getRow() const { return impl().mRow; } const cif::v2::row_handle getRow() const { return impl().mRow; }
const cif::Row getRowAniso() const; const cif::v2::row_handle getRowAniso() const;
bool isSymmetryCopy() const { return impl().mSymmetryCopy; } bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
std::string symmetry() const { return impl().mSymmetryOperator; } std::string symmetry() const { return impl().mSymmetryOperator; }
...@@ -513,7 +513,7 @@ class Polymer : public std::vector<Monomer> ...@@ -513,7 +513,7 @@ class Polymer : public std::vector<Monomer>
Structure *mStructure; Structure *mStructure;
std::string mEntityID; std::string mEntityID;
std::string mAsymID; std::string mAsymID;
cif::RowSet mPolySeq; // cif::v2::row_handleSet mPolySeq;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -578,31 +578,31 @@ class Branch : public std::vector<Sugar> ...@@ -578,31 +578,31 @@ class Branch : public std::vector<Sugar>
// file is a reference to the data stored in e.g. the cif file. // file is a reference to the data stored in e.g. the cif file.
// This object is not copyable. // This object is not copyable.
class File : public cif::File class File : public cif::v2::file
{ {
public: public:
File() {} File() {}
File(const std::filesystem::path &path) // File(const std::filesystem::path &path)
{ // {
load(path); // load(path);
} // }
File(const char *data, size_t length) // File(const char *data, size_t length)
{ // {
load(data, length); // load(data, length);
} // }
File(const File &) = delete; File(const File &) = delete;
File &operator=(const File &) = delete; File &operator=(const File &) = delete;
void load(const std::filesystem::path &p) override; // void load(const std::filesystem::path &p) override;
void save(const std::filesystem::path &p) override; // void save(const std::filesystem::path &p) override;
using cif::File::load; // using cif::v2::file::load;
using cif::File::save; // using cif::v2::file::save;
cif::Datablock &data() { return front(); } cif::v2::datablock &data() { return front(); }
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -622,12 +622,12 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b) ...@@ -622,12 +622,12 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
class Structure class Structure
{ {
public: public:
Structure(cif::File &p, size_t modelNr = 1, StructureOpenOptions options = {}) Structure(cif::v2::file &p, size_t modelNr = 1, StructureOpenOptions options = {})
: Structure(p.front(), modelNr, options) : Structure(p.front(), modelNr, options)
{ {
} }
Structure(cif::Datablock &db, size_t modelNr = 1, StructureOpenOptions options = {}); Structure(cif::v2::datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
Structure(Structure &&s) = default; Structure(Structure &&s) = default;
...@@ -756,12 +756,12 @@ class Structure ...@@ -756,12 +756,12 @@ class Structure
/// This method creates new atom records filled with info from the info. /// This method creates new atom records filled with info from the info.
/// ///
/// \param entity_id The entity ID of the new nonpoly /// \param entity_id The entity ID of the new nonpoly
/// \param atoms The array of sets of cif::item data containing the data for the atoms. /// \param atoms The array of sets of cif::v2::item data containing the data for the atoms.
/// \return The newly create asym ID /// \return The newly create asym ID
std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::Item>> &atom_info); std::string createNonpoly(const std::string &entity_id, std::vector<std::vector<cif::v2::item>> &atom_info);
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info /// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a nag_atom_info
Branch &createBranch(std::vector<std::vector<cif::Item>> &nag_atom_info); Branch &createBranch(std::vector<std::vector<cif::v2::item>> &nag_atom_info);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info /// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
/// ///
...@@ -769,7 +769,7 @@ class Structure ...@@ -769,7 +769,7 @@ class Structure
/// \param atom_info Array containing the info for the atoms to construct for the new sugar /// \param atom_info Array containing the info for the atoms to construct for the new sugar
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based) /// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
/// \param link_atom The atom id of the atom linked in the sugar /// \param link_atom The atom id of the atom linked in the sugar
Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::Item>> &atom_info, Branch &extendBranch(const std::string &asym_id, std::vector<std::vector<cif::v2::item>> &atom_info,
int link_sugar, const std::string &link_atom); int link_sugar, const std::string &link_atom);
/// \brief Remove \a branch /// \brief Remove \a branch
...@@ -797,12 +797,12 @@ class Structure ...@@ -797,12 +797,12 @@ class Structure
void cleanupEmptyCategories(); void cleanupEmptyCategories();
/// \brief Direct access to underlying data /// \brief Direct access to underlying data
cif::Category &category(std::string_view name) const cif::v2::category &category(std::string_view name) const
{ {
return mDb[name]; return mDb[name];
} }
cif::Datablock &datablock() const cif::v2::datablock &datablock() const
{ {
return mDb; return mDb;
} }
...@@ -832,7 +832,7 @@ class Structure ...@@ -832,7 +832,7 @@ class Structure
void removeAtom(Atom &a, bool removeFromResidue); void removeAtom(Atom &a, bool removeFromResidue);
void removeSugar(Sugar &sugar); void removeSugar(Sugar &sugar);
cif::Datablock &mDb; cif::v2::datablock &mDb;
size_t mModelNr; size_t mModelNr;
AtomView mAtoms; AtomView mAtoms;
std::vector<size_t> mAtomIndex; std::vector<size_t> mAtomIndex;
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include <cstdint> #include <cstdint>
#include <array> #include <array>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
namespace mmcif namespace mmcif
{ {
......
...@@ -56,6 +56,8 @@ ...@@ -56,6 +56,8 @@
namespace cif namespace cif
{ {
extern int VERBOSE;
// the git 'build' number // the git 'build' number
std::string get_version_nr(); std::string get_version_nr();
// std::string get_version_date(); // std::string get_version_date();
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
#include <cif++/Cif++.hpp> #include <cif++/Cif++.hpp>
#include <cif++/CifParser.hpp> #include <cif++/CifParser.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/CifValidator.hpp> #include <cif++/CifValidator.hpp>
namespace fs = std::filesystem; namespace fs = std::filesystem;
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <set>
#include <cif++/Cif++.hpp>
#include <cif++/CifParser.hpp>
#include <cif++/CifValidator.hpp>
extern int VERBOSE;
namespace cif
{
// Line length limit of 132 characters. It is not referenced by the code shown in this file;
// presumably it is the CIF maximum line length used when writing -- TODO confirm.
const uint32_t kMaxLineLength = 132;
// Character trait flags, one byte per character. Only 96 values (the rows labelled 2..7) are
// spelled out, the remaining entries of the 128 element array are zero-initialised.
// NOTE(review): the row/column labels suggest the table is indexed with (character - 0x20) and that
// the values are bit masks tested by isOrdinary()/isNonBlank()/isTextLead()/isAnyPrint(); those
// helpers are not visible here -- confirm against their definitions.
const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7
};
// --------------------------------------------------------------------
/// \brief Create a parse exception for an error detected at line \a lineNr.
///
/// The resulting what() text has the form "parse error at line <lineNr>: <message>".
CifParserError::CifParserError(uint32_t lineNr, const std::string &message)
    : std::runtime_error(std::string("parse error at line ")
                             .append(std::to_string(lineNr))
                             .append(": ")
                             .append(message))
{
}
// --------------------------------------------------------------------
// Printable names for the token kinds. The array is indexed with a CIFToken value
// (see match() and the verbose trace at the end of getNextToken()), so the order of
// the entries has to follow the order of that enumeration.
const char *SacParser::kTokenName[] = {
"unknown",
"EOF",
"DATA",
"LOOP",
"GLOBAL",
"SAVE",
"STOP",
"Tag",
"Value"};
// Printable names for the value types. The array is indexed with mTokenType in the
// verbose trace at the end of getNextToken(), so the order of the entries has to
// follow the order of the value type enumeration.
const char *SacParser::kValueName[] = {
"Int",
"Float",
"Numeric",
"String",
"TextField",
"Inapplicable",
"Unknown"};
// --------------------------------------------------------------------
/// \brief Decide whether the zero terminated text \a s can be stored in a CIF file without quotes.
///
/// The text qualifies when its first character is 'ordinary', all following
/// characters are non-blank and the text cannot be mistaken for one of the
/// reserved words data_, save_, loop_, stop_ or global_ (compared case
/// insensitively).
///
/// \param s  The zero terminated text to inspect
/// \return   true if \a s may be written as is, false if it needs quoting.
///           An empty text always needs quoting.
bool isUnquotedString(const char *s)
{
    // An empty string can never be written unquoted; checking this first also
    // guarantees we never look beyond the terminating zero.
    bool result = *s != 0 and isOrdinary(*s);

    for (const char *p = s + 1; result and *p != 0; ++p)
        result = isNonBlank(*p);

    // but be careful it does not contain e.g. stop_
    if (result)
    {
        // The pattern ends in '.*' rather than '.+': the tokenizer turns a bare
        // "loop_", "stop_" or "global_" into a keyword token and starts a data
        // block or save frame on a bare "data_"/"save_", so those exact words
        // must be rejected here as well.
        static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.*)", std::regex_constants::icase);
        result = not std::regex_match(s, reservedRx);
    }

    return result;
}
// --------------------------------------------------------------------
/// \brief Construct a parser that reads its input from \a is.
///
/// Line counting starts at 1, the parser starts at the beginning of a line and
/// validation is switched on.
///
/// \param is    The stream to read from
/// \param init  When true the first token is read immediately into the lookahead
SacParser::SacParser(std::istream &is, bool init)
    : mData(is)
{
    mLineNr = 1;
    mBol = true;
    mValidate = true;

    if (not init)
        return;

    mLookahead = getNextToken();
}
// Report a parse error: always throws a CifParserError carrying the current
// line number (mLineNr) and the message msg. This function never returns normally.
void SacParser::error(const std::string &msg)
{
throw CifParserError(mLineNr, msg);
}
// getNextChar delivers the next input character. Characters that were pushed
// back (see retract) are handed out first, otherwise the istream is read.
// A CR or a CR/LF pair is translated into a single LF. Each delivered
// character is appended to mTokenValue and every LF increments mLineNr.
int SacParser::getNextChar()
{
    int ch;

    if (not mBuffer.empty())
    {
        ch = mBuffer.top();
        mBuffer.pop();
    }
    else
        ch = mData.get();

    // very simple CR/LF translation into LF
    if (ch == '\r')
    {
        // peek at what follows the CR, anything but a LF is kept for the next call
        int peeked = mData.get();
        if (peeked != '\n')
            mBuffer.push(peeked);
        ch = '\n';
    }

    mTokenValue += static_cast<char>(ch);

    if (ch == '\n')
        ++mLineNr;

    if (VERBOSE >= 6)
    {
        std::cerr << "getNextChar => ";

        const bool unprintable = iscntrl(ch) or not isprint(ch);
        if (unprintable)
            std::cerr << int(ch) << std::endl;
        else
            std::cerr << char(ch) << std::endl;
    }

    return ch;
}
/// \brief Undo the last getNextChar().
///
/// The last character of mTokenValue is removed and pushed back onto mBuffer
/// so it will be delivered again; when that character is a newline the line
/// counter is decremented to compensate.
void SacParser::retract()
{
    assert(not mTokenValue.empty());

    const char last = mTokenValue.back();
    mTokenValue.pop_back();

    if (last == '\n')
        --mLineNr;

    mBuffer.push(last);
}
/// \brief Give up the current interpretation of the token and pick the next one to try.
///
/// All characters consumed for the token so far are pushed back. The
/// interpretations are tried in the order start -> float -> int -> value.
///
/// \param start  The state the current, failed attempt was started with
/// \return       The state the next attempt should start with
int SacParser::restart(int start)
{
    // rewind everything read for this token
    while (not mTokenValue.empty())
        retract();

    int next = 0;

    if (start == eStateStart)
        next = eStateFloat;
    else if (start == eStateFloat)
        next = eStateInt;
    else if (start == eStateInt)
        next = eStateValue;
    else
        error("Invalid state in SacParser");

    mBol = false;

    return next;
}
/// \brief Require the lookahead token to be \a t and advance to the next token.
///
/// When the lookahead is a different token, error() is called with a message
/// naming both the expected and the actual token.
void SacParser::match(SacParser::CIFToken t)
{
    if (mLookahead != t)
    {
        std::string msg("Unexpected token, expected ");
        msg += kTokenName[t];
        msg += " but found ";
        msg += kTokenName[mLookahead];
        error(msg);
    }

    mLookahead = getNextToken();
}
// The tokenizer: consumes characters until one complete token has been recognised
// and returns the kind of that token.
//
// On return mTokenValue contains the text of the token; for quoted strings and text
// fields the delimiters are stripped and for DATA and SAVE tokens the five character
// keyword prefix is removed. mTokenType is set for value tokens.
//
// Unquoted text is interpreted by trial: first as a float, then as an int and finally
// as a generic value. Whenever an attempt fails restart() pushes back all characters
// read so far and returns the state for the next attempt.
SacParser::CIFToken SacParser::getNextToken()
{
const auto kEOF = std::char_traits<char>::eof();
CIFToken result = eCIFTokenUnknown;
int quoteChar = 0;
// 'state' is the current state, 'start' remembers which interpretation is being tried
int state = eStateStart, start = eStateStart;
mBol = false;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
while (result == eCIFTokenUnknown)
{
auto ch = getNextChar();
switch (state)
{
// first character of a token, decide what kind of token this might be
case eStateStart:
if (ch == kEOF)
result = eCIFTokenEOF;
else if (ch == '\n')
{
mBol = true;
state = eStateWhite;
}
else if (ch == ' ' or ch == '\t')
state = eStateWhite;
else if (ch == '#')
state = eStateComment;
else if (ch == '_')
state = eStateTag;
// a semicolon only starts a text field when it is the first character of a line
else if (ch == ';' and mBol)
state = eStateTextField;
else if (ch == '\'' or ch == '"')
{
quoteChar = ch;
state = eStateQuotedString;
}
else
state = start = restart(start);
break;
// skipping white space, keeping track of whether we are at the beginning of a line
case eStateWhite:
if (ch == kEOF)
result = eCIFTokenEOF;
else if (not isspace(ch))
{
state = eStateStart;
retract();
mTokenValue.clear();
}
else
mBol = (ch == '\n');
break;
// comments run until the end of the line and are discarded
case eStateComment:
if (ch == '\n')
{
state = eStateStart;
mBol = true;
mTokenValue.clear();
}
else if (ch == kEOF)
result = eCIFTokenEOF;
else if (not isAnyPrint(ch))
error("invalid character in comment");
break;
// inside a text field, somewhere in a line
case eStateTextField:
if (ch == '\n')
state = eStateTextField + 1;
else if (ch == kEOF)
error("unterminated textfield");
else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
// an invalid character in a text field is only reported, parsing continues
std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
break;
// inside a text field, at the start of a line: a semicolon here ends the field
case eStateTextField + 1:
if (isTextLead(ch) or ch == ' ' or ch == '\t')
state = eStateTextField;
else if (ch == ';')
{
assert(mTokenValue.length() >= 2);
// strip the leading ';' and the trailing newline plus ';'
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
mTokenType = eCIFValueTextField;
result = eCIFTokenValue;
}
else if (ch == kEOF)
error("unterminated textfield");
else if (ch != '\n')
error("invalid character in text field");
break;
// inside a quoted string, looking for the closing quote character
case eStateQuotedString:
if (ch == kEOF)
error("unterminated quoted string");
else if (ch == quoteChar)
state = eStateQuotedStringQuote;
else if (not isAnyPrint(ch))
// an invalid character in a quoted string is only reported, parsing continues
std::cerr << "invalid character in quoted string '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
// error("invalid character in quoted string");
break;
// a quote character was seen inside a quoted string; it only terminates the
// string when it is followed by white space, otherwise it is part of the value.
// NOTE(review): unless isWhite() accepts EOF, an EOF directly after the closing
// quote ends up in the "unterminated quoted string" error below -- confirm.
case eStateQuotedStringQuote:
if (isWhite(ch))
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueString;
if (mTokenValue.length() < 2)
error("Invalid quoted string token");
// strip the opening and closing quote
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
}
else if (ch == quoteChar)
;
else if (isAnyPrint(ch))
state = eStateQuotedString;
else if (ch == kEOF)
error("unterminated quoted string");
else
error("invalid character in quoted string");
break;
// a tag runs until the first character that is not non-blank
case eStateTag:
if (not isNonBlank(ch))
{
retract();
result = eCIFTokenTag;
}
break;
// first attempt for unquoted text: a floating point number
case eStateFloat:
if (ch == '+' or ch == '-')
{
state = eStateFloat + 1;
}
else if (isdigit(ch))
state = eStateFloat + 1;
else
state = start = restart(start);
break;
case eStateFloat + 1:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if (ch == '.')
state = eStateFloat + 2;
else if (tolower(ch) == 'e')
state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueInt;
}
else
state = start = restart(start);
break;
// parsed '.'
case eStateFloat + 2:
if (tolower(ch) == 'e')
state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueFloat;
}
else
state = start = restart(start);
break;
// parsed 'e'
case eStateFloat + 3:
if (ch == '-' or ch == '+')
state = eStateFloat + 4;
else if (isdigit(ch))
state = eStateFloat + 5;
else
state = start = restart(start);
break;
// parsed the sign of the exponent, a digit has to follow
case eStateFloat + 4:
if (isdigit(ch))
state = eStateFloat + 5;
else
state = start = restart(start);
break;
// parsed one digit of the exponent
case eStateFloat + 5:
if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueFloat;
}
else
state = start = restart(start);
break;
// second attempt for unquoted text: an integer
case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-')
state = eStateInt + 1;
else
state = start = restart(start);
break;
case eStateInt + 1:
if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueInt;
}
else
state = start = restart(start);
break;
// last attempt for unquoted text: a generic value or one of the reserved words
case eStateValue:
if (ch == '_')
{
// the text read up to and including this underscore might be a keyword
std::string s = toLowerCopy(mTokenValue);
if (s == "global_")
result = eCIFTokenGLOBAL;
else if (s == "stop_")
result = eCIFTokenSTOP;
else if (s == "loop_")
result = eCIFTokenLOOP;
else if (s == "data_")
{
state = eStateDATA;
continue;
}
else if (s == "save_")
{
state = eStateSAVE;
continue;
}
}
if (result == eCIFTokenUnknown and not isNonBlank(ch))
{
retract();
result = eCIFTokenValue;
if (mTokenValue == ".")
mTokenType = eCIFValueInapplicable;
else if (mTokenValue == "?")
{
mTokenType = eCIFValueUnknown;
mTokenValue.clear();
}
}
break;
// reading the name that follows data_ or save_
case eStateDATA:
case eStateSAVE:
if (not isNonBlank(ch))
{
retract();
if (state == eStateDATA)
result = eCIFTokenDATA;
else
result = eCIFTokenSAVE;
// remove the "data_" or "save_" prefix, leaving only the name
mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
}
break;
default:
assert(false);
error("Invalid state in getNextToken");
break;
}
}
// trace the recognised token
if (VERBOSE >= 5)
{
std::cerr << kTokenName[result];
if (mTokenType != eCIFValueUnknown)
std::cerr << ' ' << kValueName[mTokenType];
if (result != eCIFTokenEOF)
std::cerr << " '" << mTokenValue << '\'';
std::cerr << std::endl;
}
return result;
}
// Build an index of the data blocks in the input.
//
// The stream buffer of mData is scanned character by character with a small state
// machine that skips comments, quoted strings and semicolon delimited text fields.
// For every "data_<name>" found, the stream position reported by mData.tellg() at
// the white space character following the name is stored in the index under <name>.
//
// Note that a data block start is recognised on a leading 'd' or 'D', but the rest of
// the keyword is compared case sensitively with "ata_".
DatablockIndex SacParser::indexDatablocks()
{
DatablockIndex index;
// first locate the start, as fast as we can
auto &sb = *mData.rdbuf();
enum
{
start,
comment,
string,
string_quote,
qstring,
data,
data_name
} state = start;
// the quote character that opened the current quoted string
int quote = 0;
// true when the previous character was a newline
bool bol = true;
const char dblk[] = "data_";
// index into dblk of the next keyword character to match
std::string::size_type si = 0;
std::string datablock;
for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
{
switch (state)
{
case start:
switch (ch)
{
case '#': state = comment; break;
case 'd':
case 'D':
state = data;
si = 1;
break;
case '\'':
case '"':
state = string;
quote = ch;
break;
case ';':
if (bol)
state = qstring;
break;
}
break;
case comment:
if (ch == '\n')
state = start;
break;
case string:
if (ch == quote)
state = string_quote;
break;
// a quote character only ends the string when white space follows
case string_quote:
if (std::isspace(ch))
state = start;
else
state = string;
break;
// a text field ends with a semicolon at the beginning of a line
case qstring:
if (ch == ';' and bol)
state = start;
break;
case data:
// the whole keyword matched and a name character follows: start collecting the name
if (dblk[si] == 0 and isNonBlank(ch))
{
datablock = {static_cast<char>(ch)};
state = data_name;
}
else if (dblk[si++] != ch)
state = start;
break;
case data_name:
if (isNonBlank(ch))
datablock.insert(datablock.end(), char(ch));
else if (isspace(ch))
{
if (not datablock.empty())
index[datablock] = mData.tellg();
state = start;
}
else
state = start;
break;
}
bol = (ch == '\n');
}
return index;
}
// Locate the data block called 'datablock' in the input and parse only that block.
//
// The stream buffer of mData is scanned with a small state machine that skips comments,
// quoted strings and semicolon delimited text fields while looking for the text
// "data_<datablock>" followed by white space. A candidate is recognised on a leading
// 'd' or 'D'; the remainder is compared case sensitively with the requested text.
// When found, produceDatablock() is called and the contents are parsed with parseDataBlock().
//
// Returns true when the data block was found, false otherwise.
bool SacParser::parseSingleDatablock(const std::string &datablock)
{
// first locate the start, as fast as we can
auto &sb = *mData.rdbuf();
enum
{
start,
comment,
string,
string_quote,
qstring,
data
} state = start;
// the quote character that opened the current quoted string
int quote = 0;
// true when the previous character was a newline
bool bol = true;
std::string dblk = "data_" + datablock;
// index into dblk of the next character to match
std::string::size_type si = 0;
bool found = false;
for (auto ch = sb.sbumpc(); not found and ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
{
switch (state)
{
case start:
switch (ch)
{
case '#': state = comment; break;
case 'd':
case 'D':
state = data;
si = 1;
break;
case '\'':
case '"':
state = string;
quote = ch;
break;
case ';':
if (bol)
state = qstring;
break;
}
break;
case comment:
if (ch == '\n')
state = start;
break;
case string:
if (ch == quote)
state = string_quote;
break;
// a quote character only ends the string when white space follows
case string_quote:
if (std::isspace(ch))
state = start;
else
state = string;
break;
// a text field ends with a semicolon at the beginning of a line
case qstring:
if (ch == ';' and bol)
state = start;
break;
case data:
// dblk[si] is the terminating zero once the complete text has been matched
if (isspace(ch) and dblk[si] == 0)
found = true;
else if (dblk[si++] != ch)
state = start;
break;
}
bol = (ch == '\n');
}
if (found)
{
produceDatablock(datablock);
mLookahead = getNextToken();
parseDataBlock();
}
return found;
}
/// \brief Parse only the data block named \a datablock, using a previously built \a index.
///
/// The stream is positioned at the offset stored in \a index for this data
/// block, after which the data block is produced and its contents are parsed.
///
/// \param datablock  The name of the data block to parse
/// \param index      The index mapping data block names to stream positions
/// \return           true if the data block was listed in \a index, false otherwise
bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{
    const auto entry = index.find(datablock);
    if (entry == index.end())
        return false;

    mData.seekg(entry->second);

    produceDatablock(datablock);
    mLookahead = getNextToken();
    parseDataBlock();

    return true;
}
/// \brief Parse the complete input.
///
/// At the top level only global_ sections and data blocks are accepted, any
/// other token results in a parse error.
void SacParser::parseFile()
{
    while (mLookahead != eCIFTokenEOF)
    {
        if (mLookahead == eCIFTokenGLOBAL)
            parseGlobal();
        else if (mLookahead == eCIFTokenDATA)
        {
            // the token value is the name of the data block
            produceDatablock(mTokenValue);

            match(eCIFTokenDATA);
            parseDataBlock();
        }
        else
            error("This file does not seem to be an mmCIF file");
    }
}
/// \brief Parse a global_ section.
///
/// The section consists of the global_ keyword followed by zero or more
/// tag/value pairs. The pairs are consumed, nothing is produced for them.
void SacParser::parseGlobal()
{
    match(eCIFTokenGLOBAL);

    for (;;)
    {
        if (mLookahead != eCIFTokenTag)
            break;

        match(eCIFTokenTag);
        match(eCIFTokenValue);
    }
}
/// \brief Parse the contents of a data block.
///
/// A data block is a sequence of loop_ constructs, single tag/value pairs and
/// save frames. For each of these the produce* callbacks are invoked:
/// produceCategory() whenever a new category starts, produceRow() for each
/// new row and produceItem() for each value.
void SacParser::parseDataBlock()
{
    // name of the category the single tag/value pairs currently belong to
    std::string currentCat;

    for (;;)
    {
        if (mLookahead == eCIFTokenLOOP)
        {
            currentCat.clear(); // should start a new category

            match(eCIFTokenLOOP);

            // first the list of tags, all belonging to the same category
            std::vector<std::string> itemNames;

            while (mLookahead == eCIFTokenTag)
            {
                std::string catName, itemName;
                std::tie(catName, itemName) = splitTagName(mTokenValue);

                if (currentCat.empty())
                {
                    produceCategory(catName);
                    currentCat = catName;
                }
                else if (not iequals(currentCat, catName))
                    error("inconsistent categories in loop_");

                itemNames.push_back(itemName);

                match(eCIFTokenTag);
            }

            // followed by the values, one row at a time
            while (mLookahead == eCIFTokenValue)
            {
                produceRow();

                for (const auto &name : itemNames)
                {
                    produceItem(currentCat, name, mTokenValue);
                    match(eCIFTokenValue);
                }
            }

            currentCat.clear();
        }
        else if (mLookahead == eCIFTokenTag)
        {
            std::string catName, itemName;
            std::tie(catName, itemName) = splitTagName(mTokenValue);

            // a tag for another category starts a new category with a single row
            if (not iequals(currentCat, catName))
            {
                produceCategory(catName);
                currentCat = catName;
                produceRow();
            }

            match(eCIFTokenTag);

            produceItem(currentCat, itemName, mTokenValue);

            match(eCIFTokenValue);
        }
        else if (mLookahead == eCIFTokenSAVE)
            parseSaveFrame();
        else
            break;
    }
}
// Save frames are not accepted by this parser: encountering one is reported as a
// parse error. DictParser provides its own parseSaveFrame() that does handle them.
void SacParser::parseSaveFrame()
{
error("A regular CIF file should not contain a save frame");
}
// --------------------------------------------------------------------
// Construct a parser that reads from 'is' and stores what it parses in the file 'f'.
// 'init' is passed on to SacParser. No data block is current until produceDatablock() is called.
Parser::Parser(std::istream &is, File &f, bool init)
: SacParser(is, init)
, mFile(f)
, mDataBlock(nullptr)
{
}
// Create a new data block called 'name', make it the current data block and append it to the file.
// NOTE(review): the data block is allocated with new and not deleted here, presumably
// mFile.append() takes ownership -- confirm.
void Parser::produceDatablock(const std::string &name)
{
mDataBlock = new Datablock(name);
mFile.append(mDataBlock);
}
/// \brief Make the category called \a name in the current data block the current category.
///
/// The category is obtained through the emplace() method of the data block.
void Parser::produceCategory(const std::string &name)
{
    if (VERBOSE >= 4)
    {
        std::cerr << "producing category " << name << std::endl;
    }

    // only the iterator part of the result is of interest
    mCat = std::get<0>(mDataBlock->emplace(name));
}
// Append a new, empty row to the current category, make it the current row
// and record the current line number in it.
void Parser::produceRow()
{
if (VERBOSE >= 4)
std::cerr << "producing row for category " << mCat->name() << std::endl;
mCat->emplace({});
mRow = mCat->back();
mRow.lineNr(mLineNr);
}
/// \brief Store \a value in the field \a item of the current row.
///
/// \param category  The category the item belongs to, this must be the same
///                  as the current category (compared case insensitively),
///                  otherwise a parse error is reported
/// \param item      The name of the item
/// \param value     The value to store
void Parser::produceItem(const std::string &category, const std::string &item, const std::string &value)
{
    if (VERBOSE >= 4)
        std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;

    if (not iequals(category, mCat->name()))
        error("inconsistent categories in loop_");

    // Store the value that was passed in instead of silently reading
    // mTokenValue; the callers in this file pass mTokenValue, so for them
    // nothing changes.
    mRow[item] = value;
}
// --------------------------------------------------------------------
// Private data of DictParser: everything collected from the save frames of a
// dictionary (see DictParser::parseSaveFrame) until it is transferred to the
// Validator in DictParser::loadDictionary and DictParser::linkItems.
struct DictParserDataImpl
{
// temporary values for constructing dictionaries
// one entry for each category save frame
std::vector<ValidateCategory> mCategoryValidators;
// the item validators collected so far, grouped by category name
std::map<std::string, std::vector<ValidateItem>> mItemValidators;
// (child tag name, parent tag name) pairs taken from the item_linked category
std::set<std::tuple<std::string, std::string>> mLinkedItems;
};
// Construct a dictionary parser reading from 'is' that will fill 'validator'.
// The Parser base class is given a reference to the member mFile as the file to store into.
// The implementation data allocated here is released in the destructor.
DictParser::DictParser(Validator &validator, std::istream &is)
: Parser(is, mFile)
, mValidator(validator)
, mImpl(new DictParserDataImpl)
{
}
// Release the implementation data that was allocated in the constructor.
DictParser::~DictParser()
{
delete mImpl;
}
/// \brief Parse one save frame of a dictionary.
///
/// The contents of the save frame are collected in a temporary data block.
/// A save frame whose name does not start with an underscore describes a
/// category and results in a new category validator. All other save frames
/// describe items and result in new or updated item validators and in linked
/// item records. Everything is stored in mImpl, it is handed over to the
/// validator later on by loadDictionary().
///
/// The item types are collected from the current data block the first time
/// this method is called.
void DictParser::parseSaveFrame()
{
    if (not mCollectedItemTypes)
        mCollectedItemTypes = collectItemTypes();

    std::string saveFrameName = mTokenValue;

    if (saveFrameName.empty())
        error("Invalid save frame, should contain more than just 'save_' here");

    bool isCategorySaveFrame = mTokenValue[0] != '_';

    // temporary storage for the contents of this save frame
    Datablock dict(mTokenValue);
    Datablock::iterator cat = dict.end();

    match(eCIFTokenSAVE);

    while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag)
    {
        if (mLookahead == eCIFTokenLOOP)
        {
            cat = dict.end(); // should start a new category

            match(eCIFTokenLOOP);

            std::vector<std::string> tags;
            while (mLookahead == eCIFTokenTag)
            {
                std::string catName, itemName;
                std::tie(catName, itemName) = splitTagName(mTokenValue);

                if (cat == dict.end())
                    std::tie(cat, std::ignore) = dict.emplace(catName);
                else if (not iequals(cat->name(), catName))
                    error("inconsistent categories in loop_");

                tags.push_back(itemName);

                match(eCIFTokenTag);
            }

            while (mLookahead == eCIFTokenValue)
            {
                cat->emplace({});
                auto row = cat->back();

                for (const auto &tag : tags)
                {
                    row[tag] = mTokenValue;
                    match(eCIFTokenValue);
                }
            }

            cat = dict.end();
        }
        else
        {
            std::string catName, itemName;
            std::tie(catName, itemName) = splitTagName(mTokenValue);

            if (cat == dict.end() or not iequals(cat->name(), catName))
                std::tie(cat, std::ignore) = dict.emplace(catName);

            match(eCIFTokenTag);

            // single tag/value pairs all end up in one row
            if (cat->empty())
                cat->emplace({});
            cat->back()[itemName] = mTokenValue;

            match(eCIFTokenValue);
        }
    }

    // a save frame is closed with another save_ token
    match(eCIFTokenSAVE);

    if (isCategorySaveFrame)
    {
        std::string category;
        cif::tie(category) = dict["category"].front().get("id");

        std::vector<std::string> keys;
        for (auto k : dict["category_key"])
            keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>())));

        iset groups;
        for (auto g : dict["category_group"])
            groups.insert(g["id"].as<std::string>());

        mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
    }
    else
    {
        // if the type code is missing, this must be a pointer, just skip it
        std::string typeCode;
        cif::tie(typeCode) = dict["item_type"].front().get("code");

        const ValidateType *tv = nullptr;
        if (not(typeCode.empty() or typeCode == "?"))
            tv = mValidator.getValidatorForType(typeCode);

        iset ess;
        for (auto e : dict["item_enumeration"])
            ess.insert(e["value"].as<std::string>());

        std::string defaultValue;
        cif::tie(defaultValue) = dict["item_default"].front().get("value");
        bool defaultIsNull = false;
        if (defaultValue.empty())
        {
            // NOTE(review): this looks up "_item_default" (with a leading
            // underscore) whereas the value above is taken from
            // "item_default"; if category names never start with an
            // underscore this loop never runs -- confirm which is intended.
            for (auto &r : dict["_item_default"])
            {
                defaultIsNull = r["value"].is_null();
                break;
            }
        }

        // collect the dict from our dataBlock and construct validators
        for (auto i : dict["item"])
        {
            std::string tagName, category, mandatory;

            cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");

            std::string catName, itemName;
            std::tie(catName, itemName) = splitTagName(tagName);

            if (catName.empty() or itemName.empty())
                error("Invalid tag name in _item.name " + tagName);

            // the message used to read 'does match' although it reports a mismatch
            if (not iequals(category, catName) and not(category.empty() or category == "?"))
                error("specified category id does not match the implicit category name for tag '" + tagName + '\'');
            else
                category = catName;

            auto &ivs = mImpl->mItemValidators[category];

            auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
            if (vi == ivs.end())
                ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull});
            else
            {
                // need to update the itemValidator?
                if (vi->mMandatory != (iequals(mandatory, "yes")))
                {
                    if (VERBOSE > 2)
                    {
                        std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;

                        if (iequals(tagName, saveFrameName))
                            std::cerr << "choosing " << mandatory << std::endl;
                        else
                            std::cerr << "choosing " << (vi->mMandatory ? "Y" : "N") << std::endl;
                    }

                    // the definition in the save frame named after the item wins
                    if (iequals(tagName, saveFrameName))
                        vi->mMandatory = (iequals(mandatory, "yes"));
                }

                if (vi->mType != nullptr and tv != nullptr and vi->mType != tv)
                {
                    if (VERBOSE > 1)
                        std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
                }

                // vi->mMandatory = (iequals(mandatory, "yes"));
                if (vi->mType == nullptr)
                    vi->mType = tv;

                vi->mEnums.insert(ess.begin(), ess.end());

                // anything else yet?
                // ...
            }
        }

        // collect the dict from our dataBlock and construct validators
        for (auto i : dict["item_linked"])
        {
            std::string childTagName, parentTagName;

            cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name");

            mImpl->mLinkedItems.emplace(childTagName, parentTagName);
        }
    }
}
/// \brief Construct the link validators from the information in the dictionary.
///
/// Links are taken from the category pdbx_item_linked_group_list of the
/// current data block. Only when that category is empty, the linked items
/// collected from the save frames are used instead, all with link group ID 0.
/// The label of each link is looked up in the category pdbx_item_linked_group.
///
/// As a final step a message is printed for every item validator that lacks a type.
void DictParser::linkItems()
{
    if (not mDataBlock)
        error("no datablock");

    auto &dict = *mDataBlock;

    // links are identified by a parent category, a child category and a group ID
    using key_type = std::tuple<std::string, std::string, int>;

    std::map<key_type, size_t> linkIndex;

    // Each link group consists of a set of keys
    std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;

    // add the parent key/child key pair to link group ix, unless it is already in there
    auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
    {
        auto &&[pkeys, ckeys] = linkKeys.at(ix);

        for (size_t i = 0; i < pkeys.size(); ++i)
        {
            if (pkeys[i] == pk and ckeys[i] == ck)
                return;
        }

        pkeys.push_back(pk);
        ckeys.push_back(ck);
    };

    // look up the validators for both items and record the link in the proper link group
    auto registerLink = [&](const std::string &child, const std::string &parent, int groupID)
    {
        auto civ = mValidator.getValidatorForItem(child);
        if (civ == nullptr)
            error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");

        auto piv = mValidator.getValidatorForItem(parent);
        if (piv == nullptr)
            error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");

        key_type key{piv->mCategory->mName, civ->mCategory->mName, groupID};

        // a key not seen before gets a new, empty link group
        auto [pos, isNew] = linkIndex.try_emplace(key, linkKeys.size());
        if (isNew)
            linkKeys.push_back({});

        addLink(pos->second, piv->mTag, civ->mTag);
    };

    auto &linkedGroupList = dict["pdbx_item_linked_group_list"];

    for (auto gl : linkedGroupList)
    {
        std::string child, parent;
        int link_group_id;
        cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");

        registerLink(child, parent, link_group_id);
    }

    // Only process inline linked items if the linked group list is absent
    if (linkedGroupList.empty())
    {
        // for links recorded in categories but not in pdbx_item_linked_group_list
        for (const auto &[child, parent] : mImpl->mLinkedItems)
            registerLink(child, parent, 0);
    }

    auto &linkedGroup = dict["pdbx_item_linked_group"];

    // now store the links in the validator
    for (auto &[key, ix] : linkIndex)
    {
        ValidateLink link = {};
        std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = key;
        std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[ix];

        // look up the label, only the first match is used
        for (auto r : linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
        {
            link.mLinkGroupLabel = r["label"].as<std::string>();
            break;
        }

        mValidator.addLinkValidator(std::move(link));
    }

    // now make sure the itemType is specified for all itemValidators
    for (auto &cv : mValidator.mCategoryValidators)
    {
        for (auto &iv : cv.mItemValidators)
        {
            if (iv.mType != nullptr or cif::VERBOSE < 0)
                continue;

            std::cerr << "Missing item_type for " << iv.mTag << std::endl;
        }
    }
}
void DictParser::loadDictionary()
{
std::unique_ptr<Datablock> dict;
Datablock *savedDatablock = mDataBlock;
try
{
while (mLookahead != eCIFTokenEOF)
{
switch (mLookahead)
{
case eCIFTokenGLOBAL:
parseGlobal();
break;
default:
{
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
mDataBlock = dict.get();
match(eCIFTokenDATA);
parseDataBlock();
break;
}
}
}
}
catch (const std::exception &)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error parsing dictionary" << std::endl;
throw;
}
// store all validators
for (auto &ic : mImpl->mCategoryValidators)
mValidator.addCategoryValidator(std::move(ic));
mImpl->mCategoryValidators.clear();
for (auto &iv : mImpl->mItemValidators)
{
auto cv = mValidator.getValidatorForCategory(iv.first);
if (cv == nullptr)
error("Undefined category '" + iv.first);
for (auto &v : iv.second)
const_cast<ValidateCategory *>(cv)->addItemValidator(std::move(v));
}
// check all item validators for having a typeValidator
if (dict)
linkItems();
// store meta information
Datablock::iterator info;
bool n;
std::tie(info, n) = mDataBlock->emplace("dictionary");
if (n)
{
auto r = info->front();
mValidator.dictName(r["title"].as<std::string>());
mValidator.dictVersion(r["version"].as<std::string>());
}
mDataBlock = savedDatablock;
mImpl->mItemValidators.clear();
}
bool DictParser::collectItemTypes()
{
bool result = false;
if (not mDataBlock)
error("no datablock");
auto &dict = *mDataBlock;
for (auto &t : dict["item_type_list"])
{
std::string code, primitiveCode, construct;
cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
cif::replace_all(construct, "\\n", "\n");
cif::replace_all(construct, "\\t", "\t");
cif::replace_all(construct, "\\\n", "");
try
{
ValidateType v = {
code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::extended | boost::regex::optimize)};
mValidator.addTypeValidator(std::move(v));
}
catch (const std::exception &)
{
throw_with_nested(CifParserError(t.lineNr(), "error in regular expression"));
}
// Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-)
// if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v);
if (VERBOSE >= 5)
std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
result = true;
}
return result;
}
} // namespace cif
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fstream>
#include <filesystem>
#include <gzstream/gzstream.hpp>
#include <cif++/Cif++.hpp>
#include <cif++/CifParser.hpp>
#include <cif++/CifValidator.hpp>
namespace fs = std::filesystem;
extern int VERBOSE;
namespace cif
{
// Construct a validation error with msg as its message text.
ValidationError::ValidationError(const std::string &msg)
: mMsg(msg)
{
}
// Construct a validation error for the item 'item' in category 'cat'.
// The message text has the form "When validating _<cat>.<item>: <msg>".
ValidationError::ValidationError(const std::string &cat, const std::string &item, const std::string &msg)
: mMsg("When validating _" + cat + '.' + item + ": " + msg)
{
}
// --------------------------------------------------------------------
/// \brief Translate the name of a primitive type into the corresponding DDL_PrimitiveType.
///
/// The names "char", "uchar" and "numb" are recognised, compared case insensitively.
///
/// \param s  The name of the primitive type
/// \return   The matching DDL_PrimitiveType value
/// \throws ValidationError when \a s is not one of the known names
DDL_PrimitiveType mapToPrimitiveType(std::string_view s)
{
    if (iequals(s, "char"))
        return DDL_PrimitiveType::Char;

    if (iequals(s, "uchar"))
        return DDL_PrimitiveType::UChar;

    if (iequals(s, "numb"))
        return DDL_PrimitiveType::Numb;

    throw ValidationError("Not a known primitive type");
}
// --------------------------------------------------------------------
/// \brief Compare the two zero terminated values \a a and \a b according to the primitive type of this type.
///
/// An empty value sorts before any non-empty value. Values of a numeric type
/// are converted with strtod and considered equal when they differ by no more
/// than the epsilon of a double. Values of the character types are compared
/// character by character, treating a run of spaces as a single space; for
/// the UChar type the comparison ignores case.
///
/// \return zero when the values are equal, a negative value when \a a sorts
///         before \a b and a positive value when \a a sorts after \a b
int ValidateType::compare(const char *a, const char *b) const
{
    // the empty value comes first
    if (*a == 0)
        return *b == 0 ? 0 : -1;

    if (*b == 0)
        return +1;

    int result = 0;

    try
    {
        if (mPrimitiveType == DDL_PrimitiveType::Numb)
        {
            const double diff = strtod(a, nullptr) - strtod(b, nullptr);

            if (std::abs(diff) > std::numeric_limits<double>::epsilon())
                result = diff > 0 ? 1 : -1;
        }
        else if (mPrimitiveType == DDL_PrimitiveType::UChar or mPrimitiveType == DDL_PrimitiveType::Char)
        {
            // CIF is guaranteed to have ascii only, therefore this primitive code will do
            // also, we're collapsing spaces

            const bool foldCase = mPrimitiveType == DDL_PrimitiveType::UChar;

            const char *pa = a;
            const char *pb = b;

            while (true)
            {
                if (*pa == 0)
                {
                    // a is exhausted, it sorts first unless b is exhausted too
                    if (*pb != 0)
                        result = -1;
                    break;
                }

                if (*pb == 0)
                {
                    result = 1;
                    break;
                }

                char ca = *pa;
                char cb = *pb;

                if (foldCase)
                {
                    ca = tolower(ca);
                    cb = tolower(cb);
                }

                result = ca - cb;

                if (result != 0)
                    break;

                // both are at a space here, move to the last space of the run in each value
                if (ca == ' ')
                {
                    while (pa[1] == ' ')
                        ++pa;
                    while (pb[1] == ' ')
                        ++pb;
                }

                ++pa;
                ++pb;
            }
        }
    }
    catch (const std::invalid_argument &ex)
    {
        result = 1;
    }

    return result;
}
// --------------------------------------------------------------------
//void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
//{
//// if (mParent != nullptr and VERBOSE)
//// cerr << "replacing parent in " << mCategory->mName << " from " << mParent->mCategory->mName << " to " << parent->mCategory->mName << endl;
//// mParent = parent;
//
// if (mType == nullptr and parent != nullptr)
// mType = parent->mType;
//
// if (parent != nullptr)
// {
// mLinked.push_back({parent, parentItem, childItem});
//
// parent->mChildren.insert(this);
////
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
//// parent->mForeignKeys.insert(this);
// }
//}
// Validate a single value for this item.
//
// Empty values and the special values "?" and "." are always accepted.
// Otherwise the value must match the regular expression of the item's type
// (when a type is known) and must be one of the enumerated values (when any
// are defined). A ValidationError is thrown when a check fails.
void ValidateItem::operator()(std::string value) const
{
	if (value.empty() or value == "?" or value == ".")
		return;

	if (mType != nullptr and not regex_match(value, mType->mRx))
		throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);

	if (not mEnums.empty() and mEnums.count(value) == 0)
		throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
}
// --------------------------------------------------------------------
// Register an item validator with this category.
//
// A mandatory item has its tag recorded in mMandatoryFields, and the
// validator's category pointer is set to this category before it is moved
// into mItemValidators. When an equivalent validator is already present the
// new one is dropped and, at VERBOSE >= 4, a message is printed.
void ValidateCategory::addItemValidator(ValidateItem &&v)
{
	if (v.mMandatory)
		mMandatoryFields.insert(v.mTag);

	v.mCategory = this;

	auto r = mItemValidators.insert(std::move(v));

	// v may have been moved from, so report using the element that blocked the insert
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << r.first->mTag << " to category " << mName << std::endl;
}
// Look up the validator for the item named tag in this category.
// Returns nullptr when there is none; at VERBOSE > 4 a message is printed
// in that case.
const ValidateItem *ValidateCategory::getValidatorForItem(std::string_view tag) const
{
	auto pos = mItemValidators.find(ValidateItem{std::string(tag)});

	if (pos != mItemValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
// --------------------------------------------------------------------
// Construct a validator called name and fill it by running a DictParser
// over the dictionary text read from is.
Validator::Validator(std::string_view name, std::istream &is)
: mName(name)
{
DictParser p(*this, is);
p.loadDictionary();
}
// Nothing to clean up explicitly.
Validator::~Validator()
{
}
// Register a type validator. When an equivalent validator is already
// present the new one is dropped and, at VERBOSE > 4, a message is printed.
void Validator::addTypeValidator(ValidateType &&v)
{
	auto r = mTypeValidators.insert(std::move(v));

	// v may have been moved from, so report using the element that blocked the insert
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for type " << r.first->mName << std::endl;
}
// Look up the type validator registered under typeCode.
// Returns nullptr when there is none; at VERBOSE > 4 a message is printed
// in that case.
const ValidateType *Validator::getValidatorForType(std::string_view typeCode) const
{
	// only the name takes part in the lookup call, the other fields are placeholders
	auto pos = mTypeValidators.find(ValidateType{std::string(typeCode), DDL_PrimitiveType::Char, boost::regex()});

	if (pos != mTypeValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
// Register a category validator. When an equivalent validator is already
// present the new one is dropped and, at VERBOSE > 4, a message is printed.
void Validator::addCategoryValidator(ValidateCategory &&v)
{
	auto r = mCategoryValidators.insert(std::move(v));

	// v may have been moved from, so report using the element that blocked the insert
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for category " << r.first->mName << std::endl;
}
// Look up the validator for the category with the given name.
// Returns nullptr when there is none; at VERBOSE > 4 a message is printed
// in that case.
const ValidateCategory *Validator::getValidatorForCategory(std::string_view category) const
{
	auto pos = mCategoryValidators.find(ValidateCategory{std::string(category)});

	if (pos != mCategoryValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
// Look up the item validator for a full tag. The tag is split into a
// category and an item name with splitTagName, after which the category
// validator is asked for the item validator.
// Returns nullptr when either lookup fails; at VERBOSE > 4 a message is
// printed in that case.
ValidateItem *Validator::getValidatorForItem(std::string_view tag) const
{
	std::string cat, item;
	std::tie(cat, item) = splitTagName(tag);

	const ValidateCategory *cv = getValidatorForCategory(cat);

	// the category hands out a const pointer, callers of this method expect a mutable one
	ValidateItem *iv = cv == nullptr ? nullptr : const_cast<ValidateItem *>(cv->getValidatorForItem(item));

	if (iv == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << tag << std::endl;

	return iv;
}
// Register a parent/child link between two categories.
//
// Throws std::runtime_error when the number of parent and child keys
// differ, when one of the categories is unknown or when one of the key
// items is unknown in its category. A child item without a type inherits
// the type of the parent item it is linked to.
void Validator::addLinkValidator(ValidateLink &&v)
{
	// The link comes from a parsed dictionary, so a mismatch is reported as an
	// exception in every build type rather than aborting through an assert.
	if (v.mParentKeys.size() != v.mChildKeys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	auto pcv = getValidatorForCategory(v.mParentCategory);
	auto ccv = getValidatorForCategory(v.mChildCategory);

	if (pcv == nullptr)
		throw std::runtime_error("unknown parent category " + v.mParentCategory);

	if (ccv == nullptr)
		throw std::runtime_error("unknown child category " + v.mChildCategory);

	for (size_t i = 0; i < v.mParentKeys.size(); ++i)
	{
		auto piv = pcv->getValidatorForItem(v.mParentKeys[i]);

		if (piv == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.mParentCategory + '.' + v.mParentKeys[i]);

		auto civ = ccv->getValidatorForItem(v.mChildKeys[i]);
		if (civ == nullptr)
			throw std::runtime_error("unknown child tag _" + v.mChildCategory + '.' + v.mChildKeys[i]);

		// a child without a type of its own takes over the parent's type
		if (civ->mType == nullptr and piv->mType != nullptr)
			const_cast<ValidateItem *>(civ)->mType = piv->mType;
	}

	mLinkValidators.emplace_back(std::move(v));
}
std::vector<const ValidateLink *> Validator::getLinksForParent(std::string_view category) const
{
std::vector<const ValidateLink *> result;
for (auto &l : mLinkValidators)
{
if (l.mParentCategory == category)
result.push_back(&l);
}
return result;
}
std::vector<const ValidateLink *> Validator::getLinksForChild(std::string_view category) const
{
std::vector<const ValidateLink *> result;
for (auto &l : mLinkValidators)
{
if (l.mChildCategory == category)
result.push_back(&l);
}
return result;
}
// Report a validation problem. In strict mode, or when fatal is true, a
// ValidationError is thrown. Otherwise the message is written to std::cerr,
// but only when VERBOSE > 0.
void Validator::reportError(const std::string &msg, bool fatal) const
{
	const bool mustThrow = mStrict or fatal;

	if (mustThrow)
		throw ValidationError(msg);

	if (VERBOSE > 0)
		std::cerr << msg << std::endl;
}
// --------------------------------------------------------------------
// Definition of the static ValidatorFactory instance.
ValidatorFactory ValidatorFactory::sInstance;
// The factory starts out without any validators, they are added by operator[].
ValidatorFactory::ValidatorFactory()
{
}
// Return the validator for the named dictionary, constructing it on first use.
//
// Lookup order:
//  1. an already constructed validator whose name matches (using iequals)
//  2. a resource obtained through loadResource, tried with the name as given
//     and, when that fails and the extension is not ".dic", with ".dic" appended
//  3. a gzip compressed file on disk; when CACHE_DIR and DATA_DIR are both
//     defined, those directories are searched as well
//
// Throws std::runtime_error when the dictionary cannot be found or opened.
const Validator &ValidatorFactory::operator[](std::string_view dictionary)
{
// mMutex is held for the whole lookup and the possible construction
std::lock_guard lock(mMutex);
for (auto &validator : mValidators)
{
if (iequals(validator.mName, dictionary))
return validator;
}
// not found, add it
// too bad clang version 10 did not have a constructor for fs::path that accepts a std::string_view
fs::path dict_name(dictionary.data(), dictionary.data() + dictionary.length());
auto data = loadResource(dict_name);
// second attempt, this time with a .dic extension appended to the file name
if (not data and dict_name.extension().string() != ".dic")
data = loadResource(dict_name.parent_path() / (dict_name.filename().string() + ".dic"));
if (data)
mValidators.emplace_back(dictionary, *data);
else
{
std::error_code ec;
// might be a compressed dictionary on disk
fs::path p = dict_name;
// build the name of the compressed file: <name>.dic.gz
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) and defined(DATA_DIR)
// not found at the given location, try CACHE_DIR first and then DATA_DIR
if (not fs::exists(p, ec) or ec)
{
for (const char *dir : {CACHE_DIR, DATA_DIR})
{
auto p2 = fs::path(dir) / p;
if (fs::exists(p2, ec) and not ec)
{
swap(p, p2);
break;
}
}
}
#endif
if (fs::exists(p, ec) and not ec)
{
gzstream::ifstream file(p);
if (not file.is_open())
throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
mValidators.emplace_back(dictionary, file);
}
else
throw std::runtime_error("Dictionary not found or defined (" + dict_name.string() + ")");
}
// the validator that was just added is the last one in the list
assert(iequals(mValidators.back().mName, dictionary));
return mValidators.back();
}
} // namespace cif
/* Template for a configuration header. The #cmakedefine lines and the
   @NAME@ placeholders are presumably substituted by CMake's configure_file()
   (TODO confirm against the CMakeLists.txt that processes this file). */
/* Define to the name of this package. */
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
/* Define to the version of this package. */
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
/* Define the complete package string */
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
/* Using resources? */
#cmakedefine USE_RSRC @USE_RSRC@
/* src/Config.hpp.in. Generated from configure.ac by autoheader. */
/* define if the Boost library is available */
#undef HAVE_BOOST
/* define if the Boost::Date_Time library is available */
#undef HAVE_BOOST_DATE_TIME
/* define if the Boost::IOStreams library is available */
#undef HAVE_BOOST_IOSTREAMS
/* define if the Boost::Regex library is available */
#undef HAVE_BOOST_REGEX
/* define if the compiler supports basic C++17 syntax */
#undef HAVE_CXX17
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `floor' function. */
#undef HAVE_FLOOR
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `pow' function. */
#undef HAVE_POW
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Have PTHREAD_PRIO_INHERIT. */
#undef HAVE_PTHREAD_PRIO_INHERIT
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
/* Define to 1 if you have the `rint' function. */
#undef HAVE_RINT
/* Define to 1 if you have the `sqrt' function. */
#undef HAVE_SQRT
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#undef LT_OBJDIR
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Use mrc to store resources */
#undef USE_RSRC
...@@ -24,8 +24,11 @@ ...@@ -24,8 +24,11 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/v2/category.hpp> #include <numeric>
#include <cif++/v2/datablock.hpp>
#include <cif++/cif/category.hpp>
#include <cif++/cif/datablock.hpp>
#include <cif++/cif/parser.hpp>
// TODO: Find out what the rules are exactly for linked items, the current implementation // TODO: Find out what the rules are exactly for linked items, the current implementation
// is inconsistent. It all depends whether a link is satisfied if a field taking part in the // is inconsistent. It all depends whether a link is satisfied if a field taking part in the
...@@ -34,6 +37,8 @@ ...@@ -34,6 +37,8 @@
namespace cif::v2 namespace cif::v2
{ {
const uint32_t kMaxLineLength = 132;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
class row_comparator class row_comparator
...@@ -1526,7 +1531,7 @@ category::iterator category::insert_impl(const_iterator pos, row *n) ...@@ -1526,7 +1531,7 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
// if (test != nullptr) // if (test != nullptr)
// { // {
// if (VERBOSE > 1) // if (VERBOSE > 1)
// std::cerr << "Not inserting new record in " << mName << " (duplicate Key)" << std::endl; // std::cerr << "Not inserting new record in " << m_name << " (duplicate Key)" << std::endl;
// result = test; // result = test;
// isNew = false; // isNew = false;
// } // }
...@@ -1612,4 +1617,262 @@ category::iterator category::erase_impl(const_iterator pos) ...@@ -1612,4 +1617,262 @@ category::iterator category::erase_impl(const_iterator pos)
// return iterator(*this, cur); // return iterator(*this, cur);
} }
namespace detail
{

	// Write a single value to os, choosing a representation that fits it:
	//
	//  - a text field (delimited by lines starting with ';') when the value
	//    contains a newline, when width is zero or when the value is longer
	//    than 132 characters
	//  - the bare value when sac_parser::is_unquoted_string accepts it
	//  - the value between single or double quotes when one of these can be used
	//  - a text field when neither quote character can be used
	//
	// offset is the number of characters already written on the current line,
	// width is the column width to pad the value to. The return value is the
	// new offset, which is zero after a text field since that ends with a newline.
	size_t write_value(std::ostream &os, std::string_view value, size_t offset, size_t width)
	{
		if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
		{
			if (offset > 0)
				os << std::endl;
			os << ';';

			char pc = 0;
			for (auto ch : value)
			{
				// a ';' at the start of a line is preceded by a backslash
				if (pc == '\n' and ch == ';')
					os << '\\';
				os << ch;
				pc = ch;
			}

			// value may be empty here (when width is zero), back() would then be undefined
			if (value.empty() or value.back() != '\n')
				os << std::endl;

			os << ';' << std::endl;
			offset = 0;
		}
		else if (sac_parser::is_unquoted_string(value))
		{
			os << value;

			if (value.length() < width)
			{
				os << std::string(width - value.length(), ' ');
				offset += width;
			}
			else
			{
				os << ' ';
				offset += value.length() + 1;
			}
		}
		else
		{
			bool done = false;
			for (char q : {'\'', '"'})
			{
				auto p = value.find(q); // see if we can use the quote character

				// Skip occurrences of q that are followed by a non-blank character other
				// than q. An occurrence at the very end of the value ends the search,
				// indexing a string_view at size() is not allowed.
				while (p != std::string::npos and p + 1 < value.length() and
					   sac_parser::is_non_blank(value[p + 1]) and value[p + 1] != q)
					p = value.find(q, p + 1);

				if (p != std::string::npos)
					continue;

				os << q << value << q;

				if (value.length() + 2 < width)
				{
					os << std::string(width - value.length() - 2, ' ');
					offset += width;
				}
				else
				{
					// two quotes, the value and a single separating space
					os << ' ';
					offset += value.length() + 3;
				}

				done = true;
				break;
			}

			if (not done)
			{
				if (offset > 0)
					os << std::endl;
				os << ';' << value << std::endl
				   << ';' << std::endl;
				offset = 0;
			}
		}

		return offset;
	}

} // namespace detail
std::vector<std::string> category::get_tag_order() const
{
std::vector<std::string> result;
for (auto &c : m_columns)
result.push_back("_" + m_name + "." + c.m_name);
return result;
}
// Write this category to os with the columns in the order in which they are stored.
void category::write(std::ostream &os) const
{
	// order is simply 0, 1, 2, ... one entry for each column
	std::vector<uint16_t> order(m_columns.size());
	std::iota(order.begin(), order.end(), 0); // qualified, do not rely on argument dependent lookup
	write(os, order, false);
}
// Write this category to os, starting with the columns named in columns, in
// that order. Columns that are not mentioned follow in their stored order.
// Each named column is passed to add_column first, which is why this overload
// is not const.
void category::write(std::ostream &os, const std::vector<std::string> &columns)
{
	// make sure all columns are present
	for (const auto &name : columns)
		add_column(name);

	std::vector<uint16_t> order;
	order.reserve(m_columns.size());

	// first the requested columns ...
	for (const auto &name : columns)
		order.push_back(get_column_ix(name));

	// ... followed by whatever was not requested explicitly
	for (size_t ix = 0; ix < m_columns.size(); ++ix)
	{
		const bool listed = std::find(order.begin(), order.end(), ix) != order.end();
		if (not listed)
			order.push_back(ix);
	}

	write(os, order, true);
}
// Write this category to os, with the columns in the order given by order
// (a list of column indices).
//
// Nothing is written for an empty category. A category with more than one
// row is written as a loop_ construct with the values aligned in columns,
// a category with a single row is written as a list of tag/value pairs.
// Missing or empty values are written as "?". The output ends with a line
// containing "# ".
//
// The includeEmptyColumns parameter is not used by this implementation.
void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const
{
	if (empty())
		return;

	// If the first Row has a next, we need a loop_
	bool needLoop = (m_head->m_next != nullptr);

	if (needLoop)
	{
		os << "loop_" << std::endl;

		for (auto cix : order)
		{
			auto &col = m_columns[cix];
			os << '_' << m_name << '.' << col.m_name << ' ' << std::endl;
		}

		// The widths are indexed by column index, not by position in order, so
		// there must be an entry for every column even when order is shorter.
		std::vector<size_t> columnWidths(m_columns.size(), 2);

		// find the width needed for each column
		for (auto r = m_head; r != nullptr; r = r->m_next)
		{
			for (auto v = r->m_head; v != nullptr; v = v->m_next)
			{
				if (v->text().find('\n') == std::string_view::npos)
				{
					size_t l = v->text().length();

					if (not sac_parser::is_unquoted_string(v->text()))
						l += 2;

					// values this long are not written inline, they do not affect the width
					if (l > kMaxLineLength)
						continue;

					if (columnWidths[v->m_column_ix] < l + 1)
						columnWidths[v->m_column_ix] = l + 1;
				}
			}
		}

		for (auto r = m_head; r != nullptr; r = r->m_next) // loop over rows
		{
			size_t offset = 0;

			for (size_t cix : order)
			{
				size_t w = columnWidths[cix];

				std::string_view s;
				for (auto iv = r->m_head; iv != nullptr; iv = iv->m_next)
				{
					if (iv->m_column_ix == cix)
					{
						s = iv->text();
						break;
					}
				}

				if (s.empty())
					s = "?";

				size_t l = s.length();
				if (not sac_parser::is_unquoted_string(s))
					l += 2;
				if (l < w)
					l = w;

				// start a new line when the value would not fit on the current one
				if (offset + l > kMaxLineLength and offset > 0)
				{
					os << std::endl;
					offset = 0;
				}

				offset = detail::write_value(os, s, offset, w);

				if (offset > kMaxLineLength)
				{
					os << std::endl;
					offset = 0;
				}
			}

			if (offset > 0)
				os << std::endl;
		}
	}
	else
	{
		// first find the indent level
		size_t l = 0;

		for (auto &col : m_columns)
		{
			std::string tag = '_' + m_name + '.' + col.m_name;

			if (l < tag.length())
				l = tag.length();
		}

		l += 3;

		for (size_t cix : order)
		{
			auto &col = m_columns[cix];

			// the tag, padded with spaces up to the indent level
			os << '_' << m_name << '.' << col.m_name << std::string(l - col.m_name.length() - m_name.length() - 2, ' ');

			std::string_view s;
			for (auto iv = m_head->m_head; iv != nullptr; iv = iv->m_next)
			{
				if (iv->m_column_ix == cix)
				{
					s = iv->text();
					break;
				}
			}

			if (s.empty())
				s = "?";

			size_t offset = l;
			if (s.length() + l >= kMaxLineLength)
			{
				os << std::endl;
				offset = 0;
			}

			// a non-zero result means the line has not been terminated yet
			if (detail::write_value(os, s, offset, 1) != 0)
				os << std::endl;
		}
	}

	os << "# " << std::endl;
}
} // namespace cif::v2 } // namespace cif::v2
\ No newline at end of file
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/v2/category.hpp> #include <cif++/cif/category.hpp>
#include <cif++/v2/condition.hpp> #include <cif++/cif/condition.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -24,53 +24,26 @@ ...@@ -24,53 +24,26 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#pragma once #include <cif++/cif/datablock.hpp>
#include <cif++/v2/forward_decl.hpp>
#include <cif++/v2/category.hpp>
namespace cif::v2 namespace cif::v2
{ {
// -------------------------------------------------------------------- void datablock::set_validator(const validator *v)
class datablock : public std::list<category>
{ {
public:
datablock() = default;
datablock(std::string_view name)
: m_name(name)
{
}
datablock(const datablock &) = default;
datablock(datablock &&) = default;
datablock &operator=(const datablock &) = default;
datablock &operator=(datablock &&) = default;
// --------------------------------------------------------------------
const std::string &name() const { return m_name; }
void set_validator(const validator *v)
{
m_validator = v; m_validator = v;
for (auto &cat : *this) for (auto &cat : *this)
cat.set_validator(v, *this); cat.set_validator(v, *this);
} }
const validator *get_validator() const const validator *datablock::get_validator() const
{ {
return m_validator; return m_validator;
} }
bool is_valid() const bool datablock::is_valid() const
{ {
if (m_validator == nullptr) if (m_validator == nullptr)
throw std::runtime_error("Validator not specified"); throw std::runtime_error("Validator not specified");
...@@ -79,12 +52,12 @@ class datablock : public std::list<category> ...@@ -79,12 +52,12 @@ class datablock : public std::list<category>
result = cat.is_valid() and result; result = cat.is_valid() and result;
return result; return result;
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
category &operator[](std::string_view name) category &datablock::operator[](std::string_view name)
{ {
auto i = std::find_if(begin(), end(), [name](const category &c) auto i = std::find_if(begin(), end(), [name](const category &c)
{ return iequals(c.name(), name); }); { return iequals(c.name(), name); });
...@@ -93,30 +66,30 @@ class datablock : public std::list<category> ...@@ -93,30 +66,30 @@ class datablock : public std::list<category>
emplace_back(name); emplace_back(name);
return back(); return back();
} }
const category &operator[](std::string_view name) const const category &datablock::operator[](std::string_view name) const
{ {
static const category s_empty; static const category s_empty;
auto i = std::find_if(begin(), end(), [name](const category &c) auto i = std::find_if(begin(), end(), [name](const category &c)
{ return iequals(c.name(), name); }); { return iequals(c.name(), name); });
return i == end() ? s_empty : *i; return i == end() ? s_empty : *i;
} }
category *get(std::string_view name) category *datablock::get(std::string_view name)
{ {
auto i = std::find_if(begin(), end(), [name](const category &c) auto i = std::find_if(begin(), end(), [name](const category &c)
{ return iequals(c.name(), name); }); { return iequals(c.name(), name); });
return i == end() ? nullptr : &*i; return i == end() ? nullptr : &*i;
} }
const category *get(std::string_view name) const const category *datablock::get(std::string_view name) const
{ {
return const_cast<datablock *>(this)->get(name); return const_cast<datablock *>(this)->get(name);
} }
std::tuple<iterator, bool> emplace(std::string_view name) std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
{ {
bool is_new = true; bool is_new = true;
auto i = begin(); auto i = begin();
...@@ -145,53 +118,54 @@ class datablock : public std::list<category> ...@@ -145,53 +118,54 @@ class datablock : public std::list<category>
} }
return std::make_tuple(begin(), is_new); return std::make_tuple(begin(), is_new);
}
std::vector<std::string> datablock::get_tag_order() const
{
std::vector<std::string> result;
for (auto &cat : *this)
{
auto cto = cat.get_tag_order();
result.insert(result.end(), cto.begin(), cto.end());
}
return result;
}
void datablock::write(std::ostream &os) const
{
os << "data_" << m_name << std::endl
<< "# " << std::endl;
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
{
if (cat.name() != "entry")
continue;
cat.write(os);
if (m_validator != nullptr)
{
category auditConform("audit_conform");
auditConform.emplace({
{"dict_name", m_validator->name()},
{"dict_version", m_validator->version()}});
auditConform.write(os);
}
break;
}
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
} }
}
// void write(std::ostream &os) const } // namespace cif::cif
// { \ No newline at end of file
// // std::shared_lock lock(mLock);
// os << "data_" << m_name << std::endl
// << "# " << std::endl;
// // mmcif support, sort of. First write the 'entry' Category
// // and if it exists, _AND_ we have a Validator, write out the
// // audit_conform record.
// for (auto &cat : m_categories)
// {
// if (cat.name() != "entry")
// continue;
// cat.write(os);
// // if (mValidator != nullptr)
// // {
// // Category auditConform(*this, "audit_conform", nullptr);
// // auditConform.emplace({{"dict_name", mValidator->dictName()},
// // {"dict_version", mValidator->dictVersion()}});
// // auditConform.write(os);
// // }
// break;
// }
// for (auto &cat : m_categories)
// {
// if (cat.name() != "entry" and cat.name() != "audit_conform")
// cat.write(os);
// }
// }
// friend std::ostream &operator<<(std::ostream &os, const datablock &db)
// {
// db.write(os);
// return os;
// }
private:
std::string m_name;
const validator *m_validator = nullptr;
};
} // namespace cif::v2
\ No newline at end of file
...@@ -24,10 +24,10 @@ ...@@ -24,10 +24,10 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/v2/condition.hpp> #include <cif++/cif/condition.hpp>
#include <cif++/v2/dictionary_parser.hpp> #include <cif++/cif/dictionary_parser.hpp>
#include <cif++/v2/file.hpp> #include <cif++/cif/file.hpp>
#include <cif++/v2/parser.hpp> #include <cif++/cif/parser.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/v2/row.hpp> #include <cif++/cif/row.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -30,11 +30,11 @@ ...@@ -30,11 +30,11 @@
#include <regex> #include <regex>
#include <stack> #include <stack>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/v2/forward_decl.hpp> #include <cif++/cif/forward_decl.hpp>
#include <cif++/v2/parser.hpp> #include <cif++/cif/parser.hpp>
#include <cif++/v2/file.hpp> #include <cif++/cif/file.hpp>
namespace cif namespace cif
{ {
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/v2/category.hpp> #include <cif++/cif/category.hpp>
namespace cif::v2 namespace cif::v2
{ {
......
...@@ -30,10 +30,10 @@ ...@@ -30,10 +30,10 @@
#include <gzstream/gzstream.hpp> #include <gzstream/gzstream.hpp>
#include <cif++/v2/dictionary_parser.hpp> #include <cif++/cif/dictionary_parser.hpp>
#include <cif++/v2/validate.hpp> #include <cif++/cif/validate.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
namespace cif namespace cif
{ {
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <set>
#include <cif++/v2/parser.hpp>
// extern int VERBOSE;
namespace cif::v2
{
// Maximum length of a line.
const uint32_t kMaxLineLength = 132;
// Table with character class values. There are 96 initialisers in six rows
// labelled 2 to 7, so presumably the table is indexed by (character - 0x20)
// and the values are bit masks tested by the character classification
// helpers (TODO confirm against the header). The remaining entries of the
// 128 element array are zero-initialised.
const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, // 7
};
// --------------------------------------------------------------------
// Construct a parse error, the resulting what() text has the form
// "parse error at line <lineNr>: <message>".
parse_error::parse_error(uint32_t lineNr, const std::string &message)
: std::runtime_error("parse error at line " + std::to_string(lineNr) + ": " + message)
{
}
// --------------------------------------------------------------------
// Printable names for tokens, indexed with a CIFToken value when building
// error messages (see match()).
const char *SacParser::kTokenName[] = {
"unknown",
"EOF",
"DATA",
"LOOP",
"GLOBAL",
"SAVE",
"STOP",
"Tag",
"Value"};
// Printable names for value types, presumably indexed with the value type
// enumeration in the same way (TODO confirm against the header).
const char *SacParser::kValueName[] = {
"Int",
"Float",
"Numeric",
"String",
"TextField",
"Inapplicable",
"Unknown"};
// --------------------------------------------------------------------
// Return whether the zero-terminated string s can be written without quotes.
//
// The first character must satisfy isOrdinary and all following characters
// must satisfy isNonBlank. In addition the string must not match the
// pattern for reserved words (data_, save_, loop_, stop_, global_).
bool isUnquotedString(const char *s)
{
	const char *const begin = s;

	if (not isOrdinary(*s))
		return false;

	for (++s; *s != 0; ++s)
	{
		if (not isNonBlank(*s))
			return false;
	}

	// but be careful it does not contain e.g. stop_
	static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
	return not std::regex_match(begin, reservedRx);
}
// --------------------------------------------------------------------
// Construct a parser reading from is. The parser starts at line 1, at the
// beginning of a line and with validation switched on. When init is true
// the first token is read right away and stored as the lookahead token.
SacParser::SacParser(std::istream &is, bool init)
: mData(is)
{
mValidate = true;
mLineNr = 1;
mBol = true;
// the state above must be set before the first token is requested
if (init)
mLookahead = getNextToken();
}
// Report a parse error at the current line by throwing a parse_error,
// this method does not return normally.
void SacParser::error(const std::string &msg)
{
throw parse_error(mLineNr, msg);
}
// getNextChar returns the next character of the input. Characters that were
// pushed back into the buffer are used first, the stream is read only when
// the buffer is empty. A carriage return, optionally followed by a linefeed,
// is translated into a single linefeed.
//
// The character is appended to the current token value and the line number
// is incremented for each linefeed. At VERBOSE >= 6 each character is
// logged to std::cerr.
int SacParser::getNextChar()
{
	int ch;

	if (not mBuffer.empty())
	{
		ch = mBuffer.top();
		mBuffer.pop();
	}
	else
		ch = mData.get();

	// very simple CR/LF translation into LF
	if (ch == '\r')
	{
		int next = mData.get();
		if (next != '\n')
			mBuffer.push(next);
		ch = '\n';
	}

	mTokenValue += static_cast<char>(ch);

	if (ch == '\n')
		++mLineNr;

	if (VERBOSE >= 6)
	{
		std::cerr << "getNextChar => ";

		const bool printable = not(iscntrl(ch) or not isprint(ch));
		if (printable)
			std::cerr << char(ch) << std::endl;
		else
			std::cerr << int(ch) << std::endl;
	}

	return ch;
}
// Undo the last getNextChar: the last character of the token value is
// removed and pushed back into the buffer. The line number is decremented
// when that character is a linefeed.
void SacParser::retract()
{
	assert(not mTokenValue.empty());

	const char last = mTokenValue.back();
	mTokenValue.pop_back();

	if (last == '\n')
		--mLineNr;

	mBuffer.push(last);
}
// Give up on the current attempt to recognise a token: everything read so
// far is pushed back and the next start state to try is returned. The order
// is eStateStart -> eStateFloat -> eStateInt -> eStateValue, any other
// start state is reported as an error.
int SacParser::restart(int start)
{
	while (not mTokenValue.empty())
		retract();

	int next = 0;

	if (start == eStateStart)
		next = eStateFloat;
	else if (start == eStateFloat)
		next = eStateInt;
	else if (start == eStateInt)
		next = eStateValue;
	else
		error("Invalid state in SacParser");

	mBol = false;

	return next;
}
// Check that the lookahead token is t and advance to the next token.
// A different lookahead token is reported as an error naming both tokens.
void SacParser::match(SacParser::CIFToken t)
{
	if (mLookahead != t)
	{
		std::string msg("Unexpected token, expected ");
		msg += kTokenName[t];
		msg += " but found ";
		msg += kTokenName[mLookahead];
		error(msg);
	}

	mLookahead = getNextToken();
}
/// Scan the input for the next token.
///
/// Characters are fetched with getNextChar() and fed through a state machine
/// until a token is complete. On return mTokenValue holds the token text
/// (without the surrounding quotes or text field delimiters, and without the
/// data_/save_ prefix) and mTokenType holds the value classification for
/// eCIFTokenValue tokens.
///
/// Unquoted values are recognised by trial: first as a float, then as an
/// integer and finally as a plain value. Each failed attempt calls restart(),
/// which retracts the consumed characters and selects the next attempt.
///
/// @return The kind of token that was recognised, eCIFTokenEOF at end of input.
SacParser::CIFToken SacParser::getNextToken()
{
	const auto kEOF = std::char_traits<char>::eof();

	CIFToken result = eCIFTokenUnknown;
	int quoteChar = 0;
	int state = eStateStart, start = eStateStart;
	mBol = false;

	mTokenValue.clear();
	mTokenType = eCIFValueUnknown;

	while (result == eCIFTokenUnknown)
	{
		auto ch = getNextChar();

		switch (state)
		{
			case eStateStart:
				if (ch == kEOF)
					result = eCIFTokenEOF;
				else if (ch == '\n')
				{
					mBol = true;
					state = eStateWhite;
				}
				else if (ch == ' ' or ch == '\t')
					state = eStateWhite;
				else if (ch == '#')
					state = eStateComment;
				else if (ch == '_')
					state = eStateTag;
				else if (ch == ';' and mBol) // a text field can only start at the beginning of a line
					state = eStateTextField;
				else if (ch == '\'' or ch == '"')
				{
					quoteChar = ch;
					state = eStateQuotedString;
				}
				else
					state = start = restart(start);
				break;

			case eStateWhite:
				if (ch == kEOF)
					result = eCIFTokenEOF;
				else if (not isspace(ch))
				{
					// first character of the next token, scan it again from the start state
					state = eStateStart;
					retract();
					mTokenValue.clear();
				}
				else
					mBol = (ch == '\n');
				break;

			case eStateComment:
				if (ch == '\n')
				{
					state = eStateStart;
					mBol = true;
					mTokenValue.clear();
				}
				else if (ch == kEOF)
					result = eCIFTokenEOF;
				else if (not isAnyPrint(ch))
					error("invalid character in comment");
				break;

			// inside a text field, somewhere in a line
			case eStateTextField:
				if (ch == '\n')
					state = eStateTextField + 1;
				else if (ch == kEOF)
					error("unterminated textfield");
				else if (not isAnyPrint(ch))
					// reported, but deliberately not treated as a fatal error
					std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
				break;

			// inside a text field, at the start of a line
			case eStateTextField + 1:
				if (isTextLead(ch) or ch == ' ' or ch == '\t')
					state = eStateTextField;
				else if (ch == ';')
				{
					// strip the leading ';' and the trailing newline plus ';'
					assert(mTokenValue.length() >= 2);
					mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
					mTokenType = eCIFValueTextField;
					result = eCIFTokenValue;
				}
				else if (ch == kEOF)
					error("unterminated textfield");
				else if (ch != '\n')
					error("invalid character in text field");
				break;

			case eStateQuotedString:
				if (ch == kEOF)
					error("unterminated quoted string");
				else if (ch == quoteChar)
					state = eStateQuotedStringQuote;
				else if (not isAnyPrint(ch))
					// reported, but deliberately not treated as a fatal error
					std::cerr << "invalid character in quoted string '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
				break;

			// seen a quote character inside a quoted string, it only ends
			// the string when it is followed by white space
			case eStateQuotedStringQuote:
				if (isWhite(ch))
				{
					retract();
					result = eCIFTokenValue;
					mTokenType = eCIFValueString;

					if (mTokenValue.length() < 2)
						error("Invalid quoted string token");

					mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
				}
				else if (ch == quoteChar)
					;
				else if (isAnyPrint(ch))
					state = eStateQuotedString;
				else if (ch == kEOF)
					error("unterminated quoted string");
				else
					error("invalid character in quoted string");
				break;

			case eStateTag:
				if (not isNonBlank(ch))
				{
					retract();
					result = eCIFTokenTag;
				}
				break;

			// first character of a potential number: a sign or a digit
			case eStateFloat:
				if (ch == '+' or ch == '-')
				{
					state = eStateFloat + 1;
				}
				else if (isdigit(ch))
					state = eStateFloat + 1;
				else
					state = start = restart(start);
				break;

			// integer part
			case eStateFloat + 1:
				if (isdigit(ch))
					; // more digits of the integer part, without this only one-digit numbers were recognised
				else if (ch == '.')
					state = eStateFloat + 2;
				else if (tolower(ch) == 'e')
					state = eStateFloat + 3;
				else if (isWhite(ch) or ch == kEOF)
				{
					retract();
					result = eCIFTokenValue;
					mTokenType = eCIFValueInt;
				}
				else
					state = start = restart(start);
				break;

			// parsed '.'
			case eStateFloat + 2:
				if (isdigit(ch))
					; // digits of the fraction
				else if (tolower(ch) == 'e')
					state = eStateFloat + 3;
				else if (isWhite(ch) or ch == kEOF)
				{
					retract();
					result = eCIFTokenValue;
					mTokenType = eCIFValueFloat;
				}
				else
					state = start = restart(start);
				break;

			// parsed 'e'
			case eStateFloat + 3:
				if (ch == '-' or ch == '+')
					state = eStateFloat + 4;
				else if (isdigit(ch))
					state = eStateFloat + 5;
				else
					state = start = restart(start);
				break;

			// parsed the sign of the exponent, a digit is required now
			case eStateFloat + 4:
				if (isdigit(ch))
					state = eStateFloat + 5;
				else
					state = start = restart(start);
				break;

			// digits of the exponent
			case eStateFloat + 5:
				if (isdigit(ch))
					; // more digits of the exponent
				else if (isWhite(ch) or ch == kEOF)
				{
					retract();
					result = eCIFTokenValue;
					mTokenType = eCIFValueFloat;
				}
				else
					state = start = restart(start);
				break;

			case eStateInt:
				if (isdigit(ch) or ch == '+' or ch == '-')
					state = eStateInt + 1;
				else
					state = start = restart(start);
				break;

			case eStateInt + 1:
				if (isdigit(ch))
					; // remaining digits of the integer
				else if (isWhite(ch) or ch == kEOF)
				{
					retract();
					result = eCIFTokenValue;
					mTokenType = eCIFValueInt;
				}
				else
					state = start = restart(start);
				break;

			// anything else: a reserved word or an unquoted value
			case eStateValue:
				if (ch == '_')
				{
					// reserved words all end with an underscore and are case insensitive
					std::string s = toLowerCopy(mTokenValue);

					if (s == "global_")
						result = eCIFTokenGLOBAL;
					else if (s == "stop_")
						result = eCIFTokenSTOP;
					else if (s == "loop_")
						result = eCIFTokenLOOP;
					else if (s == "data_")
					{
						state = eStateDATA;
						continue;
					}
					else if (s == "save_")
					{
						state = eStateSAVE;
						continue;
					}
				}

				if (result == eCIFTokenUnknown and not isNonBlank(ch))
				{
					retract();
					result = eCIFTokenValue;

					if (mTokenValue == ".")
						mTokenType = eCIFValueInapplicable;
					else if (mTokenValue == "?")
					{
						mTokenType = eCIFValueUnknown;
						mTokenValue.clear();
					}
				}
				break;

			// collecting the name following data_ or save_
			case eStateDATA:
			case eStateSAVE:
				if (not isNonBlank(ch))
				{
					retract();

					if (state == eStateDATA)
						result = eCIFTokenDATA;
					else
						result = eCIFTokenSAVE;

					// drop the five characters of the data_ or save_ prefix
					mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
				}
				break;

			default:
				assert(false);
				error("Invalid state in getNextToken");
				break;
		}
	}

	if (VERBOSE >= 5)
	{
		std::cerr << kTokenName[result];
		if (mTokenType != eCIFValueUnknown)
			std::cerr << ' ' << kValueName[mTokenType];
		if (result != eCIFTokenEOF)
			std::cerr << " '" << mTokenValue << '\'';
		std::cerr << std::endl;
	}

	return result;
}
/// Build an index of the datablocks in the input.
///
/// The stream buffer of mData is scanned directly, without tokenising, while
/// skipping comments, quoted strings and text fields. For every data_<name>
/// that is followed by white space the stream position right after that white
/// space character is stored under <name>.
///
/// The data_ keyword is matched without regard to case. (Before, a leading
/// 'D' was accepted but the remaining characters had to be lower case.)
///
/// @return The index mapping datablock names to stream positions.
DatablockIndex SacParser::indexDatablocks()
{
	DatablockIndex index;

	// first locate the start, as fast as we can
	auto &sb = *mData.rdbuf();

	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data,
		data_name
	} state = start;

	int quote = 0;
	bool bol = true;
	const char dblk[] = "data_";
	const std::string::size_type dblkLength = sizeof(dblk) - 1;
	std::string::size_type si = 0;
	std::string datablock;

	for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
	{
		switch (state)
		{
			case start:
				switch (ch)
				{
					case '#': state = comment; break;
					case 'd':
					case 'D':
						state = data;
						si = 1;
						break;
					case '\'':
					case '"':
						state = string;
						quote = ch;
						break;
					case ';':
						if (bol)
							state = qstring;
						break;
				}
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				if (std::isspace(ch))
					state = start;
				else
					state = string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				if (si == dblkLength)
				{
					// the complete keyword was seen, a name must follow immediately
					if (isNonBlank(ch))
					{
						datablock = {static_cast<char>(ch)};
						state = data_name;
					}
					else
						state = start;
				}
				else if (std::tolower(ch) == dblk[si])
					++si; // never advances beyond the keyword, so dblk is not indexed out of bounds
				else
					state = start;
				break;

			case data_name:
				if (isNonBlank(ch))
					datablock.insert(datablock.end(), char(ch));
				else if (isspace(ch))
				{
					if (not datablock.empty())
						index[datablock] = mData.tellg();

					state = start;
				}
				else
					state = start;
				break;
		}

		bol = (ch == '\n');
	}

	return index;
}
/// Locate the datablock called @a datablock in the input and parse only that one.
///
/// The stream buffer of mData is scanned directly for data_<datablock>
/// followed by white space, skipping comments, quoted strings and text
/// fields. Scanning stops right after that white space character so the
/// tokeniser starts at the first character of the datablock content. (Before,
/// the loop read one character too many and the first character of the
/// content was lost.)
///
/// The data_ keyword is matched without regard to case, the name itself has
/// to match exactly.
///
/// @param datablock  Name of the datablock to parse.
/// @return           True when the datablock was found and parsed.
bool SacParser::parseSingleDatablock(const std::string &datablock)
{
	// first locate the start, as fast as we can
	auto &sb = *mData.rdbuf();

	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data
	} state = start;

	int quote = 0;
	bool bol = true;
	const std::string::size_type keywordLength = 5; // length of "data_"
	std::string dblk = "data_" + datablock;
	std::string::size_type si = 0;
	bool found = false;

	for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
	{
		switch (state)
		{
			case start:
				switch (ch)
				{
					case '#': state = comment; break;
					case 'd':
					case 'D':
						state = data;
						si = 1;
						break;
					case '\'':
					case '"':
						state = string;
						quote = ch;
						break;
					case ';':
						if (bol)
							state = qstring;
						break;
				}
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				if (std::isspace(ch))
					state = start;
				else
					state = string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				if (si == dblk.length())
				{
					// keyword and name matched completely, white space must follow
					if (isspace(ch))
						found = true;
					else
						state = start;
				}
				else
				{
					// compare as unsigned values since ch is never negative here
					const int expected = static_cast<unsigned char>(dblk[si]);
					const int actual = si < keywordLength ? std::tolower(ch) : ch;

					if (actual == expected)
						++si;
					else
						state = start;
				}
				break;
		}

		// stop before the loop fetches another character from the buffer
		if (found)
			break;

		bol = (ch == '\n');
	}

	if (found)
	{
		produceDatablock(datablock);
		mLookahead = getNextToken();
		parseDataBlock();
	}

	return found;
}
/// Parse only the datablock called @a datablock, using a previously built index.
///
/// When the name is present in @a index the input is positioned at the stored
/// offset and the datablock is parsed from there.
///
/// @param datablock  Name of the datablock to parse.
/// @param index      Index mapping datablock names to stream positions.
/// @return           True when the name was found in the index.
bool SacParser::parseSingleDatablock(const std::string &datablock, const DatablockIndex &index)
{
	const auto entry = index.find(datablock);

	if (entry == index.end())
		return false;

	mData.seekg(entry->second);

	produceDatablock(datablock);
	mLookahead = getNextToken();
	parseDataBlock();

	return true;
}
/// Parse the complete input: a sequence of global_ sections and datablocks.
///
/// Any other token at this level is reported through error().
void SacParser::parseFile()
{
	while (mLookahead != eCIFTokenEOF)
	{
		if (mLookahead == eCIFTokenGLOBAL)
			parseGlobal();
		else if (mLookahead == eCIFTokenDATA)
		{
			// the token text is the name of the datablock
			produceDatablock(mTokenValue);

			match(eCIFTokenDATA);
			parseDataBlock();
		}
		else
			error("This file does not seem to be an mmCIF file");
	}
}
/// Parse a global_ section: the keyword followed by any number of tag/value
/// pairs. The pairs are consumed, nothing is produced for them.
void SacParser::parseGlobal()
{
	match(eCIFTokenGLOBAL);

	for (;;)
	{
		if (mLookahead != eCIFTokenTag)
			break;

		match(eCIFTokenTag);
		match(eCIFTokenValue);
	}
}
void SacParser::parseDataBlock()
{
std::string cat;
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE)
{
switch (mLookahead)
{
case eCIFTokenLOOP:
{
cat.clear(); // should start a new category
match(eCIFTokenLOOP);
std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag)
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat.empty())
{
produceCategory(catName);
cat = catName;
}
else if (not iequals(cat, catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (mLookahead == eCIFTokenValue)
{
produceRow();
for (auto tag : tags)
{
produceItem(cat, tag, mTokenValue);
match(eCIFTokenValue);
}
}
cat.clear();
break;
}
case eCIFTokenTag:
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (not iequals(cat, catName))
{
produceCategory(catName);
cat = catName;
produceRow();
}
match(eCIFTokenTag);
produceItem(cat, itemName, mTokenValue);
match(eCIFTokenValue);
break;
}
case eCIFTokenSAVE:
parseSaveFrame();
break;
default:
assert(false);
break;
}
}
}
void SacParser::parseSaveFrame()
{
error("A regular CIF file should not contain a save frame");
}
// --------------------------------------------------------------------
// Construct a parser reading CIF text from `is`. The datablocks produced
// while parsing are appended to `f`; `init` is passed on unchanged to the
// SacParser base. There is no current datablock until produceDatablock()
// has been called.
Parser::Parser(std::istream &is, File &f, bool init)
: SacParser(is, init)
, mFile(f)
, mDataBlock(nullptr)
{
}
/// Start a new datablock called @a name: it is created, made the current
/// datablock and appended to the file being filled.
void Parser::produceDatablock(const std::string &name)
{
	auto *datablock = new Datablock(name);

	mDataBlock = datablock;
	mFile.append(datablock);
}
/// Make the category called @a name in the current datablock the current
/// category, using the first element of what Datablock::emplace returns.
void Parser::produceCategory(const std::string &name)
{
	if (VERBOSE >= 4)
	{
		std::cerr << "producing category " << name << std::endl;
	}

	mCat = std::get<0>(mDataBlock->emplace(name));
}
/// Append an empty row to the current category, make it the current row and
/// record the current line number in it.
void Parser::produceRow()
{
	if (VERBOSE >= 4)
	{
		std::cerr << "producing row for category " << mCat->name() << std::endl;
	}

	mCat->emplace({});

	// the row that was just added is the last one
	mRow = mCat->back();
	mRow.lineNr(mLineNr);
}
/// Store an item in the current row.
///
/// @param category  Category the item belongs to, must be equal (ignoring
///                  case) to the name of the current category.
/// @param item      Name of the item inside the category.
/// @param value     The value to store.
///
/// The value that is stored is @a value. Before, the parameter was ignored and
/// the current token text was stored instead, which only gave the same result
/// because the callers in this file pass the current token text.
void Parser::produceItem(const std::string &category, const std::string &item, const std::string &value)
{
	if (VERBOSE >= 4)
		std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;

	if (not iequals(category, mCat->name()))
		error("inconsistent categories in loop_");

	mRow[item] = value;
}
// --------------------------------------------------------------------
// Scratch storage used by DictParser while a dictionary is being read.
// parseSaveFrame() fills these containers, loadDictionary() and linkItems()
// transfer the content to the validator.
struct DictParserDataImpl
{
// temporary values for constructing dictionaries
// one entry for each category save frame
std::vector<ValidateCategory> mCategoryValidators;
// item validators collected from item save frames, grouped by category name
std::map<std::string, std::vector<ValidateItem>> mItemValidators;
// (child tag, parent tag) pairs taken from item_linked
std::set<std::tuple<std::string, std::string>> mLinkedItems;
};
// Construct a dictionary parser reading from `is`. The validators that are
// built while parsing are stored in `validator`. The scratch storage in mImpl
// is allocated here and released in the destructor.
// NOTE(review): mFile is passed to the Parser base before any member of this
// class is constructed - presumably the base only stores a reference to it,
// confirm against the class declaration.
DictParser::DictParser(Validator &validator, std::istream &is)
: Parser(is, mFile)
, mValidator(validator)
, mImpl(new DictParserDataImpl)
{
}
// Release the scratch storage that was allocated in the constructor.
DictParser::~DictParser()
{
delete mImpl;
}
void DictParser::parseSaveFrame()
{
if (not mCollectedItemTypes)
mCollectedItemTypes = collectItemTypes();
std::string saveFrameName = mTokenValue;
if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here");
bool isCategorySaveFrame = mTokenValue[0] != '_';
Datablock dict(mTokenValue);
Datablock::iterator cat = dict.end();
match(eCIFTokenSAVE);
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag)
{
if (mLookahead == eCIFTokenLOOP)
{
cat = dict.end(); // should start a new category
match(eCIFTokenLOOP);
std::vector<std::string> tags;
while (mLookahead == eCIFTokenTag)
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (mLookahead == eCIFTokenValue)
{
cat->emplace({});
auto row = cat->back();
for (auto tag : tags)
{
row[tag] = mTokenValue;
match(eCIFTokenValue);
}
}
cat = dict.end();
}
else
{
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end() or not iequals(cat->name(), catName))
std::tie(cat, std::ignore) = dict.emplace(catName);
match(eCIFTokenTag);
if (cat->empty())
cat->emplace({});
cat->back()[itemName] = mTokenValue;
match(eCIFTokenValue);
}
}
match(eCIFTokenSAVE);
if (isCategorySaveFrame)
{
std::string category;
cif::tie(category) = dict["category"].front().get("id");
std::vector<std::string> keys;
for (auto k : dict["category_key"])
keys.push_back(std::get<1>(splitTagName(k["name"].as<std::string>())));
iset groups;
for (auto g : dict["category_group"])
groups.insert(g["id"].as<std::string>());
mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
}
else
{
// if the type code is missing, this must be a pointer, just skip it
std::string typeCode;
cif::tie(typeCode) = dict["item_type"].front().get("code");
const ValidateType *tv = nullptr;
if (not(typeCode.empty() or typeCode == "?"))
tv = mValidator.getValidatorForType(typeCode);
iset ess;
for (auto e : dict["item_enumeration"])
ess.insert(e["value"].as<std::string>());
std::string defaultValue;
cif::tie(defaultValue) = dict["item_default"].front().get("value");
bool defaultIsNull = false;
if (defaultValue.empty())
{
for (auto &r : dict["_item_default"])
{
defaultIsNull = r["value"].is_null();
break;
}
}
// collect the dict from our dataBlock and construct validators
for (auto i : dict["item"])
{
std::string tagName, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string catName, itemName;
std::tie(catName, itemName) = splitTagName(tagName);
if (catName.empty() or itemName.empty())
error("Invalid tag name in _item.name " + tagName);
if (not iequals(category, catName) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
else
category = catName;
auto &ivs = mImpl->mItemValidators[category];
auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
if (vi == ivs.end())
ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull});
else
{
// need to update the itemValidator?
if (vi->mMandatory != (iequals(mandatory, "yes")))
{
if (VERBOSE > 2)
{
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
if (iequals(tagName, saveFrameName))
std::cerr << "choosing " << mandatory << std::endl;
else
std::cerr << "choosing " << (vi->mMandatory ? "Y" : "N") << std::endl;
}
if (iequals(tagName, saveFrameName))
vi->mMandatory = (iequals(mandatory, "yes"));
}
if (vi->mType != nullptr and tv != nullptr and vi->mType != tv)
{
if (VERBOSE > 1)
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
}
// vi->mMandatory = (iequals(mandatory, "yes"));
if (vi->mType == nullptr)
vi->mType = tv;
vi->mEnums.insert(ess.begin(), ess.end());
// anything else yet?
// ...
}
}
// collect the dict from our dataBlock and construct validators
for (auto i : dict["item_linked"])
{
std::string childTagName, parentTagName;
cif::tie(childTagName, parentTagName) = i.get("child_name", "parent_name");
mImpl->mLinkedItems.emplace(childTagName, parentTagName);
}
}
}
void DictParser::linkItems()
{
if (not mDataBlock)
error("no datablock");
auto &dict = *mDataBlock;
// links are identified by a parent category, a child category and a group ID
using key_type = std::tuple<std::string, std::string, int>;
std::map<key_type, size_t> linkIndex;
// Each link group consists of a set of keys
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
{
auto &&[pkeys, ckeys] = linkKeys.at(ix);
bool found = false;
for (size_t i = 0; i < pkeys.size(); ++i)
{
if (pkeys[i] == pk and ckeys[i] == ck)
{
found = true;
break;
}
}
if (not found)
{
pkeys.push_back(pk);
ckeys.push_back(ck);
}
};
auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
for (auto gl : linkedGroupList)
{
std::string child, parent;
int link_group_id;
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{piv->mCategory->mName, civ->mCategory->mName, link_group_id};
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag);
}
// Only process inline linked items if the linked group list is absent
if (linkedGroupList.empty())
{
// for links recorded in categories but not in pdbx_item_linked_group_list
for (auto li : mImpl->mLinkedItems)
{
std::string child, parent;
std::tie(child, parent) = li;
auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{piv->mCategory->mName, civ->mCategory->mName, 0};
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->mTag, civ->mTag);
}
}
auto &linkedGroup = dict["pdbx_item_linked_group"];
// now store the links in the validator
for (auto &kv : linkIndex)
{
ValidateLink link = {};
std::tie(link.mParentCategory, link.mChildCategory, link.mLinkGroupID) = kv.first;
std::tie(link.mParentKeys, link.mChildKeys) = linkKeys[kv.second];
// look up the label
for (auto r : linkedGroup.find(cif::Key("category_id") == link.mChildCategory and cif::Key("link_group_id") == link.mLinkGroupID))
{
link.mLinkGroupLabel = r["label"].as<std::string>();
break;
}
mValidator.addLinkValidator(std::move(link));
}
// now make sure the itemType is specified for all itemValidators
for (auto &cv : mValidator.mCategoryValidators)
{
for (auto &iv : cv.mItemValidators)
{
if (iv.mType == nullptr and cif::VERBOSE >= 0)
std::cerr << "Missing item_type for " << iv.mTag << std::endl;
}
}
}
void DictParser::loadDictionary()
{
std::unique_ptr<Datablock> dict;
Datablock *savedDatablock = mDataBlock;
try
{
while (mLookahead != eCIFTokenEOF)
{
switch (mLookahead)
{
case eCIFTokenGLOBAL:
parseGlobal();
break;
default:
{
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
mDataBlock = dict.get();
match(eCIFTokenDATA);
parseDataBlock();
break;
}
}
}
}
catch (const std::exception &)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error parsing dictionary" << std::endl;
throw;
}
// store all validators
for (auto &ic : mImpl->mCategoryValidators)
mValidator.addCategoryValidator(std::move(ic));
mImpl->mCategoryValidators.clear();
for (auto &iv : mImpl->mItemValidators)
{
auto cv = mValidator.getValidatorForCategory(iv.first);
if (cv == nullptr)
error("Undefined category '" + iv.first);
for (auto &v : iv.second)
const_cast<ValidateCategory *>(cv)->addItemValidator(std::move(v));
}
// check all item validators for having a typeValidator
if (dict)
linkItems();
// store meta information
Datablock::iterator info;
bool n;
std::tie(info, n) = mDataBlock->emplace("dictionary");
if (n)
{
auto r = info->front();
mValidator.dictName(r["title"].as<std::string>());
mValidator.dictVersion(r["version"].as<std::string>());
}
mDataBlock = savedDatablock;
mImpl->mItemValidators.clear();
}
/// Register a type validator for every row in item_type_list of the current
/// datablock.
///
/// The escape sequences \n and \t in the construct are replaced by a real
/// newline and tab, a backslash followed by a newline is removed. The result
/// is compiled as an extended regular expression. A failure to construct the
/// validator is rethrown as a parse_error for the line of the row, with the
/// original exception nested inside.
///
/// @return True when at least one type was added.
bool DictParser::collectItemTypes()
{
	if (not mDataBlock)
		error("no datablock");

	auto &dict = *mDataBlock;

	bool anyAdded = false;

	for (auto &t : dict["item_type_list"])
	{
		std::string code, primitiveCode, construct;
		cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");

		// the order of these replacements is significant
		cif::replace_all(construct, "\\n", "\n");
		cif::replace_all(construct, "\\t", "\t");
		cif::replace_all(construct, "\\\n", "");

		try
		{
			ValidateType v = {
				code,
				mapToPrimitiveType(primitiveCode),
				boost::regex(construct, boost::regex::extended | boost::regex::optimize)};

			mValidator.addTypeValidator(std::move(v));
		}
		catch (const std::exception &)
		{
			throw_with_nested(parse_error(t.lineNr(), "error in regular expression"));
		}

		// Do not replace an already defined type validator, this won't work with pdbx_v40
		// as it has a name that is too strict for its own names :-)

		if (VERBOSE >= 5)
		{
			std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
		}

		anyAdded = true;
	}

	return anyAdded;
}
} // namespace cif
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include <boost/numeric/ublas/matrix.hpp> #include <boost/numeric/ublas/matrix.hpp>
#include <cif++/AtomType.hpp> #include <cif++/AtomType.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/Compound.hpp> #include <cif++/Compound.hpp>
#include <cif++/PDB2Cif.hpp> #include <cif++/PDB2Cif.hpp>
#include <cif++/PDB2CifRemark3.hpp> #include <cif++/PDB2CifRemark3.hpp>
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#include <cif++/AtomType.hpp> #include <cif++/AtomType.hpp>
#include <cif++/Compound.hpp> #include <cif++/Compound.hpp>
#include <cif++/PDB2CifRemark3.hpp> #include <cif++/PDB2CifRemark3.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
using cif::Datablock; using cif::Datablock;
using cif::Category; using cif::Category;
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include <random> #include <random>
#include <valarray> #include <valarray>
#include <cif++/Point.hpp> #include <cif++/point.hpp>
namespace mmcif namespace mmcif
{ {
......
...@@ -26,8 +26,8 @@ ...@@ -26,8 +26,8 @@
#include <cmath> #include <cmath>
#include <cif++/AtomType.hpp> #include <cif++/cif.hpp>
#include <cif++/Cif++.hpp> #include <cif++/structure/AtomType.hpp>
namespace mmcif namespace mmcif
{ {
......
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
#include <cif++/BondMap.hpp> #include <cif++/BondMap.hpp>
#include <cif++/Cif++.hpp> #include <cif++/Cif++.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/Compound.hpp> #include <cif++/Compound.hpp>
namespace mmcif namespace mmcif
......
...@@ -34,7 +34,7 @@ ...@@ -34,7 +34,7 @@
#include <cif++/Cif++.hpp> #include <cif++/Cif++.hpp>
#include <cif++/CifParser.hpp> #include <cif++/CifParser.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include <cif++/Compound.hpp> #include <cif++/Compound.hpp>
#include <cif++/Point.hpp> #include <cif++/Point.hpp>
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include <cif++/Structure.hpp> #include <cif++/structure/Structure.hpp>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
...@@ -39,9 +39,9 @@ ...@@ -39,9 +39,9 @@
#include <boost/format.hpp> #include <boost/format.hpp>
#endif #endif
#include <cif++/Cif2PDB.hpp> #include <cif++/pdb/Cif2PDB.hpp>
#include <cif++/CifParser.hpp> #include <cif++/cif/parser.hpp>
#include <cif++/PDB2Cif.hpp> #include <cif++/pdb/PDB2Cif.hpp>
// #include <cif++/AtomShape.hpp> // #include <cif++/AtomShape.hpp>
namespace fs = std::filesystem; namespace fs = std::filesystem;
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#include <mutex> #include <mutex>
#include <cif++/Symmetry.hpp> #include <cif++/Symmetry.hpp>
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include "SymOpTable_data.hpp" #include "SymOpTable_data.hpp"
......
...@@ -27,7 +27,9 @@ ...@@ -27,7 +27,9 @@
#include <atomic> #include <atomic>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstring>
#include <fstream> #include <fstream>
#include <functional>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <map> #include <map>
...@@ -43,7 +45,7 @@ ...@@ -43,7 +45,7 @@
#include <termios.h> #include <termios.h>
#endif #endif
#include <cif++/CifUtils.hpp> #include <cif++/utilities.hpp>
#include "revision.hpp" #include "revision.hpp"
...@@ -54,7 +56,7 @@ namespace fs = std::filesystem; ...@@ -54,7 +56,7 @@ namespace fs = std::filesystem;
namespace cif namespace cif
{ {
extern int VERBOSE; int VERBOSE = 0;
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -29,8 +29,8 @@ ...@@ -29,8 +29,8 @@
#include <stdexcept> #include <stdexcept>
#include <cif++/Cif++.hpp> #include <cif++/cif.hpp>
#include <cif++/Structure.hpp> #include <cif++/structure/Structure.hpp>
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -29,9 +29,8 @@ ...@@ -29,9 +29,8 @@
#include <stdexcept> #include <stdexcept>
#include <cif++/Cif++.hpp> #include <cif++/cif.hpp>
#include <cif++/Structure.hpp> #include <cif++/structure/Structure.hpp>
#include <cif++/CifValidator.hpp>
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -31,12 +31,12 @@ ...@@ -31,12 +31,12 @@
// #include <cif++/DistanceMap.hpp> // #include <cif++/DistanceMap.hpp>
// #include <cif++/BondMap.hpp> // #include <cif++/BondMap.hpp>
#include <cif++/Cif++-v2.hpp> #include <cif++/cif.hpp>
// #include <cif++/CifValidator.hpp> // #include <cif++/CifValidator.hpp>
// #include <cif++/CifParser.hpp> // #include <cif++/CifParser.hpp>
#include <cif++/v2/parser.hpp> #include <cif++/cif/parser.hpp>
#include <cif++/v2/dictionary_parser.hpp> #include <cif++/cif/dictionary_parser.hpp>
namespace tt = boost::test_tools; namespace tt = boost::test_tools;
...@@ -2176,4 +2176,8 @@ BOOST_AUTO_TEST_CASE(replace_all_test) ...@@ -2176,4 +2176,8 @@ BOOST_AUTO_TEST_CASE(replace_all_test)
cif::replace_all(s, ",", ", "); cif::replace_all(s, ",", ", ");
BOOST_CHECK_EQUAL(s, "aap, noot, mies"); BOOST_CHECK_EQUAL(s, "aap, noot, mies");
cif::replace_all(s, ", ", ", ");
BOOST_CHECK_EQUAL(s, "aap, noot, mies");
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment