Commit 19210df6 by Maarten L. Hekkelman

Fix parsing of mmCIF files containing the unquoted string ?? (two question marks)

parent 15c57307
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16) cmake_minimum_required(VERSION 3.16)
# set the project name # set the project name
project(cifpp VERSION 3.0.2 LANGUAGES CXX) project(cifpp VERSION 3.0.4 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
......
Version 3.0.4
- Fix in mmCIF parser: it now correctly handles the unquoted
string ?? (two question marks)
Version 3.0.3 Version 3.0.3
- Better configuration checks, for atomic e.g. - Better configuration checks, for atomic e.g.
- Fixed a problem introduced in refactoring mmcif::Atom - Fixed a problem introduced in refactoring mmcif::Atom
...@@ -17,6 +21,9 @@ Version 3.0.0 ...@@ -17,6 +21,9 @@ Version 3.0.0
- Upgraded mmcif::Structure - Upgraded mmcif::Structure
- various other small fixes - various other small fixes
Version 2.0.5
- Backporting updated CMakeLists.txt file
Version 2.0.4 Version 2.0.4
- Reverted a too strict test when reading cif files. - Reverted a too strict test when reading cif files.
......
...@@ -139,7 +139,7 @@ class SacParser ...@@ -139,7 +139,7 @@ class SacParser
int getNextChar(); int getNextChar();
void retract(); void retract();
void restart(); int restart(int start);
CIFToken getNextToken(); CIFToken getNextToken();
void match(CIFToken token); void match(CIFToken token);
...@@ -181,8 +181,9 @@ class SacParser ...@@ -181,8 +181,9 @@ class SacParser
eStateTextField, eStateTextField,
eStateFloat = 100, eStateFloat = 100,
eStateInt = 110, eStateInt = 110,
// eStateNumericSuffix = 200, eStateValue = 300,
eStateValue = 300 eStateDATA,
eStateSAVE
}; };
std::istream &mData; std::istream &mData;
...@@ -191,7 +192,6 @@ class SacParser ...@@ -191,7 +192,6 @@ class SacParser
bool mValidate; bool mValidate;
uint32_t mLineNr; uint32_t mLineNr;
bool mBol; bool mBol;
int mState, mStart;
CIFToken mLookahead; CIFToken mLookahead;
std::string mTokenValue; std::string mTokenValue;
CIFValueType mTokenType; CIFValueType mTokenType;
......
...@@ -42,7 +42,7 @@ namespace cif ...@@ -42,7 +42,7 @@ namespace cif
const uint32_t kMaxLineLength = 132; const uint32_t kMaxLineLength = 132;
const uint8_t kCharTraitsTable[128] = { const uint8_t kCharTraitsTable[128] = {
// 0 1 2 3 4 5 6 7 8 9 a b c d e f // 0 1 2 3 4 5 6 7 8 9 a b c d e f
14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2 14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
...@@ -151,23 +151,26 @@ void SacParser::retract() ...@@ -151,23 +151,26 @@ void SacParser::retract()
mTokenValue.pop_back(); mTokenValue.pop_back();
} }
void SacParser::restart()
int SacParser::restart(int start)
{ {
int result = 0;
while (not mTokenValue.empty()) while (not mTokenValue.empty())
retract(); retract();
switch (mStart) switch (start)
{ {
case eStateStart: case eStateStart:
mState = mStart = eStateFloat; result = eStateFloat;
break; break;
case eStateFloat: case eStateFloat:
mState = mStart = eStateInt; result = eStateInt;
break; break;
case eStateInt: case eStateInt:
mState = mStart = eStateValue; result = eStateValue;
break; break;
default: default:
...@@ -175,6 +178,8 @@ void SacParser::restart() ...@@ -175,6 +178,8 @@ void SacParser::restart()
} }
mBol = false; mBol = false;
return result;
} }
void SacParser::match(SacParser::CIFToken t) void SacParser::match(SacParser::CIFToken t)
...@@ -191,7 +196,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -191,7 +196,7 @@ SacParser::CIFToken SacParser::getNextToken()
CIFToken result = eCIFTokenUnknown; CIFToken result = eCIFTokenUnknown;
int quoteChar = 0; int quoteChar = 0;
mState = mStart = eStateStart; int state = eStateStart, start = eStateStart;
mBol = false; mBol = false;
mTokenValue.clear(); mTokenValue.clear();
...@@ -201,7 +206,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -201,7 +206,7 @@ SacParser::CIFToken SacParser::getNextToken()
{ {
auto ch = getNextChar(); auto ch = getNextChar();
switch (mState) switch (state)
{ {
case eStateStart: case eStateStart:
if (ch == kEOF) if (ch == kEOF)
...@@ -209,27 +214,23 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -209,27 +214,23 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == '\n') else if (ch == '\n')
{ {
mBol = true; mBol = true;
mState = eStateWhite; state = eStateWhite;
} }
else if (ch == ' ' or ch == '\t') else if (ch == ' ' or ch == '\t')
mState = eStateWhite; state = eStateWhite;
else if (ch == '#') else if (ch == '#')
mState = eStateComment; state = eStateComment;
else if (ch == '.')
mState = eStateDot;
else if (ch == '_') else if (ch == '_')
mState = eStateTag; state = eStateTag;
else if (ch == ';' and mBol) else if (ch == ';' and mBol)
mState = eStateTextField; state = eStateTextField;
else if (ch == '\'' or ch == '"') else if (ch == '\'' or ch == '"')
{ {
quoteChar = ch; quoteChar = ch;
mState = eStateQuotedString; state = eStateQuotedString;
} }
else if (ch == '?')
mState = eStateQuestionMark;
else else
restart(); state = start = restart(start);
break; break;
case eStateWhite: case eStateWhite:
...@@ -237,7 +238,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -237,7 +238,7 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenEOF; result = eCIFTokenEOF;
else if (not isspace(ch)) else if (not isspace(ch))
{ {
mState = eStateStart; state = eStateStart;
retract(); retract();
mTokenValue.clear(); mTokenValue.clear();
} }
...@@ -248,7 +249,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -248,7 +249,7 @@ SacParser::CIFToken SacParser::getNextToken()
case eStateComment: case eStateComment:
if (ch == '\n') if (ch == '\n')
{ {
mState = eStateStart; state = eStateStart;
mBol = true; mBol = true;
mTokenValue.clear(); mTokenValue.clear();
} }
...@@ -258,44 +259,19 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -258,44 +259,19 @@ SacParser::CIFToken SacParser::getNextToken()
error("invalid character in comment"); error("invalid character in comment");
break; break;
case eStateQuestionMark:
if (isNonBlank(ch))
mState = eStateValue;
else
{
retract();
result = eCIFTokenValue;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
}
break;
case eStateDot:
if (isdigit(ch))
mState = eStateFloat + 2;
else if (isspace(ch))
{
retract();
result = eCIFTokenValue;
mTokenType = eCIFValueInapplicable;
}
else
mState = eStateValue;
break;
case eStateTextField: case eStateTextField:
if (ch == '\n') if (ch == '\n')
mState = eStateTextField + 1; state = eStateTextField + 1;
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated textfield"); error("unterminated textfield");
else if (not isAnyPrint(ch) and cif::VERBOSE >= 0) else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")"); // error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl; std::cerr << "invalid character in text field '" << std::string({static_cast<char>(ch)}) << "' (" << ch << ") line: " << mLineNr << std::endl;
break; break;
case eStateTextField + 1: case eStateTextField + 1:
if (isTextLead(ch) or ch == ' ' or ch == '\t') if (isTextLead(ch) or ch == ' ' or ch == '\t')
mState = eStateTextField; state = eStateTextField;
else if (ch == ';') else if (ch == ';')
{ {
assert(mTokenValue.length() >= 2); assert(mTokenValue.length() >= 2);
...@@ -313,7 +289,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -313,7 +289,7 @@ SacParser::CIFToken SacParser::getNextToken()
if (ch == kEOF) if (ch == kEOF)
error("unterminated quoted string"); error("unterminated quoted string");
else if (ch == quoteChar) else if (ch == quoteChar)
mState = eStateQuotedStringQuote; state = eStateQuotedStringQuote;
else if (not isAnyPrint(ch)) else if (not isAnyPrint(ch))
error("invalid character in quoted string"); error("invalid character in quoted string");
break; break;
...@@ -331,7 +307,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -331,7 +307,7 @@ SacParser::CIFToken SacParser::getNextToken()
else if (ch == quoteChar) else if (ch == quoteChar)
; ;
else if (isAnyPrint(ch)) else if (isAnyPrint(ch))
mState = eStateQuotedString; state = eStateQuotedString;
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated quoted string"); error("unterminated quoted string");
else else
...@@ -349,12 +325,12 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -349,12 +325,12 @@ SacParser::CIFToken SacParser::getNextToken()
case eStateFloat: case eStateFloat:
if (ch == '+' or ch == '-') if (ch == '+' or ch == '-')
{ {
mState = eStateFloat + 1; state = eStateFloat + 1;
} }
else if (isdigit(ch)) else if (isdigit(ch))
mState = eStateFloat + 1; state = eStateFloat + 1;
else else
restart(); state = start = restart(start);
break; break;
case eStateFloat + 1: case eStateFloat + 1:
...@@ -362,9 +338,9 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -362,9 +338,9 @@ SacParser::CIFToken SacParser::getNextToken()
// mState = eStateNumericSuffix; // mState = eStateNumericSuffix;
// else // else
if (ch == '.') if (ch == '.')
mState = eStateFloat + 2; state = eStateFloat + 2;
else if (tolower(ch) == 'e') else if (tolower(ch) == 'e')
mState = eStateFloat + 3; state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF) else if (isWhite(ch) or ch == kEOF)
{ {
retract(); retract();
...@@ -372,16 +348,13 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -372,16 +348,13 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueInt; mTokenType = eCIFValueInt;
} }
else else
restart(); state = start = restart(start);
break; break;
// parsed '.' // parsed '.'
case eStateFloat + 2: case eStateFloat + 2:
// if (ch == '(') // numeric???
// mState = eStateNumericSuffix;
// else
if (tolower(ch) == 'e') if (tolower(ch) == 'e')
mState = eStateFloat + 3; state = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF) else if (isWhite(ch) or ch == kEOF)
{ {
retract(); retract();
...@@ -389,30 +362,27 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -389,30 +362,27 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueFloat; mTokenType = eCIFValueFloat;
} }
else else
restart(); state = start = restart(start);
break; break;
// parsed 'e' // parsed 'e'
case eStateFloat + 3: case eStateFloat + 3:
if (ch == '-' or ch == '+') if (ch == '-' or ch == '+')
mState = eStateFloat + 4; state = eStateFloat + 4;
else if (isdigit(ch)) else if (isdigit(ch))
mState = eStateFloat + 5; state = eStateFloat + 5;
else else
restart(); state = start = restart(start);
break; break;
case eStateFloat + 4: case eStateFloat + 4:
if (isdigit(ch)) if (isdigit(ch))
mState = eStateFloat + 5; state = eStateFloat + 5;
else else
restart(); state = start = restart(start);
break; break;
case eStateFloat + 5: case eStateFloat + 5:
// if (ch == '(')
// mState = eStateNumericSuffix;
// else
if (isWhite(ch) or ch == kEOF) if (isWhite(ch) or ch == kEOF)
{ {
retract(); retract();
...@@ -420,14 +390,14 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -420,14 +390,14 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueFloat; mTokenType = eCIFValueFloat;
} }
else else
restart(); state = start = restart(start);
break; break;
case eStateInt: case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-') if (isdigit(ch) or ch == '+' or ch == '-')
mState = eStateInt + 1; state = eStateInt + 1;
else else
restart(); state = start = restart(start);
break; break;
case eStateInt + 1: case eStateInt + 1:
...@@ -438,35 +408,11 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -438,35 +408,11 @@ SacParser::CIFToken SacParser::getNextToken()
mTokenType = eCIFValueInt; mTokenType = eCIFValueInt;
} }
else else
restart(); state = start = restart(start);
break; break;
// case eStateNumericSuffix:
// if (isdigit(ch))
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
//
// case eStateNumericSuffix + 1:
// if (ch == ')')
// {
// result = eCIFTokenValue;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
case eStateValue: case eStateValue:
if (isNonBlank(ch)) if (ch == '_')
mState = eStateValue + 1;
else
error("invalid character at this position");
break;
case eStateValue + 1:
if (ch == '_') // first _, check for keywords
{ {
std::string s = toLowerCopy(mTokenValue); std::string s = toLowerCopy(mTokenValue);
...@@ -476,23 +422,40 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -476,23 +422,40 @@ SacParser::CIFToken SacParser::getNextToken()
result = eCIFTokenSTOP; result = eCIFTokenSTOP;
else if (s == "loop_") else if (s == "loop_")
result = eCIFTokenLOOP; result = eCIFTokenLOOP;
else if (s == "data_" or s == "save_") else if (s == "data_")
mState = eStateValue + 2; {
state = eStateDATA;
continue;
} }
else if (not isNonBlank(ch)) else if (s == "save_")
{
state = eStateSAVE;
continue;
}
}
if (result == eCIFTokenUnknown and not isNonBlank(ch))
{ {
retract(); retract();
result = eCIFTokenValue; result = eCIFTokenValue;
mTokenType = eCIFValueString;
if (mTokenValue == ".")
mTokenType = eCIFValueInapplicable;
else if (mTokenValue == "?")
{
mTokenType = eCIFValueUnknown;
mTokenValue.clear();
}
} }
break; break;
case eStateValue + 2: case eStateDATA:
case eStateSAVE:
if (not isNonBlank(ch)) if (not isNonBlank(ch))
{ {
retract(); retract();
if (tolower(mTokenValue[0]) == 'd') if (state == eStateDATA)
result = eCIFTokenDATA; result = eCIFTokenDATA;
else else
result = eCIFTokenSAVE; result = eCIFTokenSAVE;
...@@ -521,6 +484,7 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -521,6 +484,7 @@ SacParser::CIFToken SacParser::getNextToken()
return result; return result;
} }
DatablockIndex SacParser::indexDatablocks() DatablockIndex SacParser::indexDatablocks()
{ {
DatablockIndex index; DatablockIndex index;
......
...@@ -31,26 +31,25 @@ ...@@ -31,26 +31,25 @@
#include <stdexcept> #include <stdexcept>
// #include "cif++/DistanceMap.hpp" // #include "cif++/DistanceMap.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/BondMap.hpp" #include "cif++/BondMap.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/CifValidator.hpp" #include "cif++/CifValidator.hpp"
namespace tt = boost::test_tools; namespace tt = boost::test_tools;
std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
// -------------------------------------------------------------------- // --------------------------------------------------------------------
cif::File operator""_cf(const char* text, size_t length) cif::File operator""_cf(const char *text, size_t length)
{ {
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(text), length); } buffer(const_cast<char *>(text), length);
std::istream is(&buffer); std::istream is(&buffer);
return cif::File(is); return cif::File(is);
...@@ -91,11 +90,11 @@ _test.name ...@@ -91,11 +90,11 @@ _test.name
3 mies 3 mies
)"_cf; )"_cf;
auto& db = f.firstDatablock(); auto &db = f.firstDatablock();
BOOST_CHECK(db.getName() == "TEST"); BOOST_CHECK(db.getName() == "TEST");
auto& test = db["test"]; auto &test = db["test"];
BOOST_CHECK(test.size() == 3); BOOST_CHECK(test.size() == 3);
// wrong! the next lines will crash. And that's OK, don't do that // wrong! the next lines will crash. And that's OK, don't do that
...@@ -106,10 +105,10 @@ _test.name ...@@ -106,10 +105,10 @@ _test.name
// test.purge(); // test.purge();
auto n = test.erase(cif::Key("id") == 1, [](const cif::Row& r) { auto n = test.erase(cif::Key("id") == 1, [](const cif::Row &r)
{
BOOST_CHECK_EQUAL(r["id"].as<int>(), 1); BOOST_CHECK_EQUAL(r["id"].as<int>(), 1);
BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); BOOST_CHECK_EQUAL(r["name"].as<std::string>(), "aap"); });
});
BOOST_CHECK_EQUAL(n, 1); BOOST_CHECK_EQUAL(n, 1);
} }
...@@ -131,15 +130,15 @@ _test.value ...@@ -131,15 +130,15 @@ _test.value
3 mies 1.2 3 mies 1.2
)"_cf; )"_cf;
auto& db = f.firstDatablock(); auto &db = f.firstDatablock();
BOOST_CHECK(db.getName() == "TEST"); BOOST_CHECK(db.getName() == "TEST");
auto& test = db["test"]; auto &test = db["test"];
BOOST_CHECK(test.size() == 3); BOOST_CHECK(test.size() == 3);
int n = 0; int n = 0;
for (auto r: test.find(cif::Key("name") == "aap")) for (auto r : test.find(cif::Key("name") == "aap"))
{ {
BOOST_CHECK(++n == 1); BOOST_CHECK(++n == 1);
BOOST_CHECK(r["id"].as<int>() == 1); BOOST_CHECK(r["id"].as<int>() == 1);
...@@ -255,11 +254,11 @@ save__cat_2.desc ...@@ -255,11 +254,11 @@ save__cat_2.desc
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
...@@ -290,17 +289,17 @@ _cat_2.desc ...@@ -290,17 +289,17 @@ _cat_2.desc
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"]; auto &cat2 = f.firstDatablock()["cat_2"];
BOOST_CHECK(cat1.size() == 3); BOOST_CHECK(cat1.size() == 3);
BOOST_CHECK(cat2.size() == 3); BOOST_CHECK(cat2.size() == 3);
...@@ -316,11 +315,10 @@ _cat_2.desc ...@@ -316,11 +315,10 @@ _cat_2.desc
// { "desc", "moet fout gaan" } // { "desc", "moet fout gaan" }
// }), std::exception); // }), std::exception);
BOOST_CHECK_THROW(cat2.emplace({ BOOST_CHECK_THROW(cat2.emplace({{"id", "vijf"}, // <- invalid value
{ "id", "vijf" }, // <- invalid value {"parent_id", 2},
{ "parent_id", 2 }, {"desc", "moet fout gaan"}}),
{ "desc", "moet fout gaan" } std::exception);
}), std::exception);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -385,11 +383,11 @@ save__cat_1.c ...@@ -385,11 +383,11 @@ save__cat_1.c
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
...@@ -412,16 +410,16 @@ mies Mies ...@@ -412,16 +410,16 @@ mies Mies
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
BOOST_CHECK(cat1.size() == 3); BOOST_CHECK(cat1.size() == 3);
...@@ -432,8 +430,6 @@ mies Mies ...@@ -432,8 +430,6 @@ mies Mies
cat1.erase(cif::Key("id") == "noot"); cat1.erase(cif::Key("id") == "noot");
BOOST_CHECK(cat1.size() == 2); BOOST_CHECK(cat1.size() == 2);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -535,11 +531,11 @@ save__cat_2.desc ...@@ -535,11 +531,11 @@ save__cat_2.desc
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
...@@ -573,21 +569,21 @@ _cat_2.desc ...@@ -573,21 +569,21 @@ _cat_2.desc
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"]; auto &cat2 = f.firstDatablock()["cat_2"];
// check a rename in parent and child // check a rename in parent and child
for (auto r: cat1.find(cif::Key("id") == 1)) for (auto r : cat1.find(cif::Key("id") == 1))
{ {
r["id"] = 10; r["id"] = 10;
break; break;
...@@ -604,7 +600,7 @@ _cat_2.desc ...@@ -604,7 +600,7 @@ _cat_2.desc
// check a rename in parent and child, this time only one child should be renamed // check a rename in parent and child, this time only one child should be renamed
for (auto r: cat1.find(cif::Key("id") == 2)) for (auto r : cat1.find(cif::Key("id") == 2))
{ {
r["id"] = 20; r["id"] = 20;
break; break;
...@@ -624,8 +620,6 @@ _cat_2.desc ...@@ -624,8 +620,6 @@ _cat_2.desc
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "noot").size() == 1); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "noot").size() == 1);
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "n2").size() == 0); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20 and cif::Key("name2") == "n2").size() == 0);
// // -------------------------------------------------------------------- // // --------------------------------------------------------------------
// cat1.erase(cif::Key("id") == 10); // cat1.erase(cif::Key("id") == 10);
...@@ -637,9 +631,6 @@ _cat_2.desc ...@@ -637,9 +631,6 @@ _cat_2.desc
// BOOST_CHECK(cat1.size() == 1); // BOOST_CHECK(cat1.size() == 1);
// BOOST_CHECK(cat2.size() == 1); // BOOST_CHECK(cat2.size() == 1);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -743,11 +734,11 @@ save__cat_2.parent_id3 ...@@ -743,11 +734,11 @@ save__cat_2.parent_id3
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
...@@ -791,21 +782,21 @@ _cat_2.parent_id3 ...@@ -791,21 +782,21 @@ _cat_2.parent_id3
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"]; auto &cat2 = f.firstDatablock()["cat_2"];
// check a rename in parent and child // check a rename in parent and child
for (auto r: cat1.find(cif::Key("id") == 1)) for (auto r : cat1.find(cif::Key("id") == 1))
{ {
r["id"] = 10; r["id"] = 10;
break; break;
...@@ -820,8 +811,7 @@ _cat_2.parent_id3 ...@@ -820,8 +811,7 @@ _cat_2.parent_id3
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 1).size() == 1); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 1).size() == 1);
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 10).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 10).size() == 2);
for (auto r : cat1.find(cif::Key("id") == 2))
for (auto r: cat1.find(cif::Key("id") == 2))
{ {
r["id"] = 20; r["id"] = 20;
break; break;
...@@ -836,8 +826,7 @@ _cat_2.parent_id3 ...@@ -836,8 +826,7 @@ _cat_2.parent_id3
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 2).size() == 2);
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 20).size() == 2);
for (auto r : cat1.find(cif::Key("id") == 3))
for (auto r: cat1.find(cif::Key("id") == 3))
{ {
r["id"] = 30; r["id"] = 30;
break; break;
...@@ -852,8 +841,7 @@ _cat_2.parent_id3 ...@@ -852,8 +841,7 @@ _cat_2.parent_id3
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 3).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 3).size() == 2);
BOOST_CHECK(cat2.find(cif::Key("parent_id") == 30).size() == 1); BOOST_CHECK(cat2.find(cif::Key("parent_id") == 30).size() == 1);
for (auto r : cat1.find(cif::Key("id") == 4))
for (auto r: cat1.find(cif::Key("id") == 4))
{ {
r["id"] = 40; r["id"] = 40;
break; break;
...@@ -967,11 +955,11 @@ cat_2 3 cat_2:cat_1:3 ...@@ -967,11 +955,11 @@ cat_2 3 cat_2:cat_1:3
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
...@@ -1008,17 +996,17 @@ _cat_2.parent_id3 ...@@ -1008,17 +996,17 @@ _cat_2.parent_id3
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"]; auto &cat2 = f.firstDatablock()["cat_2"];
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// check iterate children // check iterate children
...@@ -1032,13 +1020,14 @@ _cat_2.parent_id3 ...@@ -1032,13 +1020,14 @@ _cat_2.parent_id3
BOOST_ASSERT(CR2set.size() == 3); BOOST_ASSERT(CR2set.size() == 3);
std::vector<int> CRids; std::vector<int> CRids;
std::transform(CR2set.begin(), CR2set.end(), std::back_inserter(CRids), [](cif::Row r) { return r["id"].as<int>(); }); std::transform(CR2set.begin(), CR2set.end(), std::back_inserter(CRids), [](cif::Row r)
{ return r["id"].as<int>(); });
std::sort(CRids.begin(), CRids.end()); std::sort(CRids.begin(), CRids.end());
BOOST_CHECK(CRids == std::vector<int>({ 4, 5, 6})); BOOST_CHECK(CRids == std::vector<int>({4, 5, 6}));
// check a rename in parent and child // check a rename in parent and child
for (auto r: cat1.find(cif::Key("id") == 1)) for (auto r : cat1.find(cif::Key("id") == 1))
{ {
r["id"] = 10; r["id"] = 10;
break; break;
...@@ -1057,7 +1046,7 @@ _cat_2.parent_id3 ...@@ -1057,7 +1046,7 @@ _cat_2.parent_id3
BOOST_CHECK(cat2.find(cif::Key("parent_id2") == 10).size() == 1); BOOST_CHECK(cat2.find(cif::Key("parent_id2") == 10).size() == 1);
BOOST_CHECK(cat2.find(cif::Key("parent_id3") == 10).size() == 1); BOOST_CHECK(cat2.find(cif::Key("parent_id3") == 10).size() == 1);
for (auto r: cat1.find(cif::Key("id") == 2)) for (auto r : cat1.find(cif::Key("id") == 2))
{ {
r["id"] = 20; r["id"] = 20;
break; break;
...@@ -1076,7 +1065,7 @@ _cat_2.parent_id3 ...@@ -1076,7 +1065,7 @@ _cat_2.parent_id3
BOOST_CHECK(cat2.find(cif::Key("parent_id2") == 20).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id2") == 20).size() == 2);
BOOST_CHECK(cat2.find(cif::Key("parent_id3") == 20).size() == 2); BOOST_CHECK(cat2.find(cif::Key("parent_id3") == 20).size() == 2);
for (auto r: cat1.find(cif::Key("id") == 3)) for (auto r : cat1.find(cif::Key("id") == 3))
{ {
r["id"] = 30; r["id"] = 30;
break; break;
...@@ -1128,34 +1117,34 @@ _test.name ...@@ -1128,34 +1117,34 @@ _test.name
5 ? 5 ?
)"_cf; )"_cf;
auto& db = f.firstDatablock(); auto &db = f.firstDatablock();
for (auto r: db["test"].find(cif::Key("id") == 1)) for (auto r : db["test"].find(cif::Key("id") == 1))
{ {
const auto& [id, name] = r.get<int, std::string>({"id", "name"}); const auto &[id, name] = r.get<int, std::string>({"id", "name"});
BOOST_CHECK(id == 1); BOOST_CHECK(id == 1);
BOOST_CHECK(name == "aap"); BOOST_CHECK(name == "aap");
} }
for (auto r: db["test"].find(cif::Key("id") == 4)) for (auto r : db["test"].find(cif::Key("id") == 4))
{ {
const auto& [id, name] = r.get<int, std::string>({"id", "name"}); const auto &[id, name] = r.get<int, std::string>({"id", "name"});
BOOST_CHECK(id == 4); BOOST_CHECK(id == 4);
BOOST_CHECK(name.empty()); BOOST_CHECK(name.empty());
} }
for (auto r: db["test"].find(cif::Key("id") == 5)) for (auto r : db["test"].find(cif::Key("id") == 5))
{ {
const auto& [id, name] = r.get<int, std::string>({"id", "name"}); const auto &[id, name] = r.get<int, std::string>({"id", "name"});
BOOST_CHECK(id == 5); BOOST_CHECK(id == 5);
BOOST_CHECK(name.empty()); BOOST_CHECK(name.empty());
} }
// optional // optional
for (auto r: db["test"]) for (auto r : db["test"])
{ {
const auto& [id, name] = r.get<int, std::optional<std::string>>({"id", "name"}); const auto &[id, name] = r.get<int, std::optional<std::string>>({"id", "name"});
switch (id) switch (id)
{ {
case 1: BOOST_CHECK(name == "aap"); break; case 1: BOOST_CHECK(name == "aap"); break;
...@@ -1185,11 +1174,11 @@ _test.name ...@@ -1185,11 +1174,11 @@ _test.name
5 ? 5 ?
)"_cf; )"_cf;
auto& db = f.firstDatablock(); auto &db = f.firstDatablock();
// query tests // query tests
for (const auto& [id, name]: db["test"].rows<int, std::optional<std::string>>("id", "name")) for (const auto &[id, name] : db["test"].rows<int, std::optional<std::string>>("id", "name"))
{ {
switch (id) switch (id)
{ {
...@@ -1204,7 +1193,6 @@ _test.name ...@@ -1204,7 +1193,6 @@ _test.name
} }
} }
BOOST_AUTO_TEST_CASE(c3) BOOST_AUTO_TEST_CASE(c3)
{ {
cif::VERBOSE = 1; cif::VERBOSE = 1;
...@@ -1221,10 +1209,10 @@ _test.name ...@@ -1221,10 +1209,10 @@ _test.name
5 ? 5 ?
)"_cf; )"_cf;
auto& db = f.firstDatablock(); auto &db = f.firstDatablock();
// query tests // query tests
for (const auto& [id, name]: db["test"].find<int, std::optional<std::string>>(cif::All(), "id", "name")) for (const auto &[id, name] : db["test"].find<int, std::optional<std::string>>(cif::All(), "id", "name"))
{ {
switch (id) switch (id)
{ {
...@@ -1238,7 +1226,7 @@ _test.name ...@@ -1238,7 +1226,7 @@ _test.name
} }
} }
const auto& [id, name] = db["test"].find1<int, std::string>(cif::Key("id") == 1, "id", "name"); const auto &[id, name] = db["test"].find1<int, std::string>(cif::Key("id") == 1, "id", "name");
BOOST_CHECK(id == 1); BOOST_CHECK(id == 1);
BOOST_CHECK(name == "aap"); BOOST_CHECK(name == "aap");
...@@ -1396,11 +1384,11 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3 ...@@ -1396,11 +1384,11 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
struct membuf : public std::streambuf struct membuf : public std::streambuf
{ {
membuf(char* text, size_t length) membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} buffer(const_cast<char*>(dict), sizeof(dict) - 1); } buffer(const_cast<char *>(dict), sizeof(dict) - 1);
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
cif::Validator validator("test", is_dict); cif::Validator validator("test", is_dict);
...@@ -1441,18 +1429,18 @@ _cat_3.num ...@@ -1441,18 +1429,18 @@ _cat_3.num
struct data_membuf : public std::streambuf struct data_membuf : public std::streambuf
{ {
data_membuf(char* text, size_t length) data_membuf(char *text, size_t length)
{ {
this->setg(text, text, text + length); this->setg(text, text, text + length);
} }
} data_buffer(const_cast<char*>(data), sizeof(data) - 1); } data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer); std::istream is_data(&data_buffer);
f.load(is_data); f.load(is_data);
auto& cat1 = f.firstDatablock()["cat_1"]; auto &cat1 = f.firstDatablock()["cat_1"];
auto& cat2 = f.firstDatablock()["cat_2"]; auto &cat2 = f.firstDatablock()["cat_2"];
auto& cat3 = f.firstDatablock()["cat_3"]; auto &cat3 = f.firstDatablock()["cat_3"];
cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje"); cat3.update_value("name"_key == "aap" and "num"_key == 1, "name", "aapje");
...@@ -1473,7 +1461,7 @@ _cat_3.num ...@@ -1473,7 +1461,7 @@ _cat_3.num
} }
int i = 0; int i = 0;
for (const auto &[id, name, num, desc]: cat2.rows<int,std::string,int,std::string>("id", "name", "num", "desc")) for (const auto &[id, name, num, desc] : cat2.rows<int, std::string, int, std::string>("id", "name", "num", "desc"))
{ {
switch (++i) switch (++i)
{ {
...@@ -1505,7 +1493,7 @@ _cat_3.num ...@@ -1505,7 +1493,7 @@ _cat_3.num
BOOST_CHECK(cat1.size() == 4); BOOST_CHECK(cat1.size() == 4);
i = 0; i = 0;
for (const auto &[id, name, desc]: cat1.rows<int,std::string,std::string>("id", "name", "desc")) for (const auto &[id, name, desc] : cat1.rows<int, std::string, std::string>("id", "name", "desc"))
{ {
switch (++i) switch (++i)
{ {
...@@ -1634,7 +1622,7 @@ PRO CD HD3 SING N N 16 ...@@ -1634,7 +1622,7 @@ PRO CD HD3 SING N N 16
PRO OXT HXT SING N N 17 PRO OXT HXT SING N N 17
)"_cf; )"_cf;
const std::filesystem::path example(gTestDir / ".."/"examples"/"1cbs.cif.gz"); const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
mmcif::File file(example.string()); mmcif::File file(example.string());
mmcif::Structure structure(file); mmcif::Structure structure(file);
...@@ -1644,9 +1632,9 @@ PRO OXT HXT SING N N 17 ...@@ -1644,9 +1632,9 @@ PRO OXT HXT SING N N 17
// Test the bonds of the first three residues, that's PRO A 1, ASN A 2, PHE A 3 // Test the bonds of the first three residues, that's PRO A 1, ASN A 2, PHE A 3
for (const auto& [compound, seqnr]: std::initializer_list<std::tuple<std::string,int>>{ { "PRO", 1 }, { "ASN", 2 }, { "PHE", 3 } }) for (const auto &[compound, seqnr] : std::initializer_list<std::tuple<std::string, int>>{{"PRO", 1}, {"ASN", 2}, {"PHE", 3}})
{ {
auto& res = structure.getResidue("A", compound, seqnr); auto &res = structure.getResidue("A", compound, seqnr);
auto atoms = res.atoms(); auto atoms = res.atoms();
auto dc = components.get(compound); auto dc = components.get(compound);
...@@ -1655,14 +1643,14 @@ PRO OXT HXT SING N N 17 ...@@ -1655,14 +1643,14 @@ PRO OXT HXT SING N N 17
auto cc = dc->get("chem_comp_bond"); auto cc = dc->get("chem_comp_bond");
BOOST_ASSERT(cc != nullptr); BOOST_ASSERT(cc != nullptr);
std::set<std::tuple<std::string,std::string>> bonded; std::set<std::tuple<std::string, std::string>> bonded;
for (const auto& [atom_id_1, atom_id_2]: cc->rows<std::string,std::string>("atom_id_1", "atom_id_2")) for (const auto &[atom_id_1, atom_id_2] : cc->rows<std::string, std::string>("atom_id_1", "atom_id_2"))
{ {
if (atom_id_1 > atom_id_2) if (atom_id_1 > atom_id_2)
bonded.insert({ atom_id_2, atom_id_1 }); bonded.insert({atom_id_2, atom_id_1});
else else
bonded.insert({ atom_id_1, atom_id_2 }); bonded.insert({atom_id_1, atom_id_2});
} }
for (size_t i = 0; i + 1 < atoms.size(); ++i) for (size_t i = 0; i + 1 < atoms.size(); ++i)
...@@ -1677,8 +1665,8 @@ PRO OXT HXT SING N N 17 ...@@ -1677,8 +1665,8 @@ PRO OXT HXT SING N N 17
bool bonded_1_i = bm(atoms[j], atoms[i]); bool bonded_1_i = bm(atoms[j], atoms[i]);
bool bonded_t = label_i > label_j bool bonded_t = label_i > label_j
? bonded.count({ label_j, label_i }) ? bonded.count({label_j, label_i})
: bonded.count({ label_i, label_j }); : bonded.count({label_i, label_j});
BOOST_CHECK(bonded_1 == bonded_t); BOOST_CHECK(bonded_1 == bonded_t);
BOOST_CHECK(bonded_1_i == bonded_t); BOOST_CHECK(bonded_1_i == bonded_t);
...@@ -1718,20 +1706,19 @@ BOOST_AUTO_TEST_CASE(t1) ...@@ -1718,20 +1706,19 @@ BOOST_AUTO_TEST_CASE(t1)
// q = Normalize(q); // q = Normalize(q);
// Quaternion q{ 0.1, 0.2, 0.3, 0.4 }; // Quaternion q{ 0.1, 0.2, 0.3, 0.4 };
Quaternion q{ 0.5, 0.5, 0.5, 0.5 }; Quaternion q{0.5, 0.5, 0.5, 0.5};
q = Normalize(q); q = Normalize(q);
const auto &&[angle0, axis0] = QuaternionToAngleAxis(q); const auto &&[angle0, axis0] = QuaternionToAngleAxis(q);
std::vector<Point> p1{ std::vector<Point> p1{
{ 16.979, 13.301, 44.555 }, {16.979, 13.301, 44.555},
{ 18.150, 13.525, 43.680 }, {18.150, 13.525, 43.680},
{ 18.656, 14.966, 43.784 }, {18.656, 14.966, 43.784},
{ 17.890, 15.889, 44.078 }, {17.890, 15.889, 44.078},
{ 17.678, 13.270, 42.255 }, {17.678, 13.270, 42.255},
{ 16.248, 13.734, 42.347 }, {16.248, 13.734, 42.347},
{ 15.762, 13.216, 43.724 } {15.762, 13.216, 43.724}};
};
auto p2 = p1; auto p2 = p1;
...@@ -1757,3 +1744,38 @@ BOOST_AUTO_TEST_CASE(t1) ...@@ -1757,3 +1744,38 @@ BOOST_AUTO_TEST_CASE(t1)
// std::cout << "rmsd: " << RMSd(p1, p2) << std::endl; // std::cout << "rmsd: " << RMSd(p1, p2) << std::endl;
} }
// Regression test for the mmCIF parser: the unquoted value `??` has to be read
// back as the literal string "??", both straight after parsing and after the
// file has been written out and parsed a second time.
BOOST_AUTO_TEST_CASE(parser_test_1)
{
	// Verifies that category `test` in the first datablock of `file` contains
	// exactly one row and that the `text` item of every row equals "??".
	const auto expectSingleDoubleQuestionMark = [](auto &file)
	{
		auto &category = file.firstDatablock()["test"];

		BOOST_CHECK_EQUAL(category.size(), 1);

		for (auto row : category)
		{
			const auto &[value] = row.template get<std::string>({"text"});
			BOOST_CHECK_EQUAL(value, "??");
		}
	};

	// Step 1: parse the fixture from an in-memory literal.
	auto parsed = R"(
data_QM
_test.text ??
)"_cf;

	expectSingleDoubleQuestionMark(parsed);

	// Step 2: serialise the parsed file and load the result again.
	std::stringstream roundTrip;
	parsed.save(roundTrip);

	cif::File reloaded(roundTrip);

	expectSingleDoubleQuestionMark(reloaded);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment