Commit 50df2504 by Maarten L. Hekkelman

Merge branch 'develop' into trunk

parents f2cfe284 2409fc5b
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16) cmake_minimum_required(VERSION 3.16)
# set the project name # set the project name
project(cifpp VERSION 5.0.9 LANGUAGES CXX) project(cifpp VERSION 5.1.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
...@@ -382,6 +382,16 @@ install(FILES ...@@ -382,6 +382,16 @@ install(FILES
DESTINATION ${CIFPP_DATA_DIR} DESTINATION ${CIFPP_DATA_DIR}
) )
# Also install the dictionaries and the components file into the cache
# directory, but only when a cache directory has been configured.
#
# The variable is tested by name on purpose: if(${CIFPP_CACHE_DIR}) would
# expand to if(<path>), and if() would then look up a variable named after
# that path, which is undefined. For any real directory path the condition
# would be false and nothing would be installed.
if(CIFPP_CACHE_DIR)
	install(FILES
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
		${COMPONENTS_CIF}
		DESTINATION ${CIFPP_CACHE_DIR}
	)
endif()
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in) set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)
configure_package_config_file( configure_package_config_file(
......
Version 5.1
- New parser, optimised for speed
- Fix in unique ID generator
Version 5.0.10
- Fix in progress_bar, was using too much CPU
- Optimised mmCIF parser
Version 5.0.9 Version 5.0.9
- Fix in dihedral angle calculations - Fix in dihedral angle calculations
- Added create_water to model - Added create_water to model
......
...@@ -32,5 +32,6 @@ namespace cif ...@@ -32,5 +32,6 @@ namespace cif
{ {
validator parse_dictionary(std::string_view name, std::istream &is); validator parse_dictionary(std::string_view name, std::istream &is);
void extend_dictionary(validator &v, std::istream &is);
} // namespace cif } // namespace cif
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include "cif++/row.hpp" #include "cif++/row.hpp"
#include <map> #include <map>
#include <regex>
namespace cif namespace cif
{ {
...@@ -54,8 +53,6 @@ class sac_parser ...@@ -54,8 +53,6 @@ class sac_parser
public: public:
using datablock_index = std::map<std::string, std::size_t>; using datablock_index = std::map<std::string, std::size_t>;
sac_parser(std::istream &is, bool init = true);
virtual ~sac_parser() = default; virtual ~sac_parser() = default;
enum CharTraitsMask : uint8_t enum CharTraitsMask : uint8_t
...@@ -66,9 +63,14 @@ class sac_parser ...@@ -66,9 +63,14 @@ class sac_parser
kAnyPrintMask = 1 << 3 kAnyPrintMask = 1 << 3
}; };
static bool is_white(int ch) static constexpr bool is_space(int ch)
{
return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n';
}
static constexpr bool is_white(int ch)
{ {
return std::isspace(ch) or ch == '#'; return is_space(ch) or ch == '#';
} }
static constexpr bool is_ordinary(int ch) static constexpr bool is_ordinary(int ch)
...@@ -92,26 +94,7 @@ class sac_parser ...@@ -92,26 +94,7 @@ class sac_parser
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0); (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
} }
static bool is_unquoted_string(std::string_view text) static bool is_unquoted_string(std::string_view text);
{
bool result = text.empty() or is_ordinary(text.front());
if (result)
{
for (auto ch : text)
{
if (is_non_blank(ch))
continue;
result = false;
break;
}
}
static const std::regex kReservedRx(R"(loop_|stop_|global_|data_\S+|save_\S+)", std::regex_constants::icase);
// but be careful it does not contain e.g. stop_
return result and not std::regex_match(text.begin(), text.end(), kReservedRx);
}
protected: protected:
static constexpr uint8_t kCharTraitsTable[128] = { static constexpr uint8_t kCharTraitsTable[128] = {
...@@ -133,7 +116,8 @@ class sac_parser ...@@ -133,7 +116,8 @@ class sac_parser
DATA, DATA,
LOOP, LOOP,
GLOBAL, GLOBAL,
SAVE, SAVE_,
SAVE_NAME,
STOP, STOP,
Tag, Tag,
Value Value
...@@ -148,7 +132,8 @@ class sac_parser ...@@ -148,7 +132,8 @@ class sac_parser
case CIFToken::DATA: return "DATA"; case CIFToken::DATA: return "DATA";
case CIFToken::LOOP: return "LOOP"; case CIFToken::LOOP: return "LOOP";
case CIFToken::GLOBAL: return "GLOBAL"; case CIFToken::GLOBAL: return "GLOBAL";
case CIFToken::SAVE: return "SAVE"; case CIFToken::SAVE_: return "SAVE";
case CIFToken::SAVE_NAME: return "SAVE+name";
case CIFToken::STOP: return "STOP"; case CIFToken::STOP: return "STOP";
case CIFToken::Tag: return "Tag"; case CIFToken::Tag: return "Tag";
case CIFToken::Value: return "Value"; case CIFToken::Value: return "Value";
...@@ -156,41 +141,13 @@ class sac_parser ...@@ -156,41 +141,13 @@ class sac_parser
} }
} }
enum class CIFValue // get_next_char takes the next character from the istream.
{ // This function also does carriage/linefeed translation.
Int,
Float,
Numeric,
String,
TextField,
Inapplicable,
Unknown
};
static constexpr const char *get_value_name(CIFValue type)
{
switch (type)
{
case CIFValue::Int: return "Int";
case CIFValue::Float: return "Float";
case CIFValue::Numeric: return "Numeric";
case CIFValue::String: return "String";
case CIFValue::TextField: return "TextField";
case CIFValue::Inapplicable: return "Inapplicable";
case CIFValue::Unknown: return "Unknown";
default: return "Invalid type parameter";
}
}
// get_next_char takes a char from the buffer, or if it is empty
// from the istream. This function also does carriage/linefeed
// translation.
int get_next_char(); int get_next_char();
// Put the last read character back into the istream
void retract(); void retract();
int restart(int start);
CIFToken get_next_token(); CIFToken get_next_token();
void match(CIFToken token); void match(CIFToken token);
...@@ -205,6 +162,9 @@ class sac_parser ...@@ -205,6 +162,9 @@ class sac_parser
void parse_file(); void parse_file();
protected: protected:
sac_parser(std::istream &is, bool init = true);
void parse_global(); void parse_global();
void parse_datablock(); void parse_datablock();
...@@ -227,13 +187,14 @@ class sac_parser ...@@ -227,13 +187,14 @@ class sac_parser
// production methods, these are pure virtual here // production methods, these are pure virtual here
virtual void produce_datablock(const std::string &name) = 0; virtual void produce_datablock(std::string_view name) = 0;
virtual void produce_category(const std::string &name) = 0; virtual void produce_category(std::string_view name) = 0;
virtual void produce_row() = 0; virtual void produce_row() = 0;
virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0; virtual void produce_item(std::string_view category, std::string_view item, std::string_view value) = 0;
protected: protected:
enum State
enum class State
{ {
Start, Start,
White, White,
...@@ -246,23 +207,21 @@ class sac_parser ...@@ -246,23 +207,21 @@ class sac_parser
UnquotedString, UnquotedString,
Tag, Tag,
TextField, TextField,
Float = 100, TextFieldNL,
Int = 110, Reserved,
Value = 300, Value
DATA,
SAVE
}; };
std::streambuf &m_source; std::streambuf &m_source;
// Parser state // Parser state
bool m_validate;
uint32_t m_line_nr; uint32_t m_line_nr;
bool m_bol; bool m_bol;
CIFToken m_lookahead; CIFToken m_lookahead;
std::string m_token_value;
CIFValue mTokenType; // token buffer
std::vector<int> m_buffer; // retract buffer, used to be a stack<char> std::vector<char> m_token_buffer;
std::string_view m_token_value;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -276,13 +235,13 @@ class parser : public sac_parser ...@@ -276,13 +235,13 @@ class parser : public sac_parser
{ {
} }
void produce_datablock(const std::string &name) override; void produce_datablock(std::string_view name) override;
void produce_category(const std::string &name) override; void produce_category(std::string_view name) override;
void produce_row() override; void produce_row() override;
void produce_item(const std::string &category, const std::string &item, const std::string &value) override; void produce_item(std::string_view category, std::string_view item, std::string_view value) override;
protected: protected:
file &m_file; file &m_file;
......
...@@ -228,8 +228,9 @@ class validator_factory ...@@ -228,8 +228,9 @@ class validator_factory
const validator &operator[](std::string_view dictionary_name); const validator &operator[](std::string_view dictionary_name);
const validator &construct_validator(std::string_view name, std::istream &is);
private: private:
void construct_validator(std::string_view name, std::istream &is);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
......
...@@ -1227,23 +1227,37 @@ std::string category::get_unique_id(std::function<std::string(int)> generator) ...@@ -1227,23 +1227,37 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
{ {
using namespace cif::literals; using namespace cif::literals;
std::string id_tag = "id";
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
id_tag = m_cat_validator->m_keys.front();
// calling size() often is a waste of resources // calling size() often is a waste of resources
if (m_last_unique_num == 0) if (m_last_unique_num == 0)
m_last_unique_num = static_cast<uint32_t>(size()); m_last_unique_num = static_cast<uint32_t>(size());
for (;;) std::string result = generator(static_cast<int>(m_last_unique_num++));
{
std::string result = generator(static_cast<int>(m_last_unique_num++));
if (exists(key(id_tag) == result))
continue;
return result; std::string id_tag = "id";
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
{
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(this);
for (;;)
{
if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
break;
result = generator(static_cast<int>(m_last_unique_num++));
}
}
else
{
for (;;)
{
if (not exists(key(id_tag) == result))
break;
result = generator(static_cast<int>(m_last_unique_num++));
}
} }
return result;
} }
void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value) void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
......
...@@ -117,7 +117,7 @@ class dictionary_parser : public parser ...@@ -117,7 +117,7 @@ class dictionary_parser : public parser
if (not m_collected_item_types) if (not m_collected_item_types)
m_collected_item_types = collect_item_types(); m_collected_item_types = collect_item_types();
std::string saveFrameName = m_token_value; std::string saveFrameName { m_token_value };
if (saveFrameName.empty()) if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here"); error("Invalid save frame, should contain more than just 'save_' here");
...@@ -127,7 +127,7 @@ class dictionary_parser : public parser ...@@ -127,7 +127,7 @@ class dictionary_parser : public parser
datablock dict(m_token_value); datablock dict(m_token_value);
datablock::iterator cat = dict.end(); datablock::iterator cat = dict.end();
match(CIFToken::SAVE); match(CIFToken::SAVE_NAME);
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag) while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
{ {
if (m_lookahead == CIFToken::LOOP) if (m_lookahead == CIFToken::LOOP)
...@@ -183,7 +183,7 @@ class dictionary_parser : public parser ...@@ -183,7 +183,7 @@ class dictionary_parser : public parser
} }
} }
match(CIFToken::SAVE); match(CIFToken::SAVE_);
if (isCategorySaveFrame) if (isCategorySaveFrame)
{ {
...@@ -481,4 +481,11 @@ validator parse_dictionary(std::string_view name, std::istream &is) ...@@ -481,4 +481,11 @@ validator parse_dictionary(std::string_view name, std::istream &is)
return result; return result;
} }
} // namespace cif void extend_dictionary(validator &v, std::istream &is)
\ No newline at end of file {
file f;
dictionary_parser p(v, is, f);
p.load_dictionary();
}
} // namespace cif
...@@ -32,7 +32,6 @@ ...@@ -32,7 +32,6 @@
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <regex>
#include <stack> #include <stack>
namespace cif namespace cif
...@@ -40,13 +39,152 @@ namespace cif ...@@ -40,13 +39,152 @@ namespace cif
// -------------------------------------------------------------------- // --------------------------------------------------------------------
/// \brief Incremental recogniser for the reserved words data_, global_,
/// loop_, save_ and stop_.
///
/// Characters are fed one at a time through move(). Letters are compared
/// case-insensitively against a small graph (s_dag). Positive states are
/// indices into s_dag, negative states mean the fixed part of a reserved
/// word has been seen completely, and state 0 means the automaton is done.
class reserved_words_automaton
{
  public:
	constexpr reserved_words_automaton() = default;

	/// Outcome of feeding one character to move().
	enum move_result
	{
		undefined,  ///< no decision yet, more characters are needed
		no_keyword, ///< the characters seen so far are not a reserved word
		data,       ///< data_ followed by at least one non-blank character
		global,     ///< global_
		loop,       ///< loop_
		save,       ///< save_ with nothing following it
		save_plus,  ///< save_ followed by at least one non-blank character
		stop        ///< stop_
	};

	/// True when no more graph nodes need to be visited: either the fixed
	/// part of a reserved word was matched or the automaton is in state 0.
	constexpr bool finished() const
	{
		return m_state <= 0;
	}

	/// True while the automaton is in one of the negative states, i.e. the
	/// fixed part of a reserved word was matched and move() has not yet
	/// reported a result (reporting a result resets the state to 0).
	constexpr bool matched() const
	{
		return m_state < 0;
	}

	/// \brief Feed the next character to the automaton.
	/// \param ch The character, as an int (so an end-of-file marker can be
	///           passed as well).
	/// \return undefined while undecided, otherwise the final verdict.
	///         After a verdict was returned every subsequent call returns
	///         undefined again since the state is reset to 0.
	constexpr move_result move(int ch)
	{
		move_result result = undefined;

		switch (m_state)
		{
			case 0: // already done, nothing left to decide
				break;

			case -1: // data_, only a keyword when a name follows
				if (sac_parser::is_non_blank(ch))
					m_seen_trailing_chars = true;
				else if (m_seen_trailing_chars)
					result = data;
				else
					result = no_keyword;
				break;

			case -2: // global_
				result = sac_parser::is_non_blank(ch) ? no_keyword : global;
				break;

			case -3: // loop_
				result = sac_parser::is_non_blank(ch) ? no_keyword : loop;
				break;

			case -4: // save_, with or without a name following it
				if (sac_parser::is_non_blank(ch))
					m_seen_trailing_chars = true;
				else if (m_seen_trailing_chars)
					result = save_plus;
				else
					result = save;
				break;

			case -5: // stop_
				result = sac_parser::is_non_blank(ch) ? no_keyword : stop;
				break;

			default:
			{
				assert(m_state > 0 and m_state < NODE_COUNT);

				// Fold ASCII lower case letters only. Clearing bit 0x20
				// unconditionally would also turn DEL (0x7f) into '_' and
				// thereby accept e.g. "loop\x7f" as the keyword loop_.
				const int folded = (ch >= 'a' and ch <= 'z') ? ch - ('a' - 'A') : ch;

				for (;;)
				{
					if (s_dag[m_state].ch == folded)
					{
						m_state = s_dag[m_state].next_match;
						break;
					}

					// try the alternative for this position, if any
					m_state = s_dag[m_state].next_nomatch;
					if (m_state == 0)
					{
						result = no_keyword;
						break;
					}
				}

				break;
			}
		}

		if (result != undefined)
			m_state = 0;

		return result;
	}

  private:
	// Each node holds the (upper case) character expected at this point,
	// the state to go to when it matches and the alternative node to try
	// when it does not. A next_nomatch of 0 means there is no alternative.
	static constexpr struct node
	{
		int16_t ch;
		int8_t next_match;
		int8_t next_nomatch;
	} s_dag[] = {
		{ 0, 0, 0 },     //  0: unused, state 0 means 'done'
		{ 'D', 5, 2 },   //  1: first character, alternatives chained via next_nomatch
		{ 'G', 9, 3 },   //  2
		{ 'L', 15, 4 },  //  3
		{ 'S', 19, 0 },  //  4
		{ 'A', 6, 0 },   //  5: d-A-t-a-_
		{ 'T', 7, 0 },   //  6
		{ 'A', 8, 0 },   //  7
		{ '_', -1, 0 },  //  8
		{ 'L', 10, 0 },  //  9: g-L-o-b-a-l-_
		{ 'O', 11, 0 },  // 10
		{ 'B', 12, 0 },  // 11
		{ 'A', 13, 0 },  // 12
		{ 'L', 14, 0 },  // 13
		{ '_', -2, 0 },  // 14
		{ 'O', 16, 0 },  // 15: l-O-o-p-_
		{ 'O', 17, 0 },  // 16
		{ 'P', 18, 0 },  // 17
		{ '_', -3, 0 },  // 18
		{ 'A', 21, 20 }, // 19: s-A-v-e-_ or, via node 20, s-T-o-p-_
		{ 'T', 24, 0 },  // 20
		{ 'V', 22, 0 },  // 21
		{ 'E', 23, 0 },  // 22
		{ '_', -4, 0 },  // 23
		{ 'O', 25, 0 },  // 24
		{ 'P', 26, 0 },  // 25
		{ '_', -5, 0 },  // 26
	};

	static constexpr int NODE_COUNT = sizeof(s_dag) / sizeof(node);

	int m_state = 1;                    // start at the first-character node
	bool m_seen_trailing_chars = false; // a name was seen after data_ or save_
};
// --------------------------------------------------------------------
sac_parser::sac_parser(std::istream &is, bool init) sac_parser::sac_parser(std::istream &is, bool init)
: m_source(*is.rdbuf()) : m_source(*is.rdbuf())
{ {
m_token_buffer.reserve(8192);
if (is.rdbuf() == nullptr) if (is.rdbuf() == nullptr)
throw std::runtime_error("Attempt to read from uninitialised stream"); throw std::runtime_error("Attempt to read from uninitialised stream");
m_validate = true;
m_line_nr = 1; m_line_nr = 1;
m_bol = true; m_bol = true;
...@@ -54,45 +192,54 @@ sac_parser::sac_parser(std::istream &is, bool init) ...@@ -54,45 +192,54 @@ sac_parser::sac_parser(std::istream &is, bool init)
m_lookahead = get_next_token(); m_lookahead = get_next_token();
} }
bool sac_parser::is_unquoted_string(std::string_view text)
{
bool result = text.empty() or is_ordinary(text.front());
if (result)
{
reserved_words_automaton automaton;
for (char ch : text)
{
if (not is_non_blank(ch))
{
result = false;
break;
}
automaton.move(ch);
}
if (automaton.matched())
result = false;
}
return result;
}
// get_next_char takes a char from the buffer, or if it is empty // get_next_char takes a char from the buffer, or if it is empty
// from the istream. This function also does carriage/linefeed // from the istream. This function also does carriage/linefeed
// translation. // translation.
int sac_parser::get_next_char() int sac_parser::get_next_char()
{ {
int result = std::char_traits<char>::eof(); int result = m_source.sbumpc();
if (m_buffer.empty())
result = m_source.sbumpc();
else
{
result = m_buffer.back();
m_buffer.pop_back();
}
// very simple CR/LF translation into LF
if (result == '\r')
{
int lookahead = m_source.sbumpc();
if (lookahead != '\n')
m_buffer.push_back(lookahead);
result = '\n';
}
if (result == std::char_traits<char>::eof()) if (result == std::char_traits<char>::eof())
m_token_value.push_back(0); m_token_buffer.push_back(0);
else else
m_token_value.push_back(std::char_traits<char>::to_char_type(result));
if (result == '\n')
++m_line_nr;
if (VERBOSE >= 6)
{ {
std::cerr << "get_next_char => "; if (result == '\r')
if (iscntrl(result) or not isprint(result)) {
std::cerr << int(result) << std::endl; if (m_source.sgetc() == '\n')
else m_source.sbumpc();
std::cerr << char(result) << std::endl;
++m_line_nr;
result = '\n';
}
else if (result == '\n')
++m_line_nr;
m_token_buffer.push_back(std::char_traits<char>::to_char_type(result));
} }
return result; return result;
...@@ -100,44 +247,22 @@ int sac_parser::get_next_char() ...@@ -100,44 +247,22 @@ int sac_parser::get_next_char()
void sac_parser::retract() void sac_parser::retract()
{ {
assert(not m_token_value.empty()); assert(not m_token_buffer.empty());
char ch = m_token_value.back(); char ch = m_token_buffer.back();
if (ch == '\n') if (ch == '\n')
--m_line_nr; --m_line_nr;
m_buffer.push_back(ch == 0 ? std::char_traits<char>::eof() : std::char_traits<char>::to_int_type(ch)); if (ch != 0)
m_token_value.pop_back();
}
int sac_parser::restart(int start)
{
int result = 0;
while (not m_token_value.empty())
retract();
switch (start)
{ {
case State::Start: // since we always putback at most a single character,
result = State::Float; // the test below should never fail.
break;
case State::Float: if (m_source.sputbackc(ch) == std::char_traits<char>::eof())
result = State::Int; throw std::runtime_error("putback failure");
break;
case State::Int:
result = State::Value;
break;
default:
error("Invalid state in SacParser");
} }
m_bol = false; m_token_buffer.pop_back();
return result;
} }
sac_parser::CIFToken sac_parser::get_next_token() sac_parser::CIFToken sac_parser::get_next_token()
...@@ -146,11 +271,13 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -146,11 +271,13 @@ sac_parser::CIFToken sac_parser::get_next_token()
CIFToken result = CIFToken::Unknown; CIFToken result = CIFToken::Unknown;
int quoteChar = 0; int quoteChar = 0;
int state = State::Start, start = State::Start; State state = State::Start;
m_bol = false; m_bol = false;
m_token_value.clear(); m_token_buffer.clear();
mTokenType = CIFValue::Unknown; m_token_value = {};
reserved_words_automaton dag;
while (result == CIFToken::Unknown) while (result == CIFToken::Unknown)
{ {
...@@ -174,23 +301,27 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -174,23 +301,27 @@ sac_parser::CIFToken sac_parser::get_next_token()
state = State::Tag; state = State::Tag;
else if (ch == ';' and m_bol) else if (ch == ';' and m_bol)
state = State::TextField; state = State::TextField;
else if (ch == '?')
state = State::QuestionMark;
else if (ch == '\'' or ch == '"') else if (ch == '\'' or ch == '"')
{ {
quoteChar = ch; quoteChar = ch;
state = State::QuotedString; state = State::QuotedString;
} }
else if (dag.move(ch) == reserved_words_automaton::undefined)
state = State::Reserved;
else else
state = start = restart(start); state = State::Value;
break; break;
case State::White: case State::White:
if (ch == kEOF) if (ch == kEOF)
result = CIFToken::Eof; result = CIFToken::Eof;
else if (not isspace(ch)) else if (not is_space(ch))
{ {
state = State::Start; state = State::Start;
retract(); retract();
m_token_value.clear(); m_token_buffer.clear();
} }
else else
m_bol = (ch == '\n'); m_bol = (ch == '\n');
...@@ -201,38 +332,40 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -201,38 +332,40 @@ sac_parser::CIFToken sac_parser::get_next_token()
{ {
state = State::Start; state = State::Start;
m_bol = true; m_bol = true;
m_token_value.clear(); m_token_buffer.clear();
} }
else if (ch == kEOF) else if (ch == kEOF)
result = CIFToken::Eof; result = CIFToken::Eof;
else if (not is_any_print(ch)) else if (not is_any_print(ch))
error("invalid character in comment"); error("invalid character in comment");
break; break;
case State::QuestionMark:
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Value;
}
else
state = State::Value;
break;
case State::TextField: case State::TextField:
if (ch == '\n') if (ch == '\n')
state = State::TextField + 1; state = State::TextFieldNL;
else if (ch == kEOF) else if (ch == kEOF)
error("unterminated textfield"); error("unterminated textfield");
// else if (ch == '\\')
// state = State::Esc;
else if (not is_any_print(ch) and cif::VERBOSE > 2) else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")"); warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break; break;
// case State::Esc: case State::TextFieldNL:
// if (ch == '\n')
// break;
case State::TextField + 1:
if (is_text_lead(ch) or ch == ' ' or ch == '\t') if (is_text_lead(ch) or ch == ' ' or ch == '\t')
state = State::TextField; state = State::TextField;
else if (ch == ';') else if (ch == ';')
{ {
assert(m_token_value.length() >= 2); assert(m_token_buffer.size() >= 2);
m_token_value = m_token_value.substr(1, m_token_value.length() - 3); m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3);
mTokenType = CIFValue::TextField;
result = CIFToken::Value; result = CIFToken::Value;
} }
else if (ch == kEOF) else if (ch == kEOF)
...@@ -255,12 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -255,12 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token()
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::Value;
mTokenType = CIFValue::String; if (m_token_buffer.size() < 2)
if (m_token_value.length() < 2)
error("Invalid quoted string token"); error("Invalid quoted string token");
m_token_value = m_token_value.substr(1, m_token_value.length() - 2); m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 2);
} }
else if (ch == quoteChar) else if (ch == quoteChar)
; ;
...@@ -277,149 +408,68 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -277,149 +408,68 @@ sac_parser::CIFToken sac_parser::get_next_token()
{ {
retract(); retract();
result = CIFToken::Tag; result = CIFToken::Tag;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
} }
break; break;
case State::Float: case State::Reserved:
if (ch == '+' or ch == '-') switch (dag.move(ch))
{ {
state = State::Float + 1; case reserved_words_automaton::undefined:
} break;
else if (isdigit(ch))
state = State::Float + 1;
else
state = start = restart(start);
break;
case State::Float + 1:
// if (ch == '(') // numeric???
// mState = State::NumericSuffix;
// else
if (ch == '.')
state = State::Float + 2;
else if (tolower(ch) == 'e')
state = State::Float + 3;
else if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Int;
}
else
state = start = restart(start);
break;
// parsed '.'
case State::Float + 2:
if (tolower(ch) == 'e')
state = State::Float + 3;
else if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Float;
}
else
state = start = restart(start);
break;
// parsed 'e'
case State::Float + 3:
if (ch == '-' or ch == '+')
state = State::Float + 4;
else if (isdigit(ch))
state = State::Float + 5;
else
state = start = restart(start);
break;
case State::Float + 4:
if (isdigit(ch))
state = State::Float + 5;
else
state = start = restart(start);
break;
case State::Float + 5: case reserved_words_automaton::no_keyword:
if (is_white(ch) or ch == kEOF) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Value; result = CIFToken::Value;
mTokenType = CIFValue::Float; m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
} }
else else
state = start = restart(start); state = State::Value;
break; break;
case State::Int: case reserved_words_automaton::data:
if (isdigit(ch) or ch == '+' or ch == '-') retract();
state = State::Int + 1; m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
else result = CIFToken::DATA;
state = start = restart(start); break;
break;
case State::Int + 1: case reserved_words_automaton::global:
if (is_white(ch) or ch == kEOF) retract();
{ result = CIFToken::GLOBAL;
retract(); break;
result = CIFToken::Value;
mTokenType = CIFValue::Int;
}
else
state = start = restart(start);
break;
case State::Value: case reserved_words_automaton::loop:
if (ch == '_') retract();
{ result = CIFToken::LOOP;
std::string s = to_lower_copy(m_token_value); break;
if (s == "data_") case reserved_words_automaton::save:
{ retract();
state = State::DATA; result = CIFToken::SAVE_;
continue; break;
}
if (s == "save_")
{
state = State::SAVE;
continue;
}
}
if (result == CIFToken::Unknown and not is_non_blank(ch)) case reserved_words_automaton::save_plus:
{ retract();
retract(); m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
result = CIFToken::Value; result = CIFToken::SAVE_NAME;
break;
if (m_token_value == ".") case reserved_words_automaton::stop:
mTokenType = CIFValue::Inapplicable; retract();
else if (iequals(m_token_value, "global_"))
result = CIFToken::GLOBAL;
else if (iequals(m_token_value, "stop_"))
result = CIFToken::STOP; result = CIFToken::STOP;
else if (iequals(m_token_value, "loop_")) break;
result = CIFToken::LOOP;
else if (m_token_value == "?")
{
mTokenType = CIFValue::Unknown;
m_token_value.clear();
}
} }
break; break;
case State::DATA: case State::Value:
case State::SAVE:
if (not is_non_blank(ch)) if (not is_non_blank(ch))
{ {
retract(); retract();
result = CIFToken::Value;
if (state == State::DATA) m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
result = CIFToken::DATA; break;
else
result = CIFToken::SAVE;
m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
} }
break; break;
...@@ -433,8 +483,6 @@ sac_parser::CIFToken sac_parser::get_next_token() ...@@ -433,8 +483,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (VERBOSE >= 5) if (VERBOSE >= 5)
{ {
std::cerr << get_token_name(result); std::cerr << get_token_name(result);
if (mTokenType != CIFValue::Unknown)
std::cerr << ' ' << get_value_name(mTokenType);
if (result != CIFToken::Eof) if (result != CIFToken::Eof)
std::cerr << " " << std::quoted(m_token_value); std::cerr << " " << std::quoted(m_token_value);
std::cerr << std::endl; std::cerr << std::endl;
...@@ -506,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock) ...@@ -506,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break; break;
case string_quote: case string_quote:
if (std::isspace(ch)) if (is_space(ch))
state = start; state = start;
else else
state = string; state = string;
...@@ -518,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock) ...@@ -518,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
break; break;
case data: case data:
if (isspace(ch) and dblk[si] == 0) if (is_space(ch) and dblk[si] == 0)
found = true; found = true;
else if (dblk[si++] != ch) else if (dblk[si++] != ch)
state = start; state = start;
...@@ -596,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks() ...@@ -596,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
break; break;
case string_quote: case string_quote:
if (std::isspace(ch)) if (is_space(ch))
state = start; state = start;
else else
state = string; state = string;
...@@ -620,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks() ...@@ -620,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
case data_name: case data_name:
if (is_non_blank(ch)) if (is_non_blank(ch))
datablock.insert(datablock.end(), char(ch)); datablock.insert(datablock.end(), char(ch));
else if (isspace(ch)) else if (is_space(ch))
{ {
if (not datablock.empty()) if (not datablock.empty())
index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in); index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in);
...@@ -696,7 +744,7 @@ void sac_parser::parse_datablock() ...@@ -696,7 +744,7 @@ void sac_parser::parse_datablock()
static const std::string kUnitializedCategory("<invalid>"); static const std::string kUnitializedCategory("<invalid>");
std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE) while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE_NAME)
{ {
switch (m_lookahead) switch (m_lookahead)
{ {
...@@ -761,7 +809,7 @@ void sac_parser::parse_datablock() ...@@ -761,7 +809,7 @@ void sac_parser::parse_datablock()
break; break;
} }
case CIFToken::SAVE: case CIFToken::SAVE_NAME:
parse_save_frame(); parse_save_frame();
break; break;
...@@ -779,7 +827,7 @@ void sac_parser::parse_save_frame() ...@@ -779,7 +827,7 @@ void sac_parser::parse_save_frame()
// -------------------------------------------------------------------- // --------------------------------------------------------------------
void parser::produce_datablock(const std::string &name) void parser::produce_datablock(std::string_view name)
{ {
if (VERBOSE >= 4) if (VERBOSE >= 4)
std::cerr << "producing data_" << name << std::endl; std::cerr << "producing data_" << name << std::endl;
...@@ -788,7 +836,7 @@ void parser::produce_datablock(const std::string &name) ...@@ -788,7 +836,7 @@ void parser::produce_datablock(const std::string &name)
m_datablock = &(*iter); m_datablock = &(*iter);
} }
void parser::produce_category(const std::string &name) void parser::produce_category(std::string_view name)
{ {
if (VERBOSE >= 4) if (VERBOSE >= 4)
std::cerr << "producing category " << name << std::endl; std::cerr << "producing category " << name << std::endl;
...@@ -810,7 +858,7 @@ void parser::produce_row() ...@@ -810,7 +858,7 @@ void parser::produce_row()
// m_row.lineNr(m_line_nr); // m_row.lineNr(m_line_nr);
} }
void parser::produce_item(const std::string &category, const std::string &item, const std::string &value) void parser::produce_item(std::string_view category, std::string_view item, std::string_view value)
{ {
if (VERBOSE >= 4) if (VERBOSE >= 4)
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl; std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
...@@ -821,4 +869,4 @@ void parser::produce_item(const std::string &category, const std::string &item, ...@@ -821,4 +869,4 @@ void parser::produce_item(const std::string &category, const std::string &item,
m_row[item] = m_token_value; m_row[item] = m_token_value;
} }
} // namespace cif } // namespace cif
\ No newline at end of file
...@@ -236,28 +236,19 @@ std::string cif_id_for_number(int number) ...@@ -236,28 +236,19 @@ std::string cif_id_for_number(int number)
{ {
std::string result; std::string result;
if (number >= 26 * 26 * 26) do
result = 'L' + std::to_string(number);
else
{ {
if (number >= 26 * 26) int r = number % 26;
{ result += 'A' + r;
int v = number / (26 * 26);
result += char('A' - 1 + v);
number %= (26 * 26);
}
if (number >= 26)
{
int v = number / 26;
result += char('A' - 1 + v);
number %= 26;
}
result += char('A' + number); number = (number - r) / 26 - 1;
} }
while (number >= 0);
std::reverse(result.begin(), result.end());
assert(not result.empty()); assert(not result.empty());
return result; return result;
} }
......
...@@ -40,7 +40,6 @@ ...@@ -40,7 +40,6 @@
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <mutex> #include <mutex>
#include <regex>
#include <sstream> #include <sstream>
#include <thread> #include <thread>
...@@ -161,6 +160,8 @@ struct progress_bar_impl ...@@ -161,6 +160,8 @@ struct progress_bar_impl
void print_progress(); void print_progress();
void print_done(); void print_done();
using time_point = std::chrono::time_point<std::chrono::system_clock>;
int64_t m_max_value; int64_t m_max_value;
std::atomic<int64_t> m_consumed; std::atomic<int64_t> m_consumed;
int64_t m_last_consumed = 0; int64_t m_last_consumed = 0;
...@@ -168,8 +169,8 @@ struct progress_bar_impl ...@@ -168,8 +169,8 @@ struct progress_bar_impl
std::string m_action, m_message; std::string m_action, m_message;
std::mutex m_mutex; std::mutex m_mutex;
std::thread m_thread; std::thread m_thread;
std::chrono::time_point<std::chrono::system_clock> time_point m_start = std::chrono::system_clock::now();
m_start = std::chrono::system_clock::now(); time_point m_last = std::chrono::system_clock::now();
bool m_stop = false; bool m_stop = false;
}; };
...@@ -192,7 +193,9 @@ void progress_bar_impl::run() ...@@ -192,7 +193,9 @@ void progress_bar_impl::run()
{ {
while (not m_stop) while (not m_stop)
{ {
if (std::chrono::system_clock::now() - m_start < 2s) auto now = std::chrono::system_clock::now();
if (now - m_start < 2s or now - m_last < 100ms)
{ {
std::this_thread::sleep_for(10ms); std::this_thread::sleep_for(10ms);
continue; continue;
...@@ -206,6 +209,7 @@ void progress_bar_impl::run() ...@@ -206,6 +209,7 @@ void progress_bar_impl::run()
print_progress(); print_progress();
printedAny = true; printedAny = true;
m_last = std::chrono::system_clock::now();
} }
} }
catch (...) catch (...)
......
...@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name) ...@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
} }
} }
void validator_factory::construct_validator(std::string_view name, std::istream &is) const validator &validator_factory::construct_validator(std::string_view name, std::istream &is)
{ {
m_validators.emplace_back(parse_dictionary(name, is)); return m_validators.emplace_back(parse_dictionary(name, is));
} }
} // namespace cif } // namespace cif
#include <cif++.hpp>
/// A cif::sac_parser whose produce_* callbacks all do nothing.
///
/// Every event reported by the base parser is discarded, so running this
/// parser exercises only the parsing itself.
/// NOTE(review): presumably intended for timing the parser - confirm.
class dummy_parser : public cif::sac_parser
{
  public:
	/// @param is  Stream handed to the base sac_parser to read from.
	explicit dummy_parser(std::istream &is)
		: sac_parser(is)
	{
	}

	/// Ignore the start of a datablock.
	void produce_datablock(std::string_view /*name*/) override
	{
	}

	/// Ignore the start of a category.
	void produce_category(std::string_view /*name*/) override
	{
	}

	/// Ignore the start of a row.
	void produce_row() override
	{
	}

	/// Ignore an item value.
	void produce_item(std::string_view /*category*/, std::string_view /*item*/, std::string_view /*value*/) override
	{
	}
};
/// Run the no-op parser over a (gzip compressed) mmCIF file.
///
/// @param argc  Number of command line arguments.
/// @param argv  argv[1], when present, is the file to parse; otherwise the
///              original hard-coded test file is used.
/// @return      0 once parse_file has returned.
int main(int argc, char *argv[])
{
	// Keep the former hard-coded file as the default so running the program
	// without arguments behaves as before.
	const char *path = argc > 1 ? argv[1] : "/srv/data/pdb/mmCIF/gl/8glv.cif.gz";

	cif::gzio::ifstream in(path);
	dummy_parser parser(in);
	parser.parse_file();

	// cif::file f(path);

	return 0;
}
\ No newline at end of file
...@@ -75,6 +75,30 @@ bool init_unit_test() ...@@ -75,6 +75,30 @@ bool init_unit_test()
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Checks cif_id_for_number at the points where the identifier grows by a
// letter, and that the first 100000 identifiers are all distinct.
BOOST_AUTO_TEST_CASE(id_1)
{
	// One letter: 0 .. 25
	BOOST_TEST(cif::cif_id_for_number(0) == "A");
	BOOST_TEST(cif::cif_id_for_number(25) == "Z");

	// Two letters: 26 .. 26 + 26 * 26 - 1
	BOOST_TEST(cif::cif_id_for_number(26) == "AA");
	BOOST_TEST(cif::cif_id_for_number(26 + 1) == "AB");
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 - 1) == "ZZ");

	// Three letters start here
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26) == "AAA");
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 + 1) == "AAB");

	// No identifier may be handed out twice
	std::set<std::string> testset;

	int i = 0;
	while (i < 100000)
	{
		const std::string id = cif::cif_id_for_number(i);
		++i;

		BOOST_TEST(testset.count(id) == 0);
		testset.insert(id);
	}

	BOOST_TEST(testset.size() == 100000);
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(cc_1) BOOST_AUTO_TEST_CASE(cc_1)
{ {
std::tuple<std::string_view, float, char> tests[] = { std::tuple<std::string_view, float, char> tests[] = {
...@@ -2357,8 +2381,6 @@ _test.text ?? ...@@ -2357,8 +2381,6 @@ _test.text ??
BOOST_AUTO_TEST_CASE(output_test_1) BOOST_AUTO_TEST_CASE(output_test_1)
{ {
cif::VERBOSE = 5;
auto data1 = R"( auto data1 = R"(
data_Q data_Q
loop_ loop_
...@@ -2863,7 +2885,7 @@ save__cat_1.name ...@@ -2863,7 +2885,7 @@ save__cat_1.name
std::istream is_dict(&buffer); std::istream is_dict(&buffer);
auto validator = cif::parse_dictionary("test_dict.dic", is_dict); auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict);
cif::file f; cif::file f;
f.set_validator(&validator); f.set_validator(&validator);
...@@ -2901,8 +2923,6 @@ _cat_1.name ...@@ -2901,8 +2923,6 @@ _cat_1.name
ss << f; ss << f;
cif::file f2(ss); cif::file f2(ss);
f2.set_validator(&validator);
BOOST_ASSERT(f2.is_valid()); BOOST_ASSERT(f2.is_valid());
auto &audit_conform = f2.front()["audit_conform"]; auto &audit_conform = f2.front()["audit_conform"];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment