Commit 50df2504 by Maarten L. Hekkelman

Merge branch 'develop' into trunk

parents f2cfe284 2409fc5b
......@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16)
# set the project name
project(cifpp VERSION 5.0.9 LANGUAGES CXX)
project(cifpp VERSION 5.1.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
......@@ -382,6 +382,16 @@ install(FILES
DESTINATION ${CIFPP_DATA_DIR}
)
if(${CIFPP_CACHE_DIR})
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF}
DESTINATION ${CIFPP_CACHE_DIR}
)
endif()
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)
configure_package_config_file(
......
Version 5.1
- New parser, optimised for speed
- Fix in unique ID generator
Version 5.0.10
- Fix in progress_bar, was using too much CPU
- Optimised mmCIF parser
Version 5.0.9
- Fix in dihedral angle calculations
- Added create_water to model
......
......@@ -32,5 +32,6 @@ namespace cif
{
validator parse_dictionary(std::string_view name, std::istream &is);
void extend_dictionary(validator &v, std::istream &is);
} // namespace cif
......@@ -29,7 +29,6 @@
#include "cif++/row.hpp"
#include <map>
#include <regex>
namespace cif
{
......@@ -54,8 +53,6 @@ class sac_parser
public:
using datablock_index = std::map<std::string, std::size_t>;
sac_parser(std::istream &is, bool init = true);
virtual ~sac_parser() = default;
enum CharTraitsMask : uint8_t
......@@ -66,9 +63,14 @@ class sac_parser
kAnyPrintMask = 1 << 3
};
/// \brief Test for the blank characters this parser recognises.
///
/// Returns true only for space, horizontal tab, carriage return and line
/// feed. This is a plain comparison rather than a call to std::isspace, so
/// it can be evaluated at compile time; note that vertical tab and form
/// feed are therefore NOT treated as blanks.
static constexpr bool is_space(int ch)
{
    return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n';
}

/// \brief Test for a character that ends a token: a blank (see is_space)
/// or '#', which introduces a comment in CIF syntax.
static constexpr bool is_white(int ch)
{
    return is_space(ch) or ch == '#';
}
static constexpr bool is_ordinary(int ch)
......@@ -92,26 +94,7 @@ class sac_parser
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
}
// Decide whether `text` passes the checks for being written as a bare,
// unquoted value.
//
// The result is true when all of the following hold:
//  - text is empty, or its first character satisfies is_ordinary();
//  - every character of text satisfies is_non_blank();
//  - text as a whole is not one of the reserved words matched by
//    kReservedRx (compared case-insensitively).
//
// NOTE(review): an empty text passes all three checks and so yields true;
// confirm that callers expect this.
static bool is_unquoted_string(std::string_view text)
{
// Only the first character is checked here; an empty text is accepted.
bool result = text.empty() or is_ordinary(text.front());
if (result)
{
// Scan all characters, stopping at the first one that fails is_non_blank().
for (auto ch : text)
{
if (is_non_blank(ch))
continue;
result = false;
break;
}
}
// Built once on first call (function-local static) and reused afterwards.
// std::regex_match only succeeds when the pattern matches the entire text.
static const std::regex kReservedRx(R"(loop_|stop_|global_|data_\S+|save_\S+)", std::regex_constants::icase);
// but be careful it does not contain e.g. stop_
return result and not std::regex_match(text.begin(), text.end(), kReservedRx);
}
static bool is_unquoted_string(std::string_view text);
protected:
static constexpr uint8_t kCharTraitsTable[128] = {
......@@ -133,7 +116,8 @@ class sac_parser
DATA,
LOOP,
GLOBAL,
SAVE,
SAVE_,
SAVE_NAME,
STOP,
Tag,
Value
......@@ -148,7 +132,8 @@ class sac_parser
case CIFToken::DATA: return "DATA";
case CIFToken::LOOP: return "LOOP";
case CIFToken::GLOBAL: return "GLOBAL";
case CIFToken::SAVE: return "SAVE";
case CIFToken::SAVE_: return "SAVE";
case CIFToken::SAVE_NAME: return "SAVE+name";
case CIFToken::STOP: return "STOP";
case CIFToken::Tag: return "Tag";
case CIFToken::Value: return "Value";
......@@ -156,41 +141,13 @@ class sac_parser
}
}
// Classification of a parsed value token. The enumerators keep their
// implicit values 0..6, which get_value_name() uses as table indices.
enum class CIFValue
{
    Int,
    Float,
    Numeric,
    String,
    TextField,
    Inapplicable,
    Unknown
};

// Return a printable name for `type`. Any value outside the declared
// enumerators yields the fixed text "Invalid type parameter".
static constexpr const char *get_value_name(CIFValue type)
{
    // One entry per enumerator, in declaration order.
    constexpr const char *kValueNames[] = {
        "Int",
        "Float",
        "Numeric",
        "String",
        "TextField",
        "Inapplicable",
        "Unknown"
    };

    // A negative underlying value wraps to a huge index and falls through
    // to the fallback text, just like any other unknown value.
    const auto index = static_cast<std::size_t>(type);
    const auto count = sizeof(kValueNames) / sizeof(kValueNames[0]);

    return index < count ? kValueNames[index] : "Invalid type parameter";
}
// get_next_char takes a char from the buffer, or if it is empty
// from the istream. This function also does carriage/linefeed
// translation.
// get_next_char takes the next character from the istream.
// This function also does carriage/linefeed translation.
int get_next_char();
// Put the last read character back into the istream
void retract();
int restart(int start);
CIFToken get_next_token();
void match(CIFToken token);
......@@ -205,6 +162,9 @@ class sac_parser
void parse_file();
protected:
sac_parser(std::istream &is, bool init = true);
void parse_global();
void parse_datablock();
......@@ -227,13 +187,14 @@ class sac_parser
// production methods, these are pure virtual here
virtual void produce_datablock(const std::string &name) = 0;
virtual void produce_category(const std::string &name) = 0;
virtual void produce_datablock(std::string_view name) = 0;
virtual void produce_category(std::string_view name) = 0;
virtual void produce_row() = 0;
virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;
virtual void produce_item(std::string_view category, std::string_view item, std::string_view value) = 0;
protected:
enum State
enum class State
{
Start,
White,
......@@ -246,23 +207,21 @@ class sac_parser
UnquotedString,
Tag,
TextField,
Float = 100,
Int = 110,
Value = 300,
DATA,
SAVE
TextFieldNL,
Reserved,
Value
};
std::streambuf &m_source;
// Parser state
bool m_validate;
uint32_t m_line_nr;
bool m_bol;
CIFToken m_lookahead;
std::string m_token_value;
CIFValue mTokenType;
std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
// token buffer
std::vector<char> m_token_buffer;
std::string_view m_token_value;
};
// --------------------------------------------------------------------
......@@ -276,13 +235,13 @@ class parser : public sac_parser
{
}
void produce_datablock(const std::string &name) override;
void produce_datablock(std::string_view name) override;
void produce_category(const std::string &name) override;
void produce_category(std::string_view name) override;
void produce_row() override;
void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
void produce_item(std::string_view category, std::string_view item, std::string_view value) override;
protected:
file &m_file;
......
......@@ -228,8 +228,9 @@ class validator_factory
const validator &operator[](std::string_view dictionary_name);
const validator &construct_validator(std::string_view name, std::istream &is);
private:
void construct_validator(std::string_view name, std::istream &is);
// --------------------------------------------------------------------
......
......@@ -1227,23 +1227,37 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
{
using namespace cif::literals;
std::string id_tag = "id";
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
id_tag = m_cat_validator->m_keys.front();
// calling size() often is a waste of resources
if (m_last_unique_num == 0)
m_last_unique_num = static_cast<uint32_t>(size());
std::string result = generator(static_cast<int>(m_last_unique_num++));
std::string id_tag = "id";
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
{
if (m_index == nullptr and m_cat_validator != nullptr)
m_index = new category_index(this);
for (;;)
{
std::string result = generator(static_cast<int>(m_last_unique_num++));
if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
break;
result = generator(static_cast<int>(m_last_unique_num++));
}
}
else
{
for (;;)
{
if (not exists(key(id_tag) == result))
break;
if (exists(key(id_tag) == result))
continue;
result = generator(static_cast<int>(m_last_unique_num++));
}
}
return result;
}
}
void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
......
......@@ -117,7 +117,7 @@ class dictionary_parser : public parser
if (not m_collected_item_types)
m_collected_item_types = collect_item_types();
std::string saveFrameName = m_token_value;
std::string saveFrameName { m_token_value };
if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here");
......@@ -127,7 +127,7 @@ class dictionary_parser : public parser
datablock dict(m_token_value);
datablock::iterator cat = dict.end();
match(CIFToken::SAVE);
match(CIFToken::SAVE_NAME);
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
{
if (m_lookahead == CIFToken::LOOP)
......@@ -183,7 +183,7 @@ class dictionary_parser : public parser
}
}
match(CIFToken::SAVE);
match(CIFToken::SAVE_);
if (isCategorySaveFrame)
{
......@@ -481,4 +481,11 @@ validator parse_dictionary(std::string_view name, std::istream &is)
return result;
}
/// \brief Run a dictionary_parser over \a is using the existing validator \a v.
///
/// A temporary cif::file is handed to the parser and discarded on return.
/// NOTE(review): presumably load_dictionary() adds the definitions it reads
/// to \a v; confirm against dictionary_parser.
void extend_dictionary(validator &v, std::istream &is)
{
    file scratch_file;

    dictionary_parser dict_parser(v, is, scratch_file);
    dict_parser.load_dictionary();
}
} // namespace cif
// -- hunk @@ -236,28 +236,19 @@, shown with the change applied --

/// \brief Convert a zero-based sequence number into an alphabetic identifier.
///
/// The mapping is the one used for spreadsheet columns (bijective base 26):
/// 0 -> "A", 25 -> "Z", 26 -> "AA", 701 -> "ZZ", 702 -> "AAA", and so on.
/// Every non-negative number maps to a distinct identifier.
///
/// \param number  The sequence number; must not be negative.
/// \return        The identifier, never empty.
std::string cif_id_for_number(int number)
{
    // A negative number would yield a character below 'A'; treat it as a
    // programming error instead of returning garbage.
    assert(number >= 0);

    std::string result;

    // Letters are produced least significant first and reversed afterwards.
    do
    {
        const int r = number % 26;
        result += static_cast<char>('A' + r);

        // The extra -1 is what makes "AA" follow "Z": there is no zero
        // digit, so each higher position starts counting at 'A' again.
        number = (number - r) / 26 - 1;
    }
    while (number >= 0);

    std::reverse(result.begin(), result.end());

    assert(not result.empty());
    return result;
}
......
......@@ -40,7 +40,6 @@
#include <iostream>
#include <map>
#include <mutex>
#include <regex>
#include <sstream>
#include <thread>
......@@ -161,6 +160,8 @@ struct progress_bar_impl
void print_progress();
void print_done();
using time_point = std::chrono::time_point<std::chrono::system_clock>;
int64_t m_max_value;
std::atomic<int64_t> m_consumed;
int64_t m_last_consumed = 0;
......@@ -168,8 +169,8 @@ struct progress_bar_impl
std::string m_action, m_message;
std::mutex m_mutex;
std::thread m_thread;
std::chrono::time_point<std::chrono::system_clock>
m_start = std::chrono::system_clock::now();
time_point m_start = std::chrono::system_clock::now();
time_point m_last = std::chrono::system_clock::now();
bool m_stop = false;
};
......@@ -192,7 +193,9 @@ void progress_bar_impl::run()
{
while (not m_stop)
{
if (std::chrono::system_clock::now() - m_start < 2s)
auto now = std::chrono::system_clock::now();
if (now - m_start < 2s or now - m_last < 100ms)
{
std::this_thread::sleep_for(10ms);
continue;
......@@ -206,6 +209,7 @@ void progress_bar_impl::run()
print_progress();
printedAny = true;
m_last = std::chrono::system_clock::now();
}
}
catch (...)
......
......@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
}
}
void validator_factory::construct_validator(std::string_view name, std::istream &is)
const validator &validator_factory::construct_validator(std::string_view name, std::istream &is)
{
m_validators.emplace_back(parse_dictionary(name, is));
return m_validators.emplace_back(parse_dictionary(name, is));
}
} // namespace cif
#include <cif++.hpp>
/// A cif::sac_parser whose production callbacks all do nothing.
///
/// Running parse_file() on it therefore exercises only the parsing itself,
/// without storing any of the data that is read.
class dummy_parser final : public cif::sac_parser
{
  public:
    /// \param is  The stream to parse; passed straight on to sac_parser.
    explicit dummy_parser(std::istream &is)
        : sac_parser(is)
    {
    }

    // The parameter names are commented out because the values are
    // intentionally ignored; this avoids unused-parameter warnings.
    void produce_datablock(std::string_view /*name*/) override
    {
    }

    void produce_category(std::string_view /*name*/) override
    {
    }

    void produce_row() override
    {
    }

    void produce_item(std::string_view /*category*/, std::string_view /*item*/, std::string_view /*value*/) override
    {
    }
};
/// Parse a single file with dummy_parser.
///
/// The file to read may be given as the first command line argument; when
/// no argument is given the original hard-coded path is used.
int main(int argc, char *argv[])
{
    const char *input_path = argc > 1 ? argv[1] : "/srv/data/pdb/mmCIF/gl/8glv.cif.gz";

    cif::gzio::ifstream in(input_path);

    dummy_parser parser(in);
    parser.parse_file();

    // Alternative kept from the original for comparison:
    // cif::file f("/srv/data/pdb/mmCIF/gl/8glv.cif.gz");

    return 0;
}
\ No newline at end of file
......@@ -75,6 +75,30 @@ bool init_unit_test()
// --------------------------------------------------------------------
// Checks cif::cif_id_for_number at the points where the identifier grows by
// one letter, and that a long run of consecutive numbers never repeats an id.
BOOST_AUTO_TEST_CASE(id_1)
{
    // One-letter range: 0 .. 25
    BOOST_TEST(cif::cif_id_for_number(0) == "A");
    BOOST_TEST(cif::cif_id_for_number(25) == "Z");

    // Two-letter range starts right after 'Z'
    BOOST_TEST(cif::cif_id_for_number(26) == "AA");
    BOOST_TEST(cif::cif_id_for_number(26 + 1) == "AB");
    BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 - 1) == "ZZ");

    // Three-letter range starts right after 'ZZ'
    BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26) == "AAA");
    BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 + 1) == "AAB");

    constexpr int kIdCount = 100000;
    std::set<std::string> seen_ids;

    for (int number = 0; number < kIdCount; ++number)
    {
        const std::string generated = cif::cif_id_for_number(number);

        // Each id must be new at the moment it is generated.
        BOOST_TEST(seen_ids.count(generated) == 0);
        seen_ids.insert(generated);
    }

    BOOST_TEST(seen_ids.size() == kIdCount);
}
// --------------------------------------------------------------------
BOOST_AUTO_TEST_CASE(cc_1)
{
std::tuple<std::string_view, float, char> tests[] = {
......@@ -2357,8 +2381,6 @@ _test.text ??
BOOST_AUTO_TEST_CASE(output_test_1)
{
cif::VERBOSE = 5;
auto data1 = R"(
data_Q
loop_
......@@ -2863,7 +2885,7 @@ save__cat_1.name
std::istream is_dict(&buffer);
auto validator = cif::parse_dictionary("test_dict.dic", is_dict);
auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict);
cif::file f;
f.set_validator(&validator);
......@@ -2901,8 +2923,6 @@ _cat_1.name
ss << f;
cif::file f2(ss);
f2.set_validator(&validator);
BOOST_ASSERT(f2.is_valid());
auto &audit_conform = f2.front()["audit_conform"];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment