Commit b0ac33c1 by Maarten L. Hekkelman

optimisation in query

fix memory problem in cif::item, making it safer
parent 82e73a95
......@@ -13,5 +13,4 @@ msvc/
Testing/
rsrc/feature-request.txt
test/test-create_sugar_?.cif
test/oprofile_data/abi
test/oprofile_data/samples/operf.log
test/oprofile_data/
......@@ -550,6 +550,14 @@ class category
return create_item(ix, i.value());
}
item_value *create_item(const std::tuple<std::string_view,std::string> &i)
{
const auto &[name, value] = i;
uint16_t ix = add_column(name);
return create_item(ix, value);
}
void delete_item(item_value *iv)
{
if (iv->m_length >= item_value::kBufferSize)
......
......@@ -30,6 +30,7 @@
#include <functional>
#include <iostream>
#include <regex>
#include <utility>
#include <cif++/row.hpp>
......@@ -52,7 +53,7 @@ namespace detail
{
virtual ~condition_impl() {}
virtual void prepare(const category &c) {}
virtual condition_impl *prepare(const category &c) { return this; }
virtual bool test(row_handle r) const = 0;
virtual void str(std::ostream &os) const = 0;
virtual std::optional<row_handle> single() const { return {}; };
......@@ -106,12 +107,7 @@ class condition
m_impl = nullptr;
}
void prepare(const category &c)
{
if (m_impl)
m_impl->prepare(c);
m_prepared = true;
}
void prepare(const category &c);
bool operator()(row_handle r) const
{
......@@ -149,6 +145,9 @@ class condition
}
private:
void optimise(condition_impl *&impl);
condition_impl *m_impl;
bool m_prepared = false;
};
......@@ -162,9 +161,10 @@ namespace detail
{
}
void prepare(const category &c) override
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
return this;
}
bool test(row_handle r) const override
......@@ -181,15 +181,15 @@ namespace detail
size_t m_item_ix = 0;
};
struct key_is_condition_impl : public condition_impl
struct key_equals_condition_impl : public condition_impl
{
key_is_condition_impl(item &&i)
key_equals_condition_impl(item &&i)
: m_item_tag(i.name())
, m_value(i.value())
{
}
void prepare(const category &c) override;
condition_impl *prepare(const category &c) override;
bool test(row_handle r) const override
{
......@@ -215,6 +215,51 @@ namespace detail
std::optional<row_handle> m_single_hit;
};
struct key_equals_or_empty_condition_impl : public condition_impl
{
key_equals_or_empty_condition_impl(key_equals_condition_impl *equals, key_is_empty_condition_impl *empty)
: m_item_tag(equals->m_item_tag)
, m_value(equals->m_value)
, m_icase(equals->m_icase)
, m_single_hit(equals->m_single_hit)
{
assert(empty->m_item_ix == equals->m_item_ix);
}
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
return this;
}
bool test(row_handle r) const override
{
bool result = false;
if (m_single_hit.has_value())
result = *m_single_hit == r;
else
result = r[m_item_ix].empty() or r[m_item_ix].compare(m_value, m_icase) == 0;
return result;
}
void str(std::ostream &os) const override
{
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_tag << " IS NULL";
}
virtual std::optional<row_handle> single() const override
{
return m_single_hit;
}
std::string m_item_tag;
size_t m_item_ix = 0;
bool m_icase = false;
std::string m_value;
std::optional<row_handle> m_single_hit;
};
struct key_compare_condition_impl : public condition_impl
{
template <typename COMP>
......@@ -225,10 +270,11 @@ namespace detail
{
}
void prepare(const category &c) override
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
return this;
}
bool test(row_handle r) const override
......@@ -257,9 +303,10 @@ namespace detail
{
}
void prepare(const category &c) override
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
return this;
}
bool test(row_handle r) const override
......@@ -364,48 +411,78 @@ namespace detail
struct and_condition_impl : public condition_impl
{
and_condition_impl(condition &&a, condition &&b)
: mA(nullptr)
, mB(nullptr)
{
std::swap(mA, a.m_impl);
std::swap(mB, b.m_impl);
mSub.emplace_back(std::exchange(a.m_impl, nullptr));
mSub.emplace_back(std::exchange(b.m_impl, nullptr));
}
~and_condition_impl()
{
delete mA;
delete mB;
for (auto sub : mSub)
delete sub;
}
void prepare(const category &c) override
{
mA->prepare(c);
mB->prepare(c);
}
condition_impl *prepare(const category &c) override;
bool test(row_handle r) const override
{
return mA->test(r) and mB->test(r);
bool result = true;
for (auto sub : mSub)
{
if (sub->test(r))
continue;
result = false;
break;
}
return result;
}
void str(std::ostream &os) const override
{
os << '(';
mA->str(os);
os << ") AND (";
mB->str(os);
bool first = true;
for (auto sub : mSub)
{
if (first)
first = false;
else
os << " AND ";
sub->str(os);
}
os << ')';
}
virtual std::optional<row_handle> single() const override
{
auto sa = mA->single();
auto sb = mB->single();
return sa == sb ? sa : std::optional<row_handle>();
std::optional<row_handle> result;
for (auto sub : mSub)
{
auto s = sub->single();
if (not result.has_value())
{
result = s;
continue;
}
if (s == result)
continue;
result.reset();
break;
}
return result;
}
condition_impl *mA;
condition_impl *mB;
std::vector<condition_impl *> mSub;
};
struct or_condition_impl : public condition_impl
......@@ -424,11 +501,7 @@ namespace detail
delete mB;
}
void prepare(const category &c) override
{
mA->prepare(c);
mB->prepare(c);
}
condition_impl *prepare(const category &c) override;
bool test(row_handle r) const override
{
......@@ -474,9 +547,10 @@ namespace detail
delete mA;
}
void prepare(const category &c) override
condition_impl *prepare(const category &c) override
{
mA->prepare(c);
mA = mA->prepare(c);
return this;
}
bool test(row_handle r) const override
......@@ -543,13 +617,13 @@ struct key
template <typename T>
condition operator==(const key &key, const T &v)
{
return condition(new detail::key_is_condition_impl({ key.m_item_tag, v }));
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, v }));
}
inline condition operator==(const key &key, const char *value)
{
if (value != nullptr and *value != 0)
return condition(new detail::key_is_condition_impl({ key.m_item_tag, value }));
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
else
return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
}
......
......@@ -59,10 +59,8 @@ class item
/// content a single character string with content \a value
item(std::string_view name, char value)
: m_name(name)
, m_value(m_buffer, 1)
, m_value({ value })
{
m_buffer[0] = value;
m_buffer[1] = 0;
}
/// \brief constructor for an item with name \a name and as
......@@ -75,13 +73,15 @@ class item
using namespace std;
using namespace cif;
auto r = to_chars(m_buffer, m_buffer + sizeof(m_buffer) - 1, value, chars_format::fixed, precision);
char buffer[32];
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::fixed, precision);
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= m_buffer and r.ptr < m_buffer + sizeof(m_buffer));
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value = std::string_view(m_buffer, r.ptr - m_buffer);
m_value.assign(buffer, r.ptr - buffer);
}
/// \brief constructor for an item with name \a name and as
......@@ -94,13 +94,15 @@ class item
using namespace std;
using namespace cif;
auto r = to_chars(m_buffer, m_buffer + sizeof(m_buffer) - 1, value, chars_format::general);
char buffer[32];
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::general);
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= m_buffer and r.ptr < m_buffer + sizeof(m_buffer));
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value = std::string_view(m_buffer, r.ptr - m_buffer);
m_value.assign(buffer, r.ptr - buffer);
}
/// \brief constructor for an item with name \a name and as
......@@ -109,13 +111,15 @@ class item
item(const std::string_view name, const T &value)
: m_name(name)
{
auto r = std::to_chars(m_buffer, m_buffer + sizeof(m_buffer) - 1, value);
char buffer[32];
auto r = std::to_chars(buffer, buffer + sizeof(buffer) - 1, value);
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= m_buffer and r.ptr < m_buffer + sizeof(m_buffer));
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value = std::string_view(m_buffer, r.ptr - m_buffer);
m_value.assign(buffer, r.ptr - buffer);
}
/// \brief constructor for an item with name \a name and as
......@@ -152,10 +156,17 @@ class item
/// \brief the length of the value string
size_t length() const { return m_value.length(); }
/// \brief support for structured binding
template<size_t N>
decltype(auto) get() const
{
if constexpr (N == 0) return name();
else if constexpr (N == 1) return value();
}
private:
std::string_view m_name;
std::string_view m_value;
char m_buffer[64]; // TODO: optimize this magic number, might be too large
std::string m_value;
};
// --------------------------------------------------------------------
......@@ -193,7 +204,7 @@ struct item_value
// By using std::string_view instead of c_str we obain a
// nice performance gain since we avoid many calls to strlen.
std::string_view text() const
constexpr inline std::string_view text() const
{
return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
}
......@@ -289,9 +300,6 @@ struct item_handle
std::string_view text() const;
// bool operator!=(const std::string &s) const { return s != c_str(); }
// bool operator==(const std::string &s) const { return s == c_str(); }
item_handle(uint16_t column, row_handle &row)
: m_column(column)
, m_row_handle(row)
......@@ -491,3 +499,21 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::str
};
} // namespace cif
namespace std
{
template<> struct tuple_size<::cif::item>
: public std::integral_constant<std::size_t, 2> {};
template<> struct tuple_element<0, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().name());
};
template<> struct tuple_element<1, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().value());
};
}
\ No newline at end of file
......@@ -307,13 +307,13 @@ class row_initializer : public std::vector<item>
row_initializer(row_handle rh);
void set_value(std::string_view name, std::string_view value);
void set_value(item &&i)
void set_value(const item &i)
{
set_value(i.name(), i.value());
}
void set_value_if_empty(std::string_view name, std::string_view value);
void set_value_if_empty(item &&i)
void set_value_if_empty(const item &i)
{
set_value_if_empty(i.name(), i.value());
}
......
......@@ -367,7 +367,7 @@ row *category_index::find_by_value(row_initializer k) const
{
auto fld = m_category.get_column_name(f);
auto ki = find_if(k.begin(), k.end(), [&fld](item &i) { return i.name() == fld; });
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; });
if (ki == k.end())
k2.emplace_back(fld, "");
else
......@@ -1715,54 +1715,54 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
// #endif
}
category::iterator category::erase_impl(const_iterator pos)
{
if (pos == cend())
return end();
assert(false);
// TODO: implement
// row *n = const_cast<row *>(pos.row());
// row *cur;
// if (m_head == n)
// {
// m_head = static_cast<row *>(m_head->m_next);
// if (m_head == nullptr)
// m_tail = nullptr;
// n->m_next = nullptr;
// delete_row(n);
// cur = m_head;
// }
// else
// {
// cur = static_cast<row *>(n->m_next);
// if (m_tail == n)
// m_tail = static_cast<row *>(n->m_prev);
// row *p = m_head;
// while (p != nullptr and p->m_next != n)
// p = p->m_next;
// if (p != nullptr and p->m_next == n)
// {
// p->m_next = n->m_next;
// if (p->m_next != nullptr)
// p->m_next->m_prev = p;
// n->m_next = nullptr;
// }
// else
// throw std::runtime_error("remove for a row not found in the list");
// delete_row(n);
// }
// return iterator(*this, cur);
}
// category::iterator category::erase_impl(const_iterator pos)
// {
// if (pos == cend())
// return end();
// assert(false);
// // TODO: implement
// // row *n = const_cast<row *>(pos.row());
// // row *cur;
// // if (m_head == n)
// // {
// // m_head = static_cast<row *>(m_head->m_next);
// // if (m_head == nullptr)
// // m_tail = nullptr;
// // n->m_next = nullptr;
// // delete_row(n);
// // cur = m_head;
// // }
// // else
// // {
// // cur = static_cast<row *>(n->m_next);
// // if (m_tail == n)
// // m_tail = static_cast<row *>(n->m_prev);
// // row *p = m_head;
// // while (p != nullptr and p->m_next != n)
// // p = p->m_next;
// // if (p != nullptr and p->m_next == n)
// // {
// // p->m_next = n->m_next;
// // if (p->m_next != nullptr)
// // p->m_next->m_prev = p;
// // n->m_next = nullptr;
// // }
// // else
// // throw std::runtime_error("remove for a row not found in the list");
// // delete_row(n);
// // }
// // return iterator(*this, cur);
// }
void category::swap_item(size_t column_ix, row_handle &a, row_handle &b)
{
......
......@@ -61,7 +61,7 @@ bool is_column_type_uchar(const category &cat, std::string_view col)
namespace detail
{
void key_is_condition_impl::prepare(const category &c)
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
m_item_ix = get_column_ix(c, m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
......@@ -72,8 +72,67 @@ namespace detail
{
m_single_hit = c[{ { m_item_tag, m_value } }];
}
return this;
}
condition_impl *and_condition_impl::prepare(const category &c)
{
for (auto &sub : mSub)
sub = sub->prepare(c);
for (;;)
{
auto si = find_if(mSub.begin(), mSub.end(), [](condition_impl *sub) { return dynamic_cast<and_condition_impl *>(sub) != nullptr; });
if (si == mSub.end())
break;
and_condition_impl *sub_and = static_cast<and_condition_impl *>(*si);
mSub.erase(si);
mSub.insert(mSub.end(), sub_and->mSub.begin(), sub_and->mSub.end());
sub_and->mSub.clear();
delete sub_and;
}
return this;
}
condition_impl *or_condition_impl::prepare(const category &c)
{
condition_impl *result = this;
mA = mA->prepare(c);
mB = mB->prepare(c);
key_equals_condition_impl *equals = dynamic_cast<key_equals_condition_impl*>(mA);
key_is_empty_condition_impl *empty = dynamic_cast<key_is_empty_condition_impl*>(mB);
if (equals == nullptr and empty == nullptr)
{
equals = dynamic_cast<key_equals_condition_impl*>(mB);
empty = dynamic_cast<key_is_empty_condition_impl*>(mA);
}
if (equals != nullptr and empty != nullptr)
{
result = new detail::key_equals_or_empty_condition_impl(equals, empty);
result = result->prepare(c);
delete this;
}
return result;
}
} // namespace detail
void condition::prepare(const category &c)
{
if (m_impl)
m_impl = m_impl->prepare(c);
m_prepared = true;
}
} // namespace cif
......@@ -206,8 +206,8 @@ void file::save(const std::filesystem::path &p) const
void file::save(std::ostream &os) const
{
if (not is_valid())
std::cout << "File is not valid!" << std::endl;
// if (not is_valid())
// std::cout << "File is not valid!" << std::endl;
for (auto &db : *this)
db.write(os);
......
......@@ -99,7 +99,10 @@ float atom::atom_impl::get_property_float(std::string_view name) const
void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
{
row().assign(name, value, true, true);
auto r = row();
if (not r)
throw std::runtime_error("Trying to modify a row that does not exist");
r.assign(name, value, true, true);
}
// int atom::atom_impl::compare(const atom_impl &b) const
......@@ -2231,9 +2234,9 @@ branch &structure::create_branch(std::vector<row_initializer> atoms)
// sanity check
for (auto &nag_atom : atoms)
{
for (auto info : nag_atom)
for (const auto &[name, value] : nag_atom)
{
if (info.name() == "label_comp_id" and info.value() != "NAG")
if (name == "label_comp_id" and value != "NAG")
throw std::logic_error("The first sugar in a branch should be a NAG");
}
}
......@@ -2316,14 +2319,14 @@ branch &structure::extend_branch(const std::string &asym_id, std::vector<row_ini
for (auto &atom : atom_info)
{
for (auto info : atom)
for (const auto &[name, value] : atom)
{
if (info.name() != "label_comp_id")
if (name != "label_comp_id")
continue;
if (compoundID.empty())
compoundID = info.value();
else if (info.value() != compoundID)
compoundID = value;
else if (value != compoundID)
throw std::logic_error("All atoms should be of the same type");
}
}
......
......@@ -88,7 +88,7 @@ void row_initializer::set_value(std::string_view name, std::string_view value)
void row_initializer::set_value_if_empty(std::string_view name, std::string_view value)
{
if (find_if(begin(), end(), [name](item &i) { return i.name() == name; }) == end())
if (find_if(begin(), end(), [name](auto &i) { return i.name() == name; }) == end())
emplace_back(name, value);
}
......
......@@ -118,14 +118,8 @@ BOOST_AUTO_TEST_CASE(create_sugar_1)
// NOTE, row_initializer does not actually hold the data, so copy it first
// before it gets destroyed by remove_residue
std::list<std::string> storage;
for (auto r : as.find("label_asym_id"_key == "L"))
{
auto &ri = ai.emplace_back(r);
for (auto &rii : ri)
rii.value(storage.emplace_back(rii.value()));
}
s.remove_residue(NAG);
......@@ -156,7 +150,6 @@ BOOST_AUTO_TEST_CASE(create_sugar_2)
BOOST_CHECK_EQUAL(bH.size(), 2);
std::list<std::string> storage;
std::vector<cif::row_initializer> ai[2];
auto &db = s.get_datablock();
......@@ -165,17 +158,11 @@ BOOST_AUTO_TEST_CASE(create_sugar_2)
for (size_t i = 0; i < 2; ++i)
{
for (auto r : as.find("label_asym_id"_key == "H" and "auth_seq_id"_key == i + 1))
{
auto &ri = ai[i].emplace_back(r);
for (auto &rii : ri)
rii.value(storage.emplace_back(rii.value()));
}
}
s.remove_branch(bH);
file.save(gTestDir / "test-create_sugar_2-0.cif");
BOOST_CHECK(file.is_valid());
auto &bN = s.create_branch(ai[0]);
......@@ -217,7 +204,7 @@ BOOST_AUTO_TEST_CASE(delete_sugar_1)
BOOST_CHECK(file.is_valid());
file.save(gTestDir / "test-create_sugar_3.cif");
// file.save(gTestDir / "test-create_sugar_3.cif");
BOOST_CHECK_NO_THROW(cif::mm::structure s2(file));
}
......@@ -142,13 +142,17 @@ BOOST_AUTO_TEST_CASE(item_1)
BOOST_CHECK_EQUAL(i2.value(), ci2.value());
BOOST_CHECK_EQUAL(i3.value(), ci3.value());
item mi1(std::move(i1));
item mi2(std::move(i2));
item mi3(std::move(i3));
item mi1(std::move(ci1));
item mi2(std::move(ci2));
item mi3(std::move(ci3));
BOOST_CHECK_EQUAL(i1.value(), mi1.value());
BOOST_CHECK_EQUAL(i2.value(), mi2.value());
BOOST_CHECK_EQUAL(i3.value(), mi3.value());
BOOST_CHECK(ci1.empty());
BOOST_CHECK(ci2.empty());
BOOST_CHECK(ci3.empty());
}
// --------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment