Commit 6c935996 by maarten

renaming intermediate backup

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@170 a1961a4f-ab94-4bcc-80e8-33b5a54de466
parent ca881b82
......@@ -2,7 +2,7 @@
#pragma once
#include "libcif/config.h"
#include "cif++/Config.h"
#include <boost/filesystem/operations.hpp>
#include <boost/math/quaternion.hpp>
......@@ -10,7 +10,7 @@
namespace libcif
{
enum atom_type : uint8
enum AtomType : uint8
{
Nn = 0, // Unknown
......@@ -143,9 +143,9 @@ enum atom_type : uint8
};
// --------------------------------------------------------------------
// atom_type_info
// AtomTypeInfo
enum radius_type {
enum RadiusType {
eRadiusCalculated,
eRadiusEmpirical,
eRadiusCovalentEmpirical,
......@@ -159,9 +159,9 @@ enum radius_type {
eRadiusTypeCount
};
struct atom_type_info
struct AtomTypeInfo
{
atom_type type;
AtomType type;
std::string name;
std::string symbol;
float weight;
......@@ -169,36 +169,36 @@ struct atom_type_info
float radii[eRadiusTypeCount];
};
extern const atom_type_info kKnownAtoms[];
extern const AtomTypeInfo kKnownAtoms[];
// --------------------------------------------------------------------
// atom_type_traits
// AtomTypeTraits
class atom_type_traits
class AtomTypeTraits
{
public:
atom_type_traits(atom_type a);
atom_type_traits(const std::string& symbol);
AtomTypeTraits(AtomType a);
AtomTypeTraits(const std::string& symbol);
atom_type type() const { return m_info->type; }
std::string name() const { return m_info->name; }
std::string symbol() const { return m_info->symbol; }
float weight() const { return m_info->weight; }
AtomType type() const { return mInfo->type; }
std::string name() const { return mInfo->name; }
std::string symbol() const { return mInfo->symbol; }
float weight() const { return mInfo->weight; }
bool is_metal() const { return m_info->metal; }
bool isMetal() const { return mInfo->metal; }
static bool is_element(const std::string& symbol);
static bool is_metal(const std::string& symbol);
static bool isElement(const std::string& symbol);
static bool isMetal(const std::string& symbol);
float radius(radius_type type = eRadiusSingleBond) const
float radius(RadiusType type = eRadiusSingleBond) const
{
if (type >= eRadiusTypeCount)
throw std::invalid_argument("invalid radius requested");
return m_info->radii[type] / 100.f;
return mInfo->radii[type] / 100.f;
}
private:
const struct atom_type_info* m_info;
const struct AtomTypeInfo* mInfo;
};
}
// cif parsing library
#pragma once
#include "cif++/Config.h"
#include <regex>
#include <iostream>
#include <set>
#include <boost/lexical_cast.hpp>
#include <boost/any.hpp>
#include "cif++/CifUtils.h"
extern int VERBOSE;
/*
Simple C++ interface to CIF files.
Assumptions: a file contains one or more datablocks modelled by the class Datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each Category can contain multiple Items, the columns in the table.
Values are stored as character strings internally.
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::Category{"_entry", { "id", "1MVE" } });
cif::Category atomSite("atom_site");
size_t nr{};
for (auto& myAtom: atoms)
{
atomSite.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", myAtom.type.str() },
...
});
}
e.append(move(atomSite));
cif::File f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.firstDatablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Another way of querying a Category is by using this construct:
auto& cat = e["atom_site"];
auto Rows = cat.find(Key("label_asym_id") == "A" and Key("label_seq_id") == 1);
*/
namespace cif
{
using std::string;
using std::vector;
// mmCIF mapping
// A CIF data file in this case contains entries (data blocks) which can contain
// one or more Category objects. Each Category object contains arrays of Items.
// Better, you can consider the categories as tables containing columns which
// are the Items.
class File;
class Datablock;
class Category;
class Row; // a flyweight class that references data in categories
class Item;
class Validator;
struct ValidateItem;
struct ValidateCategory;
struct ItemColumn;
struct ItemRow;
struct ItemValue;
// --------------------------------------------------------------------
// class Item
//
// This class is only transient, it is used to construct new Rows.
// Access to already stored data is through an ItemReference object.
// Transient name/value pair used while constructing new Rows. The value is
// always kept as text; data already stored in a Category is accessed through
// detail::ItemReference instead.
class Item
{
  public:
	typedef enum { notApplicable, notDefined, text, number } ItemContentType;

	// An empty item: both name and value are empty strings.
	Item() = default;

	// Builds an item from any value type; defined out of line after the class.
	template<typename T>
	Item(const std::string& name, const T& value);

	// Copy and move are plain memberwise operations, so the compiler-generated
	// versions are used. Defaulting them also makes the moves noexcept, which
	// lets std::vector<Item> move instead of copy when it reallocates.
	Item(const Item& rhs) = default;
	Item(Item&& rhs) = default;
	Item& operator=(const Item& rhs) = default;
	Item& operator=(Item&& rhs) = default;

	// Column name of this item.
	const std::string& name() const		{ return mName; }

	// Textual value of this item.
	const std::string& value() const	{ return mValue; }

	// Replaces the textual value.
	void value(const std::string& v)	{ mValue = v; }

	// True when the value is the empty string.
	bool empty() const					{ return mValue.empty(); }

	// Number of characters in the value.
	size_t length() const				{ return mValue.length(); }

	// NUL-terminated view of the value; valid until the value is modified
	// or the item is destroyed.
	const char* c_str() const			{ return mValue.c_str(); }

  private:
	std::string mName;
	std::string mValue;
};
template<typename T>
inline
Item::Item(const string& name, const T& value)
: mName(name), mValue(boost::lexical_cast<string>(value))
{
}
template<>
inline
Item::Item(const string& name, const string& value)
: mName(name), mValue(value)
{
}
// --------------------------------------------------------------------
// class Datablock acts as an STL container for Category objects
// A named collection of Category objects. Iteration (begin()/end()) walks the
// categories in the order they are held in mCategories.
class Datablock
{
public:
// File needs access to the private write() overloads and to mNext.
friend class File;
typedef std::list<Category> CategoryList;
typedef CategoryList::iterator iterator;
typedef CategoryList::const_iterator const_iterator;
Datablock(const string& name);
~Datablock();
// Datablocks cannot be copied.
Datablock(const Datablock&) = delete;
Datablock& operator=(const Datablock&) = delete;
// Returns a copy of the datablock name.
string getName() const { return mName; }
void setName(const string& n) { mName = n; }
// NOTE(review): presumably returns the first value stored for the given
// tag; the definition is not visible here.
string firstItem(const string& tag) const;
iterator begin() { return mCategories.begin(); }
iterator end() { return mCategories.end(); }
const_iterator begin() const { return mCategories.begin(); }
const_iterator end() const { return mCategories.end(); }
// NOTE(review): unlike get() below this returns a reference, so it
// presumably creates the Category when it is missing - confirm.
Category& operator[](const string& name);
// Returns an iterator to the named Category plus a flag; presumably the flag
// tells whether the Category was newly created - confirm against the definition.
std::tuple<iterator,bool> emplace(const std::string& name);
void validate();
void setValidator(Validator* v);
// this one only looks up a Category, returns nullptr if it does not exist
Category* get(const string& name);
// Fills 'tags' with tag names; the ordering rule is defined out of line.
void getTagOrder(vector<string>& tags) const;
private:
// Output is private; File (a friend) is the intended caller.
void write(std::ostream& os);
void write(std::ostream& os, const vector<string>& order);
std::list<Category> mCategories;
string mName;
// Raw, non-initialised-here pointers: both must be set by the constructor.
Validator* mValidator;
// NOTE(review): presumably the next datablock in the owning File's list.
Datablock* mNext;
};
// --------------------------------------------------------------------
// class Row acts as a container for Item objects. It has a more useful
// interface for accessing the contained columns. The get() method
// returns a getRowResult object that can be used to access only a subset
// of column values by index or by name.
namespace detail
{
// ItemReference is a helper class
struct ItemReference
{
const char* mName;
ItemRow* mRow;
template<typename T>
ItemReference& operator=(const T& value)
{
this->operator=(boost::lexical_cast<string>(value));
return *this;
}
// operator string() const { return c_str(); }
template<typename T>
T as() const
{
T result = 0;
if (not empty())
result = boost::lexical_cast<T>(c_str());
return result;
}
template<typename T>
int compare(const T& value) const
{
int result = 0;
try
{
double v = boost::lexical_cast<T>(c_str());
if (v < value)
result = -1;
else if (v > value)
result = 1;
}
catch (...)
{
if (VERBOSE)
std::cerr << "conversion error in compare for '" << c_str() << '\'' << std::endl;
result = 1;
}
return result;
}
bool empty() const;
// bool unapplicable() const;
const char* c_str() const;
bool operator!=(const string& s) const { return s != c_str(); }
bool operator==(const string& s) const { return s == c_str(); }
};
// Text access: hand back a copy of the stored characters.
template<>
inline string ItemReference::as<string>() const
{
	const char* raw = c_str();
	return string(raw);
}

// Raw access: the pointer returned by c_str(), no copy is made.
template<>
inline const char* ItemReference::as<const char*>() const
{
	const char* raw = c_str();
	return raw;
}
// Text comparisons go through icompare() instead of a numeric conversion.
template<>
inline int ItemReference::compare<string>(const string& value) const
{
	const char* rhs = value.c_str();
	return icompare(c_str(), rhs);
}

// Same as above for C strings.
template<>
inline int ItemReference::compare(const char* const& value) const
{
	const char* lhs = c_str();
	return cif::icompare(lhs, value);
}
// Streams the stored text of the referenced value.
inline std::ostream& operator<<(std::ostream& os, const ItemReference& rhs)
{
	return os << rhs.c_str();
}
// Explicit specialization for string values; declared here, defined in
// another translation unit. The generic operator= forwards to this one.
template<>
ItemReference& ItemReference::operator=(const string& value);
// some helper classes to help create tuple result types
// Compile-time concatenation of std::tuple types: tupleCatter<A, B, ...>::type
// is the tuple holding the element types of A, then B, and so on.
template<typename...>
struct tupleCatter;

// A single tuple is already the answer.
template<typename... Ts>
struct tupleCatter<std::tuple<Ts...>>
{
	using type = std::tuple<Ts...>;
};

// Merge the first two tuples and recurse on what is left.
template<typename... T1s, typename... T2s, typename... Rem>
struct tupleCatter<std::tuple<T1s...>, std::tuple<T2s...>, Rem...>
{
	using type = typename tupleCatter<std::tuple<T1s..., T2s...>, Rem...>::type;
};
// Builds a tuple of ItemReference objects, one per requested column, from a
// result object that exposes N and operator[](size_t).
template<typename...>
struct colGetter;

// Last column: it sits at position N - 1.
template<typename T>
struct colGetter<T>
{
	typedef std::tuple<const ItemReference> type;

	template<typename Res>
	static type get(Res& rs)
	{
		const size_t column = Res::N - 1;
		return type{ rs[column] };
	}
};

// Any earlier column: its position is N - 1 minus the number of columns
// that still follow; the remaining columns are appended recursively.
template<typename T, typename... Ts>
struct colGetter<T, Ts...>
{
	typedef colGetter<Ts...> next;
	typedef typename tupleCatter<std::tuple<const ItemReference>, typename next::type>::type type;

	template<typename Res>
	static type get(Res& rs)
	{
		const size_t column = Res::N - 1 - sizeof...(Ts);
		return std::tuple_cat(std::tuple<const ItemReference>{ rs[column] }, next::get(rs));
	}
};
// Result of Row::get(): remembers the row and the N requested column names
// so that the values can be looked up on demand by position.
template<typename... C>
struct getRowResult
{
// N is the number of requested columns.
enum { N = sizeof...(C) };
typedef typename colGetter<C...>::type tupleType;
// const ItemReference operator[](const string& col) const
// {
// return mRow[col];
// }
// Returns a reference to the value of the ix-th requested column.
// ix is not range-checked here.
const ItemReference operator[](size_t ix) const
{
return mRow[mColumns[ix]];
}
// Keeps a reference to r (not a copy) together with the column names, so
// the result must not outlive the Row it was created from.
getRowResult(Row& r, C... columns)
: mRow(r), mColumns({{columns...}}) {}
Row& mRow;
std::array<const char*, N> mColumns;
};
// we want to be able to tie some variables to a RowResult, for this we use tiewraps
template<int IX, typename... Ts>
struct tieWrap;
template<int IX, typename T>
struct tieWrap<IX,T>
{
tieWrap(T& t)
: mVal(t) {}
template<typename Res>
void operator=(const Res& rr)
{
typedef typename std::remove_reference<T>::type basicType;
const ItemReference v = rr[IX];
basicType tv = v.as<basicType>();
mVal = tv;
}
T& mVal;
};
template<int IX, typename T, typename... Ts>
struct tieWrap<IX, T, Ts...>
{
typedef tieWrap<IX + 1, Ts...> next;
tieWrap(T& t, Ts&... ts)
: mVal(t), mNext(ts...) {}
template<typename Res>
void operator=(const Res& rr)
{
typedef typename std::remove_reference<T>::type basicType;
const ItemReference v = rr[IX];
basicType tv = v.as<basicType>();
mVal = tv;
mNext.operator=(rr);
}
T& mVal;
next mNext;
};
}
// Ties the given variables together so a row result can be assigned to them
// in one statement; the variables are held by reference.
template<typename... Ts>
auto tie(Ts&... v) -> detail::tieWrap<0, Ts...>
{
	typedef detail::tieWrap<0, Ts...> wrapper;
	return wrapper(v...);
}
// Handle to one stored row. A Row only holds a pointer to the row data
// (mData); a default-constructed Row holds nullptr and tests false.
class Row
{
public:
friend class Category;
friend class catIndex;
friend class RowComparator;
friend struct detail::ItemReference;
Row(ItemRow* data = nullptr) : mData(data) {}
Row(const Row& rhs);
Row& operator=(const Row& rhs);
// Forward iterator over the items of a row. Each position is exposed
// through mCurrent, an Item held inside the iterator itself.
// NOTE(review): the base class is declared with 'const Item' but baseType
// below uses plain 'Item', so pointer/reference are non-const - confirm
// whether that is intended.
struct const_iterator : public std::iterator<std::forward_iterator_tag, const Item>
{
typedef std::iterator<std::forward_iterator_tag, Item> baseType;
typedef typename baseType::pointer pointer;
typedef typename baseType::reference reference;
const_iterator(ItemRow* data, ItemValue* ptr);
reference operator*() { return mCurrent; }
pointer operator->() { return &mCurrent; }
const_iterator& operator++();
const_iterator operator++(int) { const_iterator result(*this); this->operator++(); return result; }
// Two iterators are equal when they point at the same ItemValue.
bool operator==(const const_iterator& rhs) const { return mPtr == rhs.mPtr; }
bool operator!=(const const_iterator& rhs) const { return mPtr != rhs.mPtr; }
private:
void fetch();
ItemRow* mData;
ItemValue* mPtr;
Item mCurrent;
};
// checks for an initialized Row:
operator bool() const { return mData != nullptr; }
bool empty() const;
const_iterator begin() const;
const_iterator end() const;
// Column access by name. The returned ItemReference stores the ItemTag
// pointer as given, so it must not outlive the string it was created from
// (for the std::string overloads that is ItemTag.c_str()).
// TODO: implement real const version?
const detail::ItemReference operator[](const char* ItemTag) const
{
return detail::ItemReference{ItemTag, mData};
}
detail::ItemReference operator[](const char* ItemTag)
{
return detail::ItemReference{ItemTag, mData};
}
const detail::ItemReference operator[](const string& ItemTag) const
{
return detail::ItemReference{ItemTag.c_str(), mData};
}
detail::ItemReference operator[](const string& ItemTag)
{
return detail::ItemReference{ItemTag.c_str(), mData};
}
// Selects several columns at once; the result refers back to this Row.
template<typename... C>
auto get(C... columns) -> detail::getRowResult<C...>
{
return detail::getRowResult<C...>(*this, columns...);
}
// Rows are equal when they refer to the same row data (pointer identity).
bool operator==(const Row& rhs) const
{
return mData == rhs.mData;
}
ItemRow* data() const { return mData; }
// Exchanges the row data pointers of the two handles.
void swap(Row& rhs)
{
std::swap(mData, rhs.mData);
}
private:
void assign(const string& name, const string& value, bool emplacing);
void assign(const Item& i, bool emplacing);
ItemRow* mData;
};
// swap for Rows is defined below
// --------------------------------------------------------------------
// some more templates to be able to do querying
namespace detail
{
	// Interface of a query predicate: test() decides whether a row of a
	// category matches, str() gives a printable description.
	struct ConditionImpl
	{
		virtual ~ConditionImpl() = default;

		virtual bool test(const Category& c, const Row& r) const = 0;
		virtual std::string str() const = 0;
	};
}
// Move-only owner of a detail::ConditionImpl. A Condition that has been
// moved from (for example after being combined with && or ||) holds nullptr.
struct Condition
{
	// Takes ownership of impl; it is deleted by the destructor.
	Condition(detail::ConditionImpl* impl) : mImpl(impl) {}

	// Copying would lead to a double delete of mImpl.
	Condition(const Condition&) = delete;
	Condition& operator=(const Condition&) = delete;

	// Steals the implementation of rhs and leaves rhs empty.
	Condition(Condition&& rhs) noexcept
		: mImpl(rhs.mImpl)
	{
		rhs.mImpl = nullptr;
	}

	// Exchanges implementations; the previous one is released when rhs
	// is destroyed.
	Condition& operator=(Condition&& rhs) noexcept
	{
		std::swap(mImpl, rhs.mImpl);
		return *this;
	}

	~Condition()
	{
		delete mImpl;
	}

	// Evaluates the condition for row r of category c. Calling this on an
	// empty (moved-from) Condition is a programming error.
	bool operator()(const Category& c, const Row& r) const
	{
		assert(mImpl);
		return mImpl->test(c, r);
	}

	// Printable form of the condition; an empty (moved-from) Condition
	// yields an empty string instead of dereferencing a null pointer.
	std::string str() const
	{
		return mImpl ? mImpl->str() : std::string();
	}

	detail::ConditionImpl*	mImpl;
};
namespace detail
{
// Matches rows whose column mItemTag compares equal to mValue.
template<typename T>
struct KeyIsConditionImpl : public ConditionImpl
{
	typedef T valueType;

	KeyIsConditionImpl(const string& ItemTag, const valueType& value)
		: mItemTag(ItemTag), mValue(value) {}

	// True when ItemReference::compare reports equality (0).
	virtual bool test(const Category& c, const Row& r) const
	{
		const int order = r[mItemTag].template compare<valueType>(mValue);
		return order == 0;
	}

	virtual std::string str() const
	{
		const std::string shown = boost::lexical_cast<std::string>(mValue);
		return mItemTag + " == " + shown;
	}

	string		mItemTag;
	valueType	mValue;
};
// Matches rows whose column mItemTag does not compare equal to mValue.
template<typename T>
struct KeyIsNotConditionImpl : public ConditionImpl
{
	typedef T valueType;

	KeyIsNotConditionImpl(const string& ItemTag, const valueType& value)
		: mItemTag(ItemTag), mValue(value) {}

	// True when ItemReference::compare reports anything but equality.
	virtual bool test(const Category& c, const Row& r) const
	{
		const int order = r[mItemTag].template compare<valueType>(mValue);
		return order != 0;
	}

	virtual std::string str() const
	{
		const std::string shown = boost::lexical_cast<std::string>(mValue);
		return mItemTag + " != " + shown;
	}

	string		mItemTag;
	valueType	mValue;
};
// Condition whose test is delegated to a stored callable; the tag is only
// kept for the printable description.
template<typename COMP>
struct KeyCompareConditionImpl : public ConditionImpl
{
	KeyCompareConditionImpl(const string& ItemTag, COMP&& comp)
		: mItemTag(ItemTag), mComp(std::move(comp)) {}

	virtual bool test(const Category& c, const Row& r) const
	{
		const bool matches = mComp(c, r);
		return matches;
	}

	// The compared value is not known here, so only the tag is printed.
	virtual std::string str() const
	{
		return mItemTag + " compare ";
	}

	string	mItemTag;
	COMP	mComp;
};
struct KeyMatchesConditionImpl : public ConditionImpl
{
KeyMatchesConditionImpl(const string& ItemTag, const std::regex& rx)
: mItemTag(ItemTag), mRx(rx) {}
virtual bool test(const Category& c, const Row& r) const
{
return std::regex_match(r[mItemTag].as<string>(), mRx);
}
virtual std::string str() const
{
return mItemTag + " ~= " + "<rx>";
}
string mItemTag;
std::regex mRx;
};
// Matches rows in which at least one field equals mValue; test() is
// defined further down, once Category is complete.
template<typename T>
struct anyIsConditionImpl : public ConditionImpl
{
	typedef T valueType;

	anyIsConditionImpl(const valueType& value)
		: mValue(value) {}

	virtual bool test(const Category& c, const Row& r) const;

	virtual std::string str() const
	{
		const std::string shown = boost::lexical_cast<std::string>(mValue);
		return "any == " + shown;
	}

	valueType	mValue;
};
// Matches rows in which at least one field matches the regular expression;
// test() is defined further down, once Category is complete.
struct anyMatchesConditionImpl : public ConditionImpl
{
	anyMatchesConditionImpl(const std::regex& rx)
		: mRx(rx) {}

	virtual bool test(const Category& c, const Row& r) const;

	// The pattern itself is not printed, only a placeholder.
	virtual std::string str() const
	{
		const std::string description = "any ~= <rx>";
		return description;
	}

	std::regex	mRx;
};
// Conjunction of two conditions. The implementations of both operands are
// taken over (the operands are left empty) and deleted by the destructor.
struct andConditionImpl : public ConditionImpl
{
	andConditionImpl(Condition&& a, Condition&& b)
		: mA(nullptr), mB(nullptr)
	{
		std::swap(mA, a.mImpl);
		std::swap(mB, b.mImpl);
	}

	// mA and mB are owning raw pointers: a memberwise copy would delete
	// them twice, so copying is disabled.
	andConditionImpl(const andConditionImpl&) = delete;
	andConditionImpl& operator=(const andConditionImpl&) = delete;

	~andConditionImpl()
	{
		delete mA;
		delete mB;
	}

	// The second operand is only evaluated when the first one matches.
	virtual bool test(const Category& c, const Row& r) const
	{
		return mA->test(c, r) and mB->test(c, r);
	}

	virtual std::string str() const
	{
		return "(" + mA->str() + ") and (" + mB->str() + ")";
	}

	ConditionImpl*	mA;
	ConditionImpl*	mB;
};
// Disjunction of two conditions. The implementations of both operands are
// taken over (the operands are left empty) and deleted by the destructor.
struct orConditionImpl : public ConditionImpl
{
	orConditionImpl(Condition&& a, Condition&& b)
		: mA(nullptr), mB(nullptr)
	{
		std::swap(mA, a.mImpl);
		std::swap(mB, b.mImpl);
	}

	// mA and mB are owning raw pointers: a memberwise copy would delete
	// them twice, so copying is disabled.
	orConditionImpl(const orConditionImpl&) = delete;
	orConditionImpl& operator=(const orConditionImpl&) = delete;

	~orConditionImpl()
	{
		delete mA;
		delete mB;
	}

	// The second operand is only evaluated when the first one does not match.
	virtual bool test(const Category& c, const Row& r) const
	{
		return mA->test(c, r) or mB->test(c, r);
	}

	virtual std::string str() const
	{
		return "(" + mA->str() + ") or (" + mB->str() + ")";
	}

	ConditionImpl*	mA;
	ConditionImpl*	mB;
};
}
// Combines two conditions into one that requires both; a and b are emptied.
inline Condition operator&&(Condition&& a, Condition&& b)
{
	detail::ConditionImpl* both = new detail::andConditionImpl(std::move(a), std::move(b));
	return Condition(both);
}
// Combines two conditions into one that requires either; a and b are emptied.
inline Condition operator||(Condition&& a, Condition&& b)
{
	detail::ConditionImpl* either = new detail::orConditionImpl(std::move(a), std::move(b));
	return Condition(either);
}
// Names a column; applying a comparison operator to a Key yields a
// Condition that tests that column. The returned Condition is
// self-contained: it keeps its own copy of the tag and of the compared
// value, so it stays valid after the Key (usually a temporary) is gone.
struct Key
{
	Key(const string& ItemTag) : mItemTag(ItemTag) {}
	Key(const char* ItemTag) : mItemTag(ItemTag) {}

	// Column equals v, compared via ItemReference::compare<T>.
	template<typename T>
	Condition operator==(const T& v) const
	{
		return Condition(new detail::KeyIsConditionImpl<T>(mItemTag, v));
	}

	// Column equals the given text; a null pointer is treated as "".
	Condition operator==(const char* v) const
	{
		string value(v ? v : "");
		return Condition(new detail::KeyIsConditionImpl<std::string>(mItemTag, value));
	}

	// Column differs from v.
	template<typename T>
	Condition operator!=(const T& v) const
	{
		return Condition(new detail::KeyIsNotConditionImpl<T>(mItemTag, v));
	}

	// Column differs from the given text; a null pointer is treated as "".
	Condition operator!=(const char* v) const
	{
		string value(v ? v : "");
		return Condition(new detail::KeyIsNotConditionImpl<std::string>(mItemTag, value));
	}

	// Relational operators: the column value is converted with as<T>() and
	// compared with v. The lambdas capture a copy of the tag rather than
	// 'this', because the closure is stored in the returned Condition and
	// may be evaluated after this Key has been destroyed.
	template<typename T>
	Condition operator>(const T& v) const
	{
		string tag = mItemTag;
		auto comp = [tag, v](const Category& c, const Row& r) -> bool { return r[tag].as<T>() > v; };
		return Condition(new detail::KeyCompareConditionImpl<decltype(comp)>(mItemTag, std::move(comp)));
	}

	template<typename T>
	Condition operator>=(const T& v) const
	{
		string tag = mItemTag;
		auto comp = [tag, v](const Category& c, const Row& r) -> bool { return r[tag].as<T>() >= v; };
		return Condition(new detail::KeyCompareConditionImpl<decltype(comp)>(mItemTag, std::move(comp)));
	}

	template<typename T>
	Condition operator<(const T& v) const
	{
		string tag = mItemTag;
		auto comp = [tag, v](const Category& c, const Row& r) -> bool { return r[tag].as<T>() < v; };
		return Condition(new detail::KeyCompareConditionImpl<decltype(comp)>(mItemTag, std::move(comp)));
	}

	template<typename T>
	Condition operator<=(const T& v) const
	{
		string tag = mItemTag;
		auto comp = [tag, v](const Category& c, const Row& r) -> bool { return r[tag].as<T>() <= v; };
		return Condition(new detail::KeyCompareConditionImpl<decltype(comp)>(mItemTag, std::move(comp)));
	}

	string mItemTag;
};
// Comparing a Key with a regular expression yields a pattern-match condition
// on that column instead of an equality test.
template<>
inline Condition Key::operator==(const std::regex& rx) const
{
	detail::ConditionImpl* matcher = new detail::KeyMatchesConditionImpl(mItemTag, rx);
	return Condition(matcher);
}
// Placeholder for "any column": any() == v gives a Condition that matches
// rows in which at least one field equals v.
struct any
{
	template<typename T>
	Condition operator==(const T& v) const
	{
		detail::ConditionImpl* impl = new detail::anyIsConditionImpl<T>(v);
		return Condition(impl);
	}
};
// any() == regex gives a Condition that matches rows in which at least one
// field matches the pattern.
template<>
inline Condition any::operator==(const std::regex& rx) const
{
	detail::ConditionImpl* matcher = new detail::anyMatchesConditionImpl(rx);
	return Condition(matcher);
}
// --------------------------------------------------------------------
// class RowSet is used to return find results. Use it to re-order the results
// or to group them
// A vector of Row handles that also remembers the Category the rows came
// from (by reference, so the Category must outlive the RowSet).
class RowSet : public vector<Row>
{
public:
RowSet(Category& cat);
// Single-column convenience form. The braced argument selects the
// initializer_list overload below, so this does not recurse.
RowSet& orderBy(const string& Item)
{ return orderBy({ Item }); }
// Reorders the set by the given columns; returns *this-style reference
// for chaining. The ordering rule is defined out of line.
RowSet& orderBy(std::initializer_list<string> Items);
private:
Category& mCat;
};
// --------------------------------------------------------------------
// class Category acts as an STL container for Row objects
// A named table of rows inside a Datablock. Rows are reachable through
// mHead/mTail; the iterator hands out Row handles, not row data.
class Category
{
public:
friend class Datablock;
friend class Row;
friend struct detail::ItemReference;
Category(Datablock& db, const string& name, Validator* Validator);
// Categories cannot be copied.
Category(const Category&) = delete;
Category& operator=(const Category&) = delete;
~Category();
// Returns a copy of the category name.
const string name() const { return mName; }
const detail::ItemReference getFirstItem(const char* ItemName) const;
// Forward iterator; it holds a Row handle (mCurrent) and dereferencing
// returns a reference to that handle inside the iterator.
struct iterator : public std::iterator<std::forward_iterator_tag, Row>
{
friend class Category;
typedef std::iterator<std::forward_iterator_tag, Row> baseType;
typedef typename baseType::pointer pointer;
typedef typename baseType::reference reference;
iterator(ItemRow* data) : mCurrent(data) {}
reference operator*() { return mCurrent; }
pointer operator->() { return &mCurrent; }
iterator& operator++();
iterator operator++(int) { iterator result(*this); this->operator++(); return result; }
// Iterators are equal when their Row handles refer to the same row data.
bool operator==(const iterator& rhs) const { return mCurrent == rhs.mCurrent; }
bool operator!=(const iterator& rhs) const { return not (mCurrent == rhs.mCurrent); }
private:
Row mCurrent;
};
iterator begin();
iterator end();
bool empty() const;
size_t size() const;
void clear();
// Handles to the first and last row; for an empty category these wrap
// whatever mHead/mTail hold (presumably nullptr - set by the constructor).
Row front() { return Row(mHead); }
Row back() { return Row(mTail); }
// Query interface: all three take the Condition by rvalue reference.
Row operator[](Condition&& cond);
RowSet find(Condition&& cond);
bool exists(Condition&& cond);
// Single-column convenience form; the braced argument selects the
// initializer_list overload below.
RowSet orderBy(const string& Item)
{ return orderBy({ Item }); }
RowSet orderBy(std::initializer_list<string> Items);
// emplace overloads all funnel into the iterator-range template below.
// The bool in the result presumably tells whether a new row was added -
// confirm against the definition.
std::tuple<Row,bool> emplace(Item value) { return emplace({ value }); }
std::tuple<Row,bool> emplace(std::initializer_list<Item> values)
{ return emplace(values.begin(), values.end()); }
std::tuple<Row,bool> emplace(Row r);
template<class Iter>
std::tuple<Row,bool> emplace(Iter b, Iter e);
void erase(Condition&& cond);
void erase(Row r);
void erase(iterator ri);
void validate();
const Validator& getValidator() const;
const ValidateCategory* getCatValidator() const { return mCatValidator; }
void setValidator(Validator* v);
// Sets of field names, returned by value.
iset fields() const;
iset mandatoryFields() const;
iset keyFields() const;
void drop(const string& field);
void getTagOrder(vector<string>& tags) const;
// return index for known column, or the next available column index
size_t getColumnIndex(const string& name) const;
const string& getColumnName(size_t columnIndex) const;
void reorderByIndex();
private:
// Output is private; Datablock (a friend) is the intended caller.
void write(std::ostream& os);
void write(std::ostream& os, const vector<string>& order);
void write(std::ostream& os, const vector<int>& order, bool includeEmptyColumns);
size_t addColumn(const string& name);
Datablock& mDb;
string mName;
Validator* mValidator;
const ValidateCategory* mCatValidator = nullptr;
vector<ItemColumn> mColumns;
// Raw pointers without in-class initialisers: the constructor must set them.
ItemRow* mHead;
ItemRow* mTail;
class catIndex* mIndex;
};
// --------------------------------------------------------------------
// A CIF file: a sequence of Datablock objects reachable from mHead, plus an
// optional Validator. Files can be moved but not copied.
class File
{
  public:
	// The parser class is called Parser (see the parser header); the older
	// spelling is kept so existing code that relies on it still compiles.
	friend class parser;
	friend class Parser;
	friend class Validator;

	File();
	File(std::istream& is, bool validate = false);
	File(File&& rhs);
	File(const File& rhs) = delete;
	File& operator=(const File& rhs) = delete;

	~File();

	void load(std::istream& is);
	void save(std::ostream& os);

	// Saves using the given tag order; simply forwards to write().
	void save(std::ostream& os, const vector<string>& order)	{ write(os, order); }
	void write(std::ostream& os, const vector<string>& order);

	void loadDictionary();					// load the default dictionary, that is mmcifDdl in this case
	void loadDictionary(const char* dict);	// load one of the compiled in dictionaries
	void loadDictionary(std::istream& is);	// load dictionary from input stream

	void validate();

	// Dereferences mHead without a check: only call this on a file that
	// contains at least one datablock.
	Datablock& firstDatablock()			{ return *mHead; }

	// NOTE(review): takes a raw pointer; presumably the File takes
	// ownership of e - confirm against the definition.
	void append(Datablock* e);

	Datablock& operator[](const string& name);

	// Forward iterator over the datablocks; it holds a plain pointer to the
	// current Datablock.
	struct iterator : public std::iterator<std::forward_iterator_tag, Datablock>
	{
		typedef std::iterator<std::forward_iterator_tag, Datablock>	baseType;
		typedef typename baseType::pointer							pointer;
		typedef typename baseType::reference						reference;

		iterator(Datablock* db) : mCurrent(db) {}

		reference operator*()						{ return *mCurrent; }
		pointer operator->()						{ return mCurrent; }

		iterator& operator++();
		iterator operator++(int)					{ iterator result(*this); this->operator++(); return result; }

		bool operator==(const iterator& rhs) const	{ return mCurrent == rhs.mCurrent; }
		bool operator!=(const iterator& rhs) const	{ return not (mCurrent == rhs.mCurrent); }

	  private:
		Datablock*		mCurrent;
	};

	iterator begin() const;
	iterator end() const;

	const Validator& getValidator() const;
	void getTagOrder(vector<string>& tags) const;

  private:

	void setValidator(Validator* v);

	Datablock*	mHead;
	Validator*	mValidator;
};
// --------------------------------------------------------------------
// some postponed inlines
namespace detail
{
// True as soon as one field of the row, converted to valueType, equals
// mValue. Fields whose text cannot be converted are skipped on purpose.
template<typename T>
inline
bool anyIsConditionImpl<T>::test(const Category& c, const Row& r) const
{
	for (const auto& field: c.fields())
	{
		try
		{
			if (r[field].as<valueType>() == mValue)
				return true;
		}
		catch (...) {}
	}

	return false;
}
// True as soon as the text of one field of the row matches the pattern
// completely. Exceptions raised while testing a field are ignored and the
// next field is tried.
inline bool anyMatchesConditionImpl::test(const Category& c, const Row& r) const
{
	for (const auto& field: c.fields())
	{
		try
		{
			if (std::regex_match(r[field].as<string>(), mRx))
				return true;
		}
		catch (...) {}
	}

	return false;
}
}
}
namespace std
{
// Explicit specialization of std::swap for cif::Row: swapping two rows
// forwards to Row::swap, which exchanges the row data pointers.
template<>
inline void swap(cif::Row& a, cif::Row& b)
{
a.swap(b);
}
}
#pragma once
#include "cif++/Cif++.h"
// Declared here, defined elsewhere. Presumably writes the contents of
// cifFile to pdbFile in PDB format - confirm against the definition.
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
// CIF parser
// CIF Parser
#include "libcif/cif++.h"
#include "cif++/Cif++.h"
#include <stack>
......@@ -9,10 +9,10 @@ namespace cif
// --------------------------------------------------------------------
class cif_parser_error : public std::runtime_error
class CifParserError : public std::runtime_error
{
public:
cif_parser_error(uint32 line_nr, const std::string& message);
CifParserError(uint32 lineNr, const std::string& message);
};
// --------------------------------------------------------------------
......@@ -28,38 +28,38 @@ enum CharTraitsMask: uint8 {
kAnyPrintMask = 1 << 3
};
inline bool is_white(int ch)
inline bool isWhite(int ch)
{
return std::isspace(ch) or ch == '#';
}
inline bool is_ordinary(int ch)
inline bool isOrdinary(int ch)
{
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
}
inline bool is_non_blank(int ch)
inline bool isNonBlank(int ch)
{
return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
}
inline bool is_text_lead(int ch)
inline bool isTextLead(int ch)
{
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
}
inline bool is_any_print(int ch)
inline bool isAnyPrint(int ch)
{
return ch == '\t' or
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
}
inline bool is_unquoted_string(const char* s)
inline bool isUnquotedString(const char* s)
{
bool result = is_ordinary(*s++);
bool result = isOrdinary(*s++);
while (result and *s != 0)
{
result = is_non_blank(*s);
result = isNonBlank(*s);
++s;
}
return result;
......@@ -67,16 +67,16 @@ inline bool is_unquoted_string(const char* s)
// --------------------------------------------------------------------
std::tuple<std::string,std::string> split_tag_name(const std::string& tag);
std::tuple<std::string,std::string> splitTagName(const std::string& tag);
// --------------------------------------------------------------------
// sac parser, analogous to SAX parser (simple api for xml)
// sac Parser, analogous to SAX Parser (simple api for xml)
class sac_parser
class SacParser
{
public:
sac_parser(std::istream& is);
virtual ~sac_parser() {}
SacParser(std::istream& is);
virtual ~SacParser() {}
enum CIFToken
{
......@@ -108,30 +108,30 @@ class sac_parser
static const char* kValueName[];
int get_next_char();
int getNextChar();
void retract();
void restart();
CIFToken get_next_token();
CIFToken getNextToken();
void match(CIFToken token);
void parse_file();
void parse_global();
void parse_data_block();
void parseFile();
void parseGlobal();
void parseDataBlock();
virtual void parse_save_frame();
virtual void parseSaveFrame();
void parse_dictionary();
void parseDictionary();
void error(const std::string& msg);
// production methods, these are pure virtual here
virtual void produce_datablock(const std::string& name) = 0;
virtual void produce_category(const std::string& name) = 0;
virtual void produce_row() = 0;
virtual void produce_item(const std::string& category, const std::string& item, const string& value) = 0;
virtual void produceDatablock(const std::string& name) = 0;
virtual void produceCategory(const std::string& name) = 0;
virtual void produceRow() = 0;
virtual void produceItem(const std::string& category, const std::string& item, const string& value) = 0;
protected:
......@@ -153,60 +153,60 @@ class sac_parser
eStateValue = 300
};
std::istream& m_data;
// parser state
bool m_validate;
uint32 m_line_nr;
bool m_bol;
int m_state, m_start;
CIFToken m_lookahead;
std::string m_token_value;
CIFValueType m_token_type;
std::stack<int> m_buffer;
std::istream& mData;
// Parser state
bool mValidate;
uint32 mLineNr;
bool mBol;
int mState, mStart;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;
std::stack<int> mBuffer;
};
// --------------------------------------------------------------------
class parser : public sac_parser
class Parser : public SacParser
{
public:
parser(std::istream& is, file& f);
Parser(std::istream& is, File& f);
virtual void produce_datablock(const std::string& name);
virtual void produce_category(const std::string& name);
virtual void produce_row();
virtual void produce_item(const std::string& category, const std::string& item, const std::string& value);
virtual void produceDatablock(const std::string& name);
virtual void produceCategory(const std::string& name);
virtual void produceRow();
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value);
protected:
file& m_file;
datablock* m_data_block;
datablock::iterator m_cat;
row m_row;
File& mFile;
Datablock* mDataBlock;
Datablock::iterator mCat;
Row mRow;
};
// --------------------------------------------------------------------
class dict_parser : public parser
class DictParser : public Parser
{
public:
dict_parser(validator& validator, std::istream& is);
~dict_parser();
DictParser(Validator& validator, std::istream& is);
~DictParser();
void load_dictionary();
void loadDictionary();
private:
virtual void parse_save_frame();
virtual void parseSaveFrame();
bool collect_item_types();
void link_items();
bool collectItemTypes();
void linkItems();
validator& m_validator;
file m_file;
struct dict_parser_data_impl* m_impl;
bool m_collected_item_types = false;
Validator& mValidator;
File mFile;
struct DictParserDataImpl* mImpl;
bool mCollectedItemTypes = false;
};
}
......@@ -5,7 +5,7 @@
#include <vector>
#include <set>
#include "libcif/config.h"
#include "cif++/Config.h"
namespace cif
{
......@@ -19,8 +19,8 @@ int icompare(const std::string& a, const std::string& b);
bool iequals(const char* a, const char* b);
int icompare(const char* a, const char* b);
void to_lower(std::string& s);
std::string to_lower_copy(const std::string& s);
void toLower(std::string& s);
std::string toLowerCopy(const std::string& s);
// To make life easier, we also define iless and iset using iequals
......@@ -46,11 +46,11 @@ inline char tolower(char ch)
// --------------------------------------------------------------------
std::tuple<std::string,std::string> split_tag_name(const std::string& tag);
std::tuple<std::string,std::string> splitTagName(const std::string& tag);
// --------------------------------------------------------------------
// custom wordwrapping routine
std::vector<std::string> word_wrap(const std::string& text, unsigned int width);
std::vector<std::string> wordWrap(const std::string& text, unsigned int width);
}
// cif parsing library
#include "cif++/Cif++.h"
#include <boost/filesystem/path.hpp>
// the std regex of gcc is crashing....
#include <boost/regex.hpp>
#include <set>
namespace cif
{
struct ValidateCategory;
// --------------------------------------------------------------------
// Exception type carrying the text of a validation failure.
class ValidationError : public std::exception
{
  public:
	// Keeps a private copy of msg so what() stays valid for the lifetime
	// of the exception object.
	ValidationError(const std::string& msg) : mMsg(msg) {}

	// Returns the stored message; 'override' makes the compiler verify that
	// this really replaces std::exception::what().
	const char* what() const noexcept override { return mMsg.c_str(); }

	std::string mMsg;	// the message text, publicly accessible
};
// --------------------------------------------------------------------
// Primitive type code stored in ValidateType::mPrimitiveType.
// presumably these mirror the DDL 'char', 'uchar' and 'numb' primitive codes -- TODO confirm
enum DDL_PrimitiveType
{
ptChar, ptUChar, ptNumb
};
DDL_PrimitiveType mapToPrimitiveType(const std::string& s);
// Validator for one named data type: bundles the type name, its primitive
// kind and the regular expression stored for it.
struct ValidateType
{
	std::string			mName;
	DDL_PrimitiveType	mPrimitiveType;
	boost::regex		mRx;

	// Strict weak ordering on the type name as computed by icompare,
	// which lets ValidateType objects be stored in a std::set.
	bool operator<(const ValidateType& rhs) const
	{
		const int order = icompare(mName, rhs.mName);
		return order < 0;
	}

	// compare values based on type
//	int compare(const std::string& a, const std::string& b) const
//	{
//		return compare(a.c_str(), b.c_str());
//	}

	// Three-way comparison of two values of this type (defined elsewhere).
	int compare(const char* a, const char* b) const;
};
// Validator for a single item (tag). Besides the tag and its type it records
// parent/child and foreign-key links to other ValidateItem objects and a back
// pointer to the owning category.
struct ValidateItem
{
	std::string				mTag;
	// The scalar and pointer members all carry an initializer so that a
	// default-constructed ValidateItem never holds indeterminate values.
	bool					mMandatory = false;
	const ValidateType*		mType = nullptr;
	cif::iset				mEnums;
	ValidateItem*			mParent = nullptr;
	std::set<ValidateItem*>	mChildren;
	ValidateCategory*		mCategory = nullptr;
	std::set<ValidateItem*>	mForeignKeys;

	// Defined elsewhere; presumably also registers this item as a child of
	// parent -- confirm in the implementation.
	void setParent(ValidateItem* parent);

	// Ordering on the tag via icompare, used by std::set<ValidateItem>.
	bool operator<(const ValidateItem& rhs) const
	{
		return icompare(mTag, rhs.mTag) < 0;
	}

	// Two items are equal when iequals considers their tags equal.
	bool operator==(const ValidateItem& rhs) const
	{
		return iequals(mTag, rhs.mTag);
	}

	// Defined elsewhere; presumably validates value against this item's
	// type and enumeration -- TODO confirm.
	void operator()(std::string value) const;
};
// Validator for a category: its name, key fields, groups, mandatory fields
// and the validators of the items it contains.
struct ValidateCategory
{
	std::string					mName;
	// Spelled std::string like every other member in this header, so the
	// declaration does not depend on a using-declaration made elsewhere.
	std::vector<std::string>	mKeys;
	cif::iset					mGroups;
	cif::iset					mMandatoryFields;
	std::set<ValidateItem>		mItemValidators;

	// Ordering on the category name via icompare, used by std::set.
	bool operator<(const ValidateCategory& rhs) const
	{
		return icompare(mName, rhs.mName) < 0;
	}

	// Both defined elsewhere.
	void addItemValidator(ValidateItem&& v);
	const ValidateItem* getValidatorForItem(std::string tag) const;

	// Read-only access to the full set of item validators.
	const std::set<ValidateItem>& itemValidators() const
	{
		return mItemValidators;
	}
};
// --------------------------------------------------------------------
// Holds the type and category validators of one dictionary together with the
// dictionary's name and version. Movable but not copyable.
class Validator
{
public:
// DictParser is granted access to the private members below.
friend class DictParser;
Validator();
~Validator();
// copying is disabled; only moves are allowed
Validator(const Validator& rhs) = delete;
Validator& operator=(const Validator& rhs) = delete;
Validator(Validator&& rhs);
Validator& operator=(Validator&& rhs);
void addTypeValidator(ValidateType&& v);
// presumably returns nullptr when no validator is known -- confirm in the implementation
const ValidateType* getValidatorForType(std::string typeCode) const;
void addCategoryValidator(ValidateCategory&& v);
// presumably returns nullptr when no validator is known -- confirm in the implementation
const ValidateCategory* getValidatorForCategory(std::string category) const;
void reportError(const std::string& msg);
// getter/setter pairs for the dictionary name and version
std::string dictName() const { return mName; }
void dictName(const std::string& name) { mName = name; }
std::string dictVersion() const { return mVersion; }
void dictVersion(const std::string& version) { mVersion = version; }
private:
// name is fully qualified here:
ValidateItem* getValidatorForItem(std::string name) const;
std::string mName;
std::string mVersion;
// NOTE(review): nothing in this header reads or sets mStrict; presumably used by reportError -- confirm
bool mStrict = false;
// std::set<uint32> mSubCategories;
std::set<ValidateType> mTypeValidators;
std::set<ValidateCategory> mCategoryValidators;
};
}
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <set>
#include <tuple>
#include <vector>
#include <map>
#include "libcif++/AtomType.h"
namespace libcif
{
// --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class Composition. A composition consists of
// entities. Each Entity can be either a polymer, a non-polymer,
// a macrolide or a water molecule.
// Entities themselves are made up of compounds, and compounds
// contain CompoundAtom records for each atom.
class Composition;
class Entity;
class Compound;
struct CompoundAtom;
// --------------------------------------------------------------------
// struct containing information about an atom in a chemical compound
// This information comes from the CCP4 monomer library.
// Plain record describing one atom of a Compound.
// NOTE(review): typeSymbol and partialCharge have no initializer, so a
// default-initialized CompoundAtom leaves them indeterminate.
struct CompoundAtom
{
// identifier of the atom (the key accepted by Compound::getAtomById)
std::string id;
// element of the atom
AtomType typeSymbol;
// presumably the energy type assigned by the monomer library -- TODO confirm
std::string typeEnergy;
float partialCharge;
};
// --------------------------------------------------------------------
// a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default.
// To create compounds, you'd best use the factory method.
class Compound
{
public:
// Construct a compound directly from its parts; the atom list and the bond
// map are moved into the new object.
Compound(const std::string& id, const std::string& name,
const std::string& group, std::vector<CompoundAtom>&& atoms,
std::map<std::tuple<std::string,std::string>,float>&& bonds)
: mId(id), mName(name), mGroup(group)
, mAtoms(std::move(atoms)), mBonds(std::move(bonds))
{
}
~Compound();
// factory method, create a Compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
// NOTE(review): ownership of the returned pointer is not stated here -- confirm before deleting it
static const Compound* create(const std::string& id);
// this second factory method can create a Compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static const Compound* create(const std::string& id, const std::string& name,
const std::string& type, const std::string& formula);
// add an additional path to the monomer library.
static void addMonomerLibraryPath(const std::string& dir);
// accessors
// id(), name() and atoms() return copies of the stored members
std::string id() const { return mId; }
std::string name() const { return mName; }
std::string type() const;
// std::string group() const { return mGroup; }
std::vector<CompoundAtom> atoms() const { return mAtoms; }
CompoundAtom getAtomById(const std::string& atomId) const;
// bond queries between two atoms, identified by their atom ids
bool atomsBonded(const std::string& atomId_1, const std::string& atomId_2) const;
float atomBondValue(const std::string& atomId_1, const std::string& atomId_2) const;
std::string formula() const;
float formulaWeight() const;
int charge() const;
bool isWater() const;
private:
// Entity& mEntity;
std::string mId;
std::string mName;
std::string mGroup;
std::vector<CompoundAtom> mAtoms;
// bond value keyed by a pair of atom ids
std::map<std::tuple<std::string,std::string>,float> mBonds;
};
// --------------------------------------------------------------------
// an Entity. This is a base class for PolymerEntity and NonPolyEntity
// The latter can be either a regular non-polymer (residue), a macrolide or
// water.
// Abstract base: formulaWeight() is pure virtual, so only derived classes
// can be instantiated.
class Entity
{
public:
Entity(const std::string& id, const std::string& type, const std::string& description);
// virtual so that derived entities can be destroyed through an Entity pointer
virtual ~Entity();
std::string id() const;
std::string type() const;
std::string description() const;
// must be implemented by every concrete entity
virtual float formulaWeight() const = 0;
private:
std::string mId;
std::string mType;
std::string mDescription;
};
// --------------------------------------------------------------------
// A polymer Entity
class PolymerEntity : public Entity
{
public:
PolymerEntity(const std::string& id, const std::string& description);
~PolymerEntity();
std::string seqOneLetterCode(bool cannonical) const;
std::string pdbxStrandId() const;
virtual float formulaWeight() const;
class monomer
{
public:
friend class PolymerEntity;
size_t num() const; // sequence number
bool hetero() const; // whether this position contains alternate Compounds
const Compound& comp(size_t altNr) const; // the chemical Compound of this monomer
private:
monomer* mNext;
monomer* mAlt;
size_t mNum;
Compound* mComp;
};
class iterator : public std::iterator<std::forward_iterator_tag, const monomer>
{
public:
typedef std::iterator<std::forward_iterator_tag, const monomer> baseType;
typedef baseType::reference reference;
typedef baseType::pointer pointer;
iterator(monomer* monomer = nullptr)
: mCursor(monomer) {}
iterator(const iterator& rhs)
: mCursor(rhs.mCursor)
{
}
iterator& operator=(const iterator& rhs)
{
mCursor = rhs.mCursor;
return *this;
}
reference operator*() { return *mCursor; }
pointer operator->() { return mCursor; }
iterator& operator++() { mCursor = mCursor->mNext; return *this; }
iterator operator++(int)
{
iterator tmp(*this);
operator++();
return tmp;
}
bool operator==(const iterator& rhs) const { return mCursor == rhs.mCursor; }
bool operator!=(const iterator& rhs) const { return mCursor != rhs.mCursor; }
private:
monomer* mCursor;
};
iterator begin() const { return iterator(mSeq); }
iterator end() const { return iterator(); }
const monomer& operator[](size_t index) const;
private:
Entity& mEntity;
monomer* mSeq;
};
// --------------------------------------------------------------------
// nonPoly Entity
class NonPolyEntity : public Entity
{
public:
NonPolyEntity(const std::string& id, const std::string& type, const std::string& description);
~NonPolyEntity();
Compound& comp() const;
virtual float formulaWeight() const;
private:
Compound* mCompound;
};
}
#pragma once
#include "libcif++/cif++.h"
// --------------------------------------------------------------------
// One record of a PDB file, chained to the following record through mNext.
// The value text lives in the trailing array mValue, which is why the struct
// supplies its own operator new/delete.
struct PDBRecord
{
PDBRecord* mNext;
uint32 mLineNr;
char mName[11];
size_t mVlen;
// zero-length trailing array (a compiler extension, not standard C++)
char mValue[0];
PDBRecord(uint32 lineNr, const std::string& name, const std::string& value);
~PDBRecord();
void* operator new(size_t);
// presumably allocates vLen extra bytes behind the struct for mValue -- confirm in the implementation
// NOTE(review): no matching placement operator delete(void*, size_t) is declared
void* operator new(size_t size, size_t vLen);
void operator delete(void* p);
bool is(const char* name) const;
// presumably vC/vS/vI/vF extract a char, string, integer and number from the given column range -- TODO confirm
char vC(size_t column);
// std::numeric_limits needs <limits>, which is not included directly here
std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max());
// NOTE(review): takes int columns while the sibling accessors take size_t
int vI(int columnFirst, int columnLast);
std::string vF(size_t columnFirst, size_t columnLast);
};
// --------------------------------------------------------------------
void ReadPDBFile(std::istream& pdbFile, cif::file& cifFile);
......@@ -11,15 +11,15 @@ class Remark3Parser
public:
virtual ~Remark3Parser() {}
static bool Parse(const std::string& expMethod, PDBRecord* r, cif::datablock& db);
static bool parse(const std::string& expMethod, PDBRecord* r, cif::datablock& db);
virtual std::string Program();
virtual std::string Version();
virtual std::string program();
virtual std::string version();
protected:
Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::datablock& db,
const TemplateLine templatelines[], uint32 templateLineCount, std::regex program_version);
const TemplateLine templatelines[], uint32 templateLineCount, std::regex programVersion);
virtual float Parse();
std::string NextLine();
......@@ -31,17 +31,17 @@ class Remark3Parser
virtual void Fixup() {}
std::string m_name;
std::string m_expMethod;
PDBRecord* m_rec;
cif::datablock m_db;
std::string m_line;
std::smatch m_m;
uint32 m_state;
const TemplateLine* m_template;
uint32 m_templateCount;
std::regex m_program_version;
std::string mName;
std::string mExpMethod;
PDBRecord* mRec;
cif::datablock mDb;
std::string mLine;
std::smatch mM;
uint32 mState;
const TemplateLine* mTemplate;
uint32 mTemplateCount;
std::regex mProgramVersion;
};
#pragma once
#include <map>
#include <string>
#include <boost/filesystem/path.hpp>
extern const std::map<std::string,char> kAAMap, kBaseMap;
// Lookup service for residue names, reached through the static Instance()
// accessor. Construction, destruction and copying are not available to
// client code.
class PeptideDB
{
public:
static PeptideDB& Instance();
// presumably dictionaries form a stack and the most recently pushed one takes precedence -- TODO confirm
void PushDictionary(boost::filesystem::path dict);
void PopDictionary();
bool IsKnownPeptide(const std::string& res_name) const;
bool IsKnownBase(const std::string& res_name) const;
std::string GetNameForResidue(const std::string& res_name) const;
std::string GetFormulaForResidue(const std::string& res_name) const;
std::string Unalias(const std::string& res_name) const;
private:
// private constructor/destructor: objects cannot be created or destroyed outside the class
PeptideDB();
~PeptideDB();
PeptideDB(const PeptideDB&) = delete;
PeptideDB& operator=(const PeptideDB&) = delete;
// pointer to an implementation struct that is only forward-declared here
struct PeptideDBImpl* mImpl;
static PeptideDB* sInstance;
};
......@@ -2,7 +2,7 @@
#pragma once
#include <libcif/config.h>
#include "cif++/Config.h"
#include <boost/filesystem/operations.hpp>
#include <boost/math/quaternion.hpp>
......@@ -19,76 +19,76 @@ const long double
// --------------------------------------------------------------------
// point, a location with x, y and z coordinates as float.
// Point, a location with x, y and z coordinates as float.
// This one is derived from a tuple<float,float,float> so
// you can do things like:
//
// float x, y, z;
// tie(x, y, z) = atom.loc();
struct point : public std::tuple<float,float,float>
struct Point : public std::tuple<float,float,float>
{
typedef std::tuple<float,float,float> base_type;
point() : base_type(0.f, 0.f, 0.f) {}
point(float x, float y, float z) : base_type(x, y, z) {}
point(const clipper::Coord_orth& pt): base_type(pt[0], pt[1], pt[2]) {}
Point() : base_type(0.f, 0.f, 0.f) {}
Point(float x, float y, float z) : base_type(x, y, z) {}
Point(const clipper::Coord_orth& pt): base_type(pt[0], pt[1], pt[2]) {}
point& operator=(const clipper::Coord_orth& rhs)
Point& operator=(const clipper::Coord_orth& rhs)
{
x(rhs[0]);
y(rhs[1]);
z(rhs[2]);
setX(rhs[0]);
setY(rhs[1]);
setZ(rhs[2]);
return *this;
}
float& x() { return std::get<0>(*this); }
float x() const { return std::get<0>(*this); }
void x(float x) { std::get<0>(*this) = x; }
float& getX() { return std::get<0>(*this); }
float getX() const { return std::get<0>(*this); }
void setX(float x) { std::get<0>(*this) = x; }
float& y() { return std::get<1>(*this); }
float y() const { return std::get<1>(*this); }
void y(float y) { std::get<1>(*this) = y; }
float& getY() { return std::get<1>(*this); }
float getY() const { return std::get<1>(*this); }
void setY(float y) { std::get<1>(*this) = y; }
float& z() { return std::get<2>(*this); }
float z() const { return std::get<2>(*this); }
void z(float z) { std::get<2>(*this) = z; }
float& getZ() { return std::get<2>(*this); }
float getZ() const { return std::get<2>(*this); }
void setZ(float z) { std::get<2>(*this) = z; }
point& operator+=(const point& rhs)
Point& operator+=(const Point& rhs)
{
x() += rhs.x();
y() += rhs.y();
z() += rhs.z();
getX() += rhs.getX();
getY() += rhs.getY();
getZ() += rhs.getZ();
return *this;
}
point& operator-=(const point& rhs)
Point& operator-=(const Point& rhs)
{
x() -= rhs.x();
y() -= rhs.y();
z() -= rhs.z();
getX() -= rhs.getX();
getY() -= rhs.getY();
getZ() -= rhs.getZ();
return *this;
}
point& operator*=(float rhs)
Point& operator*=(float rhs)
{
x() *= rhs;
y() *= rhs;
z() *= rhs;
getX() *= rhs;
getY() *= rhs;
getZ() *= rhs;
return *this;
}
// Divide all three coordinates by the scalar rhs. The compound operator is
// '/=' so that it agrees with the free operator/ defined further down; it
// used to multiply. As everywhere in this listing the pre-rename and the
// post-rename spelling are shown side by side.
point& operator/=(float rhs)
Point& operator/=(float rhs)
{
x() /= rhs;
y() /= rhs;
z() /= rhs;
getX() /= rhs;
getY() /= rhs;
getZ() /= rhs;
return *this;
}
float normalize()
{
auto length = x() * x() + y() * y() + z() * z();
auto length = getX() * getX() + getY() * getY() + getZ() * getZ();
if (length > 0)
{
length = std::sqrt(length);
......@@ -99,133 +99,132 @@ struct point : public std::tuple<float,float,float>
void rotate(const boost::math::quaternion<float>& q)
{
boost::math::quaternion<float> p(0, x(), y(), z());
boost::math::quaternion<float> p(0, getX(), getY(), getZ());
p = q * p * boost::math::conj(q);
x() = p.R_component_2();
y() = p.R_component_3();
z() = p.R_component_4();
getX() = p.R_component_2();
getY() = p.R_component_3();
getZ() = p.R_component_4();
}
operator clipper::Coord_orth() const
{
return clipper::Coord_orth(x(), y(), z());
return clipper::Coord_orth(getX(), getY(), getZ());
}
};
inline std::ostream& operator<<(std::ostream& os, const point& pt)
inline std::ostream& operator<<(std::ostream& os, const Point& pt)
{
os << '(' << pt.x() << ',' << pt.y() << ',' << pt.z() << ')';
os << '(' << pt.getX() << ',' << pt.getY() << ',' << pt.getZ() << ')';
return os;
}
inline point operator+(const point& lhs, const point& rhs)
inline Point operator+(const Point& lhs, const Point& rhs)
{
return point(lhs.x() + rhs.x(), lhs.y() + rhs.y(), lhs.z() + rhs.z());
return Point(lhs.getX() + rhs.getX(), lhs.getY() + rhs.getY(), lhs.getZ() + rhs.getZ());
}
inline point operator-(const point& lhs, const point& rhs)
inline Point operator-(const Point& lhs, const Point& rhs)
{
return point(lhs.x() - rhs.x(), lhs.y() - rhs.y(), lhs.z() - rhs.z());
return Point(lhs.getX() - rhs.getX(), lhs.getY() - rhs.getY(), lhs.getZ() - rhs.getZ());
}
inline point operator-(const point& pt)
inline Point operator-(const Point& pt)
{
return point(-pt.x(), -pt.y(), -pt.z());
return Point(-pt.getX(), -pt.getY(), -pt.getZ());
}
inline point operator*(const point& pt, float f)
inline Point operator*(const Point& pt, float f)
{
return point(pt.x() * f, pt.y() * f, pt.z() * f);
return Point(pt.getX() * f, pt.getY() * f, pt.getZ() * f);
}
inline point operator/(const point& pt, float f)
inline Point operator/(const Point& pt, float f)
{
return point(pt.x() / f, pt.y() / f, pt.z() / f);
return Point(pt.getX() / f, pt.getY() / f, pt.getZ() / f);
}
// --------------------------------------------------------------------
// several standard 3d operations
inline double DistanceSquared(const point& a, const point& b)
inline double DistanceSquared(const Point& a, const Point& b)
{
return
(a.x() - b.x()) * (a.x() - b.x()) +
(a.y() - b.y()) * (a.y() - b.y()) +
(a.z() - b.z()) * (a.z() - b.z());
(a.getX() - b.getX()) * (a.getX() - b.getX()) +
(a.getY() - b.getY()) * (a.getY() - b.getY()) +
(a.getZ() - b.getZ()) * (a.getZ() - b.getZ());
}
inline double Distance(const point& a, const point& b)
inline double Distance(const Point& a, const Point& b)
{
return sqrt(
(a.x() - b.x()) * (a.x() - b.x()) +
(a.y() - b.y()) * (a.y() - b.y()) +
(a.z() - b.z()) * (a.z() - b.z()));
(a.getX() - b.getX()) * (a.getX() - b.getX()) +
(a.getY() - b.getY()) * (a.getY() - b.getY()) +
(a.getZ() - b.getZ()) * (a.getZ() - b.getZ()));
}
inline float DotProduct(const point& a, const point& b)
inline float DotProduct(const Point& a, const Point& b)
{
return a.x() * b.x() + a.y() * b.y() + a.z() * b.z();
return a.getX() * b.getX() + a.getY() * b.getY() + a.getZ() * b.getZ();
}
inline point CrossProduct(const point& a, const point& b)
inline Point CrossProduct(const Point& a, const Point& b)
{
return point(a.y() * b.z() - b.y() * a.z(),
a.z() * b.x() - b.z() * a.x(),
a.x() * b.y() - b.x() * a.y());
return Point(a.getY() * b.getZ() - b.getY() * a.getZ(),
a.getZ() * b.getX() - b.getZ() * a.getX(),
a.getX() * b.getY() - b.getX() * a.getY());
}
float DihedralAngle(const point& p1, const point& p2, const point& p3, const point& p4);
float CosinusAngle(const point& p1, const point& p2, const point& p3, const point& p4);
float DihedralAngle(const Point& p1, const Point& p2, const Point& p3, const Point& p4);
float CosinusAngle(const Point& p1, const Point& p2, const Point& p3, const Point& p4);
// --------------------------------------------------------------------
// We use quaternions to do rotations in 3d space
quaternion Normalize(quaternion q);
//std::tuple<double,point> QuaternionToAngleAxis(quaternion q);
point Centroid(std::vector<point>& points);
point CenterPoints(std::vector<point>& points);
quaternion AlignPoints(const std::vector<point>& a, const std::vector<point>& b);
double RMSd(const std::vector<point>& a, const std::vector<point>& b);
//std::tuple<double,Point> QuaternionToAngleAxis(quaternion q);
Point Centroid(std::vector<Point>& Points);
Point CenterPoints(std::vector<Point>& Points);
quaternion AlignPoints(const std::vector<Point>& a, const std::vector<Point>& b);
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b);
// --------------------------------------------------------------------
// Helper class to generate evenly divided points on a sphere
// Helper class to generate evenly divided Points on a sphere
// we use a fibonacci sphere to calculate even distribution of the dots
template<int N>
class spherical_dots
class SphericalDots
{
public:
enum { P = 2 * N + 1 };
typedef typename std::array<point,P> array_type;
typedef typename std::array<Point,P> array_type;
typedef typename array_type::const_iterator iterator;
static spherical_dots& instance()
static SphericalDots& instance()
{
static spherical_dots s_instance;
return s_instance;
static SphericalDots sInstance;
return sInstance;
}
size_t size() const { return m_points.size(); }
const point operator[](uint32 inIx) const { return m_points[inIx]; }
iterator begin() const { return m_points.begin(); }
iterator end() const { return m_points.end(); }
size_t size() const { return mPoints.size(); }
const Point operator[](uint32 inIx) const { return mPoints[inIx]; }
iterator begin() const { return mPoints.begin(); }
iterator end() const { return mPoints.end(); }
double weight() const { return m_weight; }
double weight() const { return mWeight; }
spherical_dots()
SphericalDots()
{
using namespace std;
const double
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
m_weight = (4 * kPI) / P;
mWeight = (4 * kPI) / P;
auto p = m_points.begin();
auto p = mPoints.begin();
for (int32 i = -N; i <= N; ++i)
{
......@@ -242,11 +241,11 @@ class spherical_dots
private:
array_type m_points;
double m_weight;
array_type mPoints;
double mWeight;
};
typedef spherical_dots<50> spherical_dots_50;
typedef SphericalDots<50> SphericalDots_50;
}
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <boost/filesystem/operations.hpp>
#include <boost/math/quaternion.hpp>
#include <boost/any.hpp>
#include "cif++/AtomType.h"
#include "cif++/Point.h"
#include "cif++/Compound.h"
/*
To modify a structure, you will have to use actions.
The currently supported actions are:
// - Move atom to new location
- Remove atom
// - Add new atom that was formerly missing
// - Add alternate Residue
-
Other important design principles:
- all objects here are references to the actual data. Not models of
the data itself. That means that if you copy an atom, you copy the
reference to an atom in the structure. You're not creating a new
atom. This may sound obvious, but it is not if you are used to
copy semantics in the C++ world.
*/
// forward declaration
namespace cif
{
class Datablock;
};
namespace libcif
{
class Atom;
class Residue;
class Monomer;
class Polymer;
class Structure;
class File;
// --------------------------------------------------------------------
// We do not want to introduce a dependency on cif++ here, we might want
// to change the backend storage in the future.
// So, in order to access the data we use properties based on boost::any
// Eventually this should be moved to std::variant, but that's only when
// c++17 is acceptable.
// A named value of arbitrary type; boost::any is used for the value so this
// header does not depend on the storage backend.
struct Property
{
	// An empty property: empty name, empty value.
	Property()
	{
	}

	// Copies both the name and the value into the new property.
	Property(const std::string& name, const boost::any& value)
		: name(name)
		, value(value)
	{
	}

	std::string		name;
	boost::any		value;
};
typedef std::vector<Property> PropertyList;
// --------------------------------------------------------------------
class Atom
{
public:
// Atom(const structure& s, const std::string& id);
Atom(struct AtomImpl* impl);
Atom(const File& f, const std::string& id);
Atom(const Atom& rhs);
~Atom();
Atom& operator=(const Atom& rhs);
std::string id() const;
AtomType type() const;
point location() const;
const compound& comp() const;
const entity& ent() const;
bool isWater() const;
int charge() const;
boost::any property(const std::string& name) const;
void property(const std::string& name, const boost::any& value);
// specifications
std::string labelAtomId() const;
std::string labelCompId() const;
std::string labelAsymId() const;
int labelSeqId() const;
std::string labelAltId() const;
std::string authAtomId() const;
std::string authCompId() const;
std::string authAsymId() const;
int authSeqId() const;
std::string pdbxAuthInsCode() const;
std::string authAltId() const;
bool operator==(const Atom& rhs) const;
const File& getFile() const;
private:
struct AtomImpl* mImpl;
};
typedef std::vector<Atom> AtomView;
// --------------------------------------------------------------------
class Residue : public std::enable_shared_from_this<Residue>
{
public:
Residue(const compound& cmp) : mCompound(cmp) {}
const compound& comp() const { return mCompound; }
virtual AtomView atoms();
private:
const compound& mCompound;
};
//// --------------------------------------------------------------------
//// a monomer models a single Residue in a protein chain
//
//class monomer : public Residue
//{
// public:
// monomer(polymer& polymer, size_t seqId, const std::string& compId,
// const std::string& altId);
//
// int num() const { return mNum; }
//// polymer& getPolymer();
//
//// std::vector<monomer_ptr> alternates();
//
// private:
// polymer_ptr mPolymer;
// int mNum;
//};
//
//// --------------------------------------------------------------------
//
//class polymer : public std::enable_shared_from_this<polymer>
//{
// public:
// polymer(const polymerEntity& pe, const std::string& asymId);
//
// struct iterator : public std::iterator<std::random_access_iterator_tag, monomer>
// {
// typedef std::iterator<std::bidirectional_iterator_tag, monomer> base_type;
// typedef base_type::reference reference;
// typedef base_type::pointer pointer;
//
// iterator(polymer& list, uint32 index);
// iterator(iterator&& rhs);
// iterator(const iterator& rhs);
// iterator& operator=(const iterator& rhs);
// iterator& operator=(iterator&& rhs);
//
// reference operator*();
// pointer operator->();
//
// iterator& operator++();
// iterator operator++(int);
//
// iterator& operator--();
// iterator operator--(int);
//
// bool operator==(const iterator& rhs) const;
// bool operator!=(const iterator& rhs) const;
// };
//
// iterator begin();
// iterator end();
//
// private:
// polymer_entity mEntity;
// std::string mAsymId;
// std::vector<Residue_ptr> mMonomers;
//};
// --------------------------------------------------------------------
// file is a reference to the data stored in e.g. the cif file.
// This object is not copyable.
// File refers to the data stored in e.g. a cif file; the actual state lives
// in a FileImpl. Objects of this class cannot be copied.
// Entity and cif::Datablock are spelled the way they are declared (class
// Entity, forward declaration 'class Datablock').
class File : public std::enable_shared_from_this<File>
{
  public:
	File();
	File(boost::filesystem::path p);
	~File();

	File(const File&) = delete;
	File& operator=(const File&) = delete;

	void load(boost::filesystem::path p);
	void save(boost::filesystem::path p);

	// NOTE(review): the forward declaration above this class is spelled
	// 'Structure' while the class defined below is spelled 'structure';
	// one of the two needs to follow the other.
	structure* model(size_t nr = 1);

	// Direct access to the implementation object.
	struct FileImpl& impl() const						{ return *mImpl; }

	std::vector<const Entity*> entities();

	cif::Datablock& data();

  private:
	struct FileImpl*	mImpl;
};
// --------------------------------------------------------------------
// A structure gives access to the atoms of one model in a File and offers
// lookups and mappings between the label, auth and PDB naming schemes.
// getAtomByLocation takes a Point, the spelling used by the Point header.
// NOTE(review): the forward declaration earlier in this header is spelled
// 'Structure'; the class name is left untouched here because callers depend on it.
class structure
{
  public:
	structure(File& p, uint32 modelNr = 1);
	structure(const structure&);
	structure& operator=(const structure&);
	~structure();

	File& getFile() const;

	AtomView atoms() const;
	AtomView waters() const;

	Atom getAtomById(std::string id) const;
	Atom getAtomByLocation(Point pt, float maxDistance) const;

	Atom getAtomForLabel(const std::string& atomId, const std::string& asymId,
		const std::string& compId, int seqId, const std::string& altId = "");

	Atom getAtomForAuth(const std::string& atomId, const std::string& asymId,
		const std::string& compId, int seqId, const std::string& altId = "",
		const std::string& pdbxAuthInsCode = "");

	// map between auth and label locations
	std::tuple<std::string,int,std::string> MapAuthToLabel(const std::string& asymId,
		const std::string& seqId, const std::string& compId, const std::string& insCode = "");

	std::tuple<std::string,std::string,std::string,std::string> MapLabelToAuth(
		const std::string& asymId, int seqId, const std::string& compId);

	// returns chain, seqnr
	std::tuple<std::string,std::string> MapLabelToAuth(
		const std::string& asymId, int seqId);

	// returns chain,seqnr,comp,iCode
	std::tuple<std::string,int,std::string,std::string> MapLabelToPDB(
		const std::string& asymId, int seqId, const std::string& compId);

	std::tuple<std::string,int,std::string,std::string> MapPDBToLabel(
		const std::string& asymId, int seqId, const std::string& compId, const std::string& iCode);

	// Actions
	void removeAtom(Atom& a);

  private:
	struct StructureImpl*	mImpl;
};
}
// cif parsing library
#pragma once
#include "libcif/config.h"
#include <regex>
#include <iostream>
#include <set>
#include <boost/lexical_cast.hpp>
#include <boost/any.hpp>
#include "cif-utils.h"
extern int VERBOSE;
/*
Simple C++ interface to CIF files.
Assumptions: a file contains one or more datablocks modelled by the class datablock.
Each datablock contains categories. These map to the original tables used to fill
the mmCIF file. Each category can contain multiple items, the columns in the table.
Values are stored as character strings internally.
Synopsis:
// create a cif file
cif::datablock e("1MVE");
e.append(cif::category{"_entry", { "id", "1MVE" } });
cif::category atom_site("atom_site");
size_t nr{};
for (auto& my_atom: atoms)
{
atom_site.push_back({
{ "group_PDB", "ATOM" },
{ "id", ++nr },
{ "type_symbol", my_atom.type.str() },
...
});
}
e.append(move(atom_site));
cif::file f;
f.append(e);
ofstream os("1mve.cif");
f.write(os);
// read
f.read(ifstream{"1mve.cif"});
auto& e = f.first_datablock();
cout << "ID of datablock: " << e.id() << endl;
auto& atoms = e["atom_site"];
for (auto& atom: atoms)
{
cout << atom["group_PDB"] << ", "
<< atom["id"] << ", "
...
float x, y, z;
cif::tie(x, y, z) = atom.get("Cartn_x", "Cartn_y", "Cartn_z");
...
}
Another way of querying a category is by using this construct:
auto& cat = e["atom_site"];
auto rows = cat.find(key("label_asym_id") == "A" and key("label_seq_id") == 1);
*/
namespace cif
{
using std::string;
using std::vector;
// mmCIF mapping
// A CIF data file in this case contains entries (data blocks) which can contain
// one or more category objects. Each category object contains arrays of items.
// Better, you can consider the categories as tables containing columns which
// are the items.
class file;
class datablock;
class category;
class row; // a flyweight class that references data in categories
class item;
class validator;
struct validate_item;
struct validate_category;
struct item_column;
struct item_row;
struct item_value;
// --------------------------------------------------------------------
// class item
//
// This class is only transient, it is used to construct new rows.
// Access to already stored data is through an item_reference object.
// A transient name/value pair used to construct new rows. The value is
// always kept as a string.
class item
{
  public:
	typedef enum { not_applicable, not_defined, text, number } item_content_type;

	item() {}

	// Defined below the class: stores the textual form of value.
	template<typename T>
	item(const std::string& name, const T& value);

	// Member-wise copying is exactly what the compiler generates.
	item(const item& rhs) = default;
	item& operator=(const item& rhs) = default;

	// The move operations are noexcept so that containers such as
	// std::vector<item> move elements on reallocation instead of copying them.
	item(item&& rhs) noexcept
		: m_name(std::move(rhs.m_name)), m_value(std::move(rhs.m_value)) {}

	item& operator=(item&& rhs) noexcept
	{
		// guard keeps a self-move from disturbing the stored strings
		if (this != &rhs)
		{
			m_name = std::move(rhs.m_name);
			m_value = std::move(rhs.m_value);
		}

		return *this;
	}

	const std::string& name() const		{ return m_name; }
	const std::string& value() const	{ return m_value; }

	void value(const std::string& v)	{ m_value = v; }

	// The following three forward to the stored value string.
	bool empty() const					{ return m_value.empty(); }
	size_t length() const				{ return m_value.length(); }
	const char* c_str() const			{ return m_value.c_str(); }

  private:
	std::string	m_name;
	std::string	m_value;
};
// Generic constructor: the value is converted to its textual form with
// boost::lexical_cast before it is stored.
template<typename T>
inline
item::item(const string& name, const T& value)
: m_name(name), m_value(boost::lexical_cast<string>(value))
{
}
// Specialisation for string values: the value is stored as is, without
// going through lexical_cast.
template<>
inline
item::item(const string& name, const string& value)
: m_name(name), m_value(value)
{
}
// --------------------------------------------------------------------
// class datablock acts as an STL container for category objects
// A named container of category objects, stored in a std::list. Objects of
// this class cannot be copied.
class datablock
{
public:
// file is a friend, which gives it access to the private write() overloads and m_next
friend class file;
typedef std::list<category> category_list;
typedef category_list::iterator iterator;
typedef category_list::const_iterator const_iterator;
datablock(const string& name);
~datablock();
datablock(const datablock&) = delete;
datablock& operator=(const datablock&) = delete;
string name() const { return m_name; }
void set_name(const string& n) { m_name = n; }
string first_item(const string& tag) const;
// iteration over the contained categories
iterator begin() { return m_categories.begin(); }
iterator end() { return m_categories.end(); }
const_iterator begin() const { return m_categories.begin(); }
const_iterator end() const { return m_categories.end(); }
// presumably creates the category when it does not exist yet, unlike get() below -- TODO confirm
category& operator[](const string& name);
std::tuple<iterator,bool> emplace(const std::string& name);
void validate();
void set_validator(validator* v);
// this one only looks up a category, returns nullptr if it does not exist
category* get(const string& name);
// output parameter: tags receives the result
void get_tag_order(vector<string>& tags) const;
private:
void write(std::ostream& os);
void write(std::ostream& os, const vector<string>& order);
std::list<category> m_categories;
string m_name;
// NOTE(review): m_validator and m_next have no in-class initializer; the constructor must set them
validator* m_validator;
datablock* m_next;
};
// --------------------------------------------------------------------
// class row acts as a container for item objects. It has a more useful
// interface for accessing the contained columns. The get() method
// returns a get_row_result object that can be used to access only a
// subset of the column values, by index.
namespace detail
{
// item_reference is a helper class
struct item_reference
{
const char* m_name;
item_row* m_row;
template<typename T>
item_reference& operator=(const T& value)
{
this->operator=(boost::lexical_cast<string>(value));
return *this;
}
// operator string() const { return c_str(); }
template<typename T>
T as() const
{
T result = 0;
if (not empty())
result = boost::lexical_cast<T>(c_str());
return result;
}
template<typename T>
int compare(const T& value) const
{
int result = 0;
try
{
double v = boost::lexical_cast<T>(c_str());
if (v < value)
result = -1;
else if (v > value)
result = 1;
}
catch (...)
{
if (VERBOSE)
std::cerr << "conversion error in compare for '" << c_str() << '\'' << std::endl;
result = 1;
}
return result;
}
bool empty() const;
// bool unapplicable() const;
const char* c_str() const;
bool operator!=(const string& s) const { return s != c_str(); }
bool operator==(const string& s) const { return s == c_str(); }
};
// as<string>: return the content as a string, without any conversion
template<>
inline
string item_reference::as<string>() const
{
return string(c_str());
}
// as<const char*>: return the pointer obtained from c_str() directly
template<>
inline
const char* item_reference::as<const char*>() const
{
return c_str();
}
// compare<string>: delegates to icompare instead of a numeric comparison
template<>
inline
int item_reference::compare<string>(const string& value) const
{
return icompare(c_str(), value.c_str());
}
// compare for C strings: delegates to cif::icompare as well
template<>
inline
int item_reference::compare(const char* const& value) const
{
return cif::icompare(c_str(), value);
}
// Stream the textual content of an item_reference.
inline std::ostream& operator<<(std::ostream& os, const item_reference& rhs)
{
    return os << rhs.c_str();
}
// Declaration of the string specialization of operator=; it is defined
// elsewhere. The generic operator= converts its argument to a string and
// forwards to this one.
template<>
item_reference& item_reference::operator=(const string& value);
// some helper classes to help create tuple result types
// tuple_catter<std::tuple<A...>, std::tuple<B...>, ...>::type is the single
// std::tuple type containing the element types of all the argument tuples,
// in the order given.
template<typename... Tuples>
struct tuple_catter;

// final step: only one tuple is left, that is the result
template<typename... Elems>
struct tuple_catter<std::tuple<Elems...>>
{
    using type = std::tuple<Elems...>;
};

// recursive step: merge the first two tuples and continue with the remainder
template<typename... Head, typename... Next, typename... Tail>
struct tuple_catter<std::tuple<Head...>, std::tuple<Next...>, Tail...>
{
    using type = typename tuple_catter<std::tuple<Head..., Next...>, Tail...>::type;
};
// col_getter<C...> produces a std::tuple holding one const item_reference
// for each requested column, read from a result object that provides a
// constant N and an operator[](size_t).
template<typename... Cols>
struct col_getter;

// last column: a tuple with just one element
template<typename Last>
struct col_getter<Last>
{
    using type = std::tuple<const item_reference>;

    template<typename Res>
    static type get(Res& rs)
    {
        // the last requested column has the highest index
        size_t last = Res::N - 1;
        return type{ rs[last] };
    }
};

// first of several columns: this column followed by whatever the remaining
// columns produce
template<typename First, typename... Rest>
struct col_getter<First, Rest...>
{
    using next = col_getter<Rest...>;
    using type = typename tuple_catter<std::tuple<const item_reference>, typename next::type>::type;

    template<typename Res>
    static type get(Res& rs)
    {
        // the number of columns still to come determines our own index
        size_t index = Res::N - 1 - sizeof...(Rest);
        return std::tuple_cat(std::tuple<const item_reference>{ rs[index] }, next::get(rs));
    }
};
// Result of row::get(): remembers the row and the requested column names so
// that the values can be fetched later, by position in the request.
template<typename... C>
struct get_row_result
{
// number of requested columns
enum { N = sizeof...(C) };
// tuple with one const item_reference per requested column
typedef typename col_getter<C...>::type tuple_type;
// const item_reference operator[](const string& col) const
// {
// return m_row[col];
// }
// access by position: ix is the index in the list of requested columns.
// No bounds check is done on ix.
const item_reference operator[](size_t ix) const
{
return m_row[m_columns[ix]];
}
// stores a reference to the row and the column names as passed in
get_row_result(row& r, C... columns)
: m_row(r), m_columns({{columns...}}) {}
// the row is held by reference, it has to outlive this object
row& m_row;
std::array<const char*, N> m_columns;
};
// we want to be able to tie some variables to a row_result, for this we use tiewraps
template<int IX, typename... Ts>
struct tie_wrap;
template<int IX, typename T>
struct tie_wrap<IX,T>
{
tie_wrap(T& t)
: m_val(t) {}
template<typename Res>
void operator=(const Res& rr)
{
typedef typename std::remove_reference<T>::type basic_type;
const item_reference v = rr[IX];
basic_type tv = v.as<basic_type>();
m_val = tv;
}
T& m_val;
};
template<int IX, typename T, typename... Ts>
struct tie_wrap<IX, T, Ts...>
{
typedef tie_wrap<IX + 1, Ts...> next;
tie_wrap(T& t, Ts&... ts)
: m_val(t), m_next(ts...) {}
template<typename Res>
void operator=(const Res& rr)
{
typedef typename std::remove_reference<T>::type basic_type;
const item_reference v = rr[IX];
basic_type tv = v.as<basic_type>();
m_val = tv;
m_next.operator=(rr);
}
T& m_val;
next m_next;
};
}
// Create a tie_wrap chain referring to the variables v...; assigning the
// result of row::get() to it fills the variables in order. The variables are
// held by reference.
template<typename... Ts>
auto tie(Ts&... v) -> detail::tie_wrap<0, Ts...>
{
return detail::tie_wrap<0, Ts...>(v...);
}
// class row wraps a pointer to the item_row that holds the data. Items are
// reached by tag through operator[], several columns at once through get(),
// and all items can be visited with begin()/end().
class row
{
  public:
    friend class category;
    friend class cat_index;
    friend class row_comparator;
    friend struct detail::item_reference;

    // a default constructed row refers to nothing and tests false
    row(item_row* data = nullptr) : m_data(data) {}
    row(const row& rhs);
    row& operator=(const row& rhs);

    // Forward iterator over the items in a row. Dereferencing gives access
    // to m_current, an item object stored inside the iterator itself.
    struct const_iterator : public std::iterator<std::forward_iterator_tag, const item>
    {
        // base_type has to name the real base class. It used to be declared
        // with a non-const item, which turned pointer and reference into
        // item* and item& and so made this const_iterator hand out mutable
        // access to the items.
        typedef std::iterator<std::forward_iterator_tag, const item> base_type;
        typedef typename base_type::pointer pointer;
        typedef typename base_type::reference reference;

        const_iterator(item_row* data, item_value* ptr);

        reference operator*() { return m_current; }
        pointer operator->() { return &m_current; }

        const_iterator& operator++();
        const_iterator operator++(int) { const_iterator result(*this); this->operator++(); return result; }

        // iterators are equal when they point at the same item_value
        bool operator==(const const_iterator& rhs) const { return m_ptr == rhs.m_ptr; }
        bool operator!=(const const_iterator& rhs) const { return m_ptr != rhs.m_ptr; }

      private:
        void fetch();

        item_row* m_data;
        item_value* m_ptr;
        item m_current;
    };

    // checks for an initialized row:
    operator bool() const { return m_data != nullptr; }

    bool empty() const;
    const_iterator begin() const;
    const_iterator end() const;

    // TODO: implement real const version?
    // Access to an item by tag. The returned item_reference stores the
    // pointer that is passed in (or item_tag.c_str() for the string
    // versions), so the tag has to stay alive for as long as the
    // reference is used.
    const detail::item_reference operator[](const char* item_tag) const
    {
        return detail::item_reference{item_tag, m_data};
    }

    detail::item_reference operator[](const char* item_tag)
    {
        return detail::item_reference{item_tag, m_data};
    }

    const detail::item_reference operator[](const string& item_tag) const
    {
        return detail::item_reference{item_tag.c_str(), m_data};
    }

    detail::item_reference operator[](const string& item_tag)
    {
        return detail::item_reference{item_tag.c_str(), m_data};
    }

    // Request several columns at once; the result can be assigned to a
    // tie(...) of variables.
    template<typename... C>
    auto get(C... columns) -> detail::get_row_result<C...>
    {
        return detail::get_row_result<C...>(*this, columns...);
    }

    // rows are equal when they refer to the same item_row
    bool operator==(const row& rhs) const
    {
        return m_data == rhs.m_data;
    }

    item_row* data() const { return m_data; }

    // exchanges the item_row pointers of the two rows
    void swap(row& rhs)
    {
        std::swap(m_data, rhs.m_data);
    }

  private:
    void assign(const string& name, const string& value, bool emplacing);
    void assign(const item& i, bool emplacing);

    item_row* m_data;
};
// swap for rows is defined below
// --------------------------------------------------------------------
// some more templates to be able to do querying
namespace detail
{
// Abstract base class for everything that can be tested against a row.
struct condition_impl
{
virtual ~condition_impl() {}
// returns true when row r, which is part of category c, satisfies the condition
virtual bool test(const category& c, const row& r) const = 0;
// textual description of the condition
virtual std::string str() const = 0;
};
}
// A condition owns a condition_impl and deletes it on destruction.
// Conditions can be moved but not copied; a moved-from condition no longer
// has an implementation and must not be evaluated.
struct condition
{
    // takes ownership of impl
    condition(detail::condition_impl* impl) : m_impl(impl) {}

    // copying would lead to deleting the implementation twice
    condition(const condition&) = delete;
    condition& operator=(const condition&) = delete;

    // moving leaves rhs without an implementation
    condition(condition&& rhs) noexcept
        : m_impl(nullptr)
    {
        std::swap(m_impl, rhs.m_impl);
    }

    // move assignment exchanges the implementations, the previous one of
    // this object is deleted when rhs is destroyed
    condition& operator=(condition&& rhs) noexcept
    {
        std::swap(m_impl, rhs.m_impl);
        return *this;
    }

    ~condition()
    {
        delete m_impl;
    }

    // test row r of category c against this condition
    bool operator()(const category& c, const row& r) const
    {
        assert(m_impl);
        return m_impl->test(c, r);
    }

    // textual description of the condition
    std::string str() const
    {
        // same precondition as operator(): catch use of a moved-from condition
        assert(m_impl);
        return m_impl->str();
    }

    detail::condition_impl* m_impl;
};
namespace detail
{
// Condition that holds when the item named m_item_tag compares equal to
// m_value, using item_reference::compare for the value type.
template<typename T>
struct key_is_condition_impl : public condition_impl
{
    using value_type = T;

    key_is_condition_impl(const string& item_tag, const value_type& value)
        : m_item_tag(item_tag)
        , m_value(value)
    {
    }

    bool test(const category& c, const row& r) const override
    {
        const int order = r[m_item_tag].template compare<value_type>(m_value);
        return order == 0;
    }

    std::string str() const override
    {
        return m_item_tag + " == " + boost::lexical_cast<std::string>(m_value);
    }

    string m_item_tag;
    value_type m_value;
};
// Condition that holds when the item named m_item_tag does not compare
// equal to m_value, using item_reference::compare for the value type.
template<typename T>
struct key_is_not_condition_impl : public condition_impl
{
    using value_type = T;

    key_is_not_condition_impl(const string& item_tag, const value_type& value)
        : m_item_tag(item_tag)
        , m_value(value)
    {
    }

    bool test(const category& c, const row& r) const override
    {
        const int order = r[m_item_tag].template compare<value_type>(m_value);
        return order != 0;
    }

    std::string str() const override
    {
        return m_item_tag + " != " + boost::lexical_cast<std::string>(m_value);
    }

    string m_item_tag;
    value_type m_value;
};
// Condition whose test is delegated to a callable of type COMP; the item
// tag is only kept for the description returned by str().
template<typename COMP>
struct key_compare_condition_impl : public condition_impl
{
    key_compare_condition_impl(const string& item_tag, COMP&& comp)
        : m_item_tag(item_tag)
        , m_comp(std::move(comp))
    {
    }

    bool test(const category& c, const row& r) const override
    {
        return m_comp(c, r);
    }

    std::string str() const override
    {
        return m_item_tag + " compare " /*+ boost::lexical_cast<std::string>(m_value)*/;
    }

    string m_item_tag;
    COMP m_comp;
};
struct key_matches_condition_impl : public condition_impl
{
key_matches_condition_impl(const string& item_tag, const std::regex& rx)
: m_item_tag(item_tag), m_rx(rx) {}
virtual bool test(const category& c, const row& r) const
{
return std::regex_match(r[m_item_tag].as<string>(), m_rx);
}
virtual std::string str() const
{
return m_item_tag + " ~= " + "<rx>";
}
string m_item_tag;
std::regex m_rx;
};
// Condition on a value without a specific item tag; test() is defined
// further down, once class category is complete.
template<typename T>
struct any_is_condition_impl : public condition_impl
{
    using value_type = T;

    any_is_condition_impl(const value_type& value)
        : m_value(value)
    {
    }

    bool test(const category& c, const row& r) const override;

    std::string str() const override
    {
        return "any == " + boost::lexical_cast<std::string>(m_value);
    }

    value_type m_value;
};
struct any_matches_condition_impl : public condition_impl
{
any_matches_condition_impl(const std::regex& rx)
: m_rx(rx) {}
virtual bool test(const category& c, const row& r) const;
virtual std::string str() const
{
return "any ~= <rx>";
}
std::regex m_rx;
};
struct and_condition_impl : public condition_impl
{
and_condition_impl(condition&& a, condition&& b)
: m_a(nullptr), m_b(nullptr)
{
std::swap(m_a, a.m_impl);
std::swap(m_b, b.m_impl);
}
~and_condition_impl()
{
delete m_a;
delete m_b;
}
virtual bool test(const category& c, const row& r) const
{
return m_a->test(c, r) and m_b->test(c, r);
}
virtual std::string str() const
{
return "(" + m_a->str() + ") and (" + m_b->str() + ")";
}
condition_impl* m_a;
condition_impl* m_b;
};
struct or_condition_impl : public condition_impl
{
or_condition_impl(condition&& a, condition&& b)
: m_a(nullptr), m_b(nullptr)
{
std::swap(m_a, a.m_impl);
std::swap(m_b, b.m_impl);
}
~or_condition_impl()
{
delete m_a;
delete m_b;
}
virtual bool test(const category& c, const row& r) const
{
return m_a->test(c, r) or m_b->test(c, r);
}
virtual std::string str() const
{
return "(" + m_a->str() + ") or (" + m_b->str() + ")";
}
condition_impl* m_a;
condition_impl* m_b;
};
}
// Combine two conditions into one that holds when both hold. The
// implementations of a and b are moved into the new condition.
inline condition operator&&(condition&& a, condition&& b)
{
return condition(new detail::and_condition_impl(std::move(a), std::move(b)));
}
// Combine two conditions into one that holds when at least one of them
// holds. The implementations of a and b are moved into the new condition.
inline condition operator||(condition&& a, condition&& b)
{
return condition(new detail::or_condition_impl(std::move(a), std::move(b)));
}
// A key names an item; comparing a key with a value yields a condition that
// can be used to query a category, e.g. key("id") == 1.
//
// The conditions returned never refer back to the key object: the relational
// operators capture a copy of the item tag in their comparator. They used to
// capture this, which left the condition with a dangling pointer as soon as
// the (typically temporary) key was destroyed.
struct key
{
    key(const string& item_tag) : m_item_tag(item_tag) {}
    key(const char* item_tag) : m_item_tag(item_tag) {}

    // item equals v, compared using item_reference::compare<T>
    template<typename T>
    condition operator==(const T& v) const
    {
        return condition(new detail::key_is_condition_impl<T>(m_item_tag, v));
    }

    // C string version, a null pointer is treated as the empty string
    condition operator==(const char* v) const
    {
        string value(v ? v : "");
        return condition(new detail::key_is_condition_impl<std::string>(m_item_tag, value));
    }

    // item differs from v, compared using item_reference::compare<T>
    template<typename T>
    condition operator!=(const T& v) const
    {
        return condition(new detail::key_is_not_condition_impl<T>(m_item_tag, v));
    }

    // C string version, a null pointer is treated as the empty string
    condition operator!=(const char* v) const
    {
        string value(v ? v : "");
        return condition(new detail::key_is_not_condition_impl<std::string>(m_item_tag, value));
    }

    // The relational operators convert the item to T with as<T>() and
    // compare the result with a copy of v.
    template<typename T>
    condition operator>(const T& v) const
    {
        string tag = m_item_tag;
        auto comp = [tag, v](const category& c, const row& r) -> bool { return r[tag].as<T>() > v; };
        return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
    }

    template<typename T>
    condition operator>=(const T& v) const
    {
        string tag = m_item_tag;
        auto comp = [tag, v](const category& c, const row& r) -> bool { return r[tag].as<T>() >= v; };
        return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
    }

    template<typename T>
    condition operator<(const T& v) const
    {
        string tag = m_item_tag;
        auto comp = [tag, v](const category& c, const row& r) -> bool { return r[tag].as<T>() < v; };
        return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
    }

    template<typename T>
    condition operator<=(const T& v) const
    {
        string tag = m_item_tag;
        auto comp = [tag, v](const category& c, const row& r) -> bool { return r[tag].as<T>() <= v; };
        return condition(new detail::key_compare_condition_impl<decltype(comp)>(m_item_tag, std::move(comp)));
    }

    string m_item_tag;
};
// Comparing a key with a std::regex yields a condition that matches the
// item against the regular expression instead of testing for equality.
template<>
inline
condition key::operator==(const std::regex& rx) const
{
return condition(new detail::key_matches_condition_impl(m_item_tag, rx));
}
// Like key, but without an item tag: comparing an any object with a value
// yields an any_is_condition_impl, which looks at all fields of a row.
struct any
{
template<typename T>
condition operator==(const T& v) const
{
return condition(new detail::any_is_condition_impl<T>(v));
}
};
// Comparing any with a std::regex yields a condition that matches the
// fields of a row against the regular expression.
template<>
inline
condition any::operator==(const std::regex& rx) const
{
return condition(new detail::any_matches_condition_impl(rx));
}
// --------------------------------------------------------------------
// class rowset is used to return find results. Use it to re-order the results
// or to group them
// A vector of rows that remembers the category it belongs to.
class rowset : public vector<row>
{
public:
rowset(category& cat);
// order by a single item, forwards to the initializer_list version
rowset& orderBy(const string& item)
{ return orderBy({ item }); }
// NOTE(review): defined elsewhere; presumably sorts the rows on the given items, in the order given -- confirm
rowset& orderBy(std::initializer_list<string> items);
private:
// the category is held by reference and has to outlive the rowset
category& m_cat;
};
// --------------------------------------------------------------------
// class category acts as an STL container for row objects
// A named collection of rows belonging to a datablock. Rows can be visited
// with begin()/end(), looked up with conditions and added with emplace().
// Categories cannot be copied.
class category
{
public:
friend class datablock;
friend class row;
friend struct detail::item_reference;
category(datablock& db, const string& name, validator* validator);
// copying is disabled
category(const category&) = delete;
category& operator=(const category&) = delete;
~category();
// returns a copy of the category name
const string name() const { return m_name; }
// NOTE(review): defined elsewhere; presumably refers to item_name in the first row -- confirm
const detail::item_reference get_first_item(const char* item_name) const;
// Forward iterator over the rows; it stores a row object (m_current) and
// hands out references to that stored object.
struct iterator : public std::iterator<std::forward_iterator_tag, row>
{
friend class category;
typedef std::iterator<std::forward_iterator_tag, row> base_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::reference reference;
iterator(item_row* data) : m_current(data) {}
reference operator*() { return m_current; }
pointer operator->() { return &m_current; }
iterator& operator++();
iterator operator++(int) { iterator result(*this); this->operator++(); return result; }
// iterators are equal when their rows are equal, i.e. refer to the same item_row
bool operator==(const iterator& rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator& rhs) const { return not (m_current == rhs.m_current); }
private:
row m_current;
};
iterator begin();
iterator end();
bool empty() const;
size_t size() const;
void clear();
// rows wrapping m_head and m_tail; no check is done here for an empty category
row front() { return row(m_head); }
row back() { return row(m_tail); }
// queries; the condition is passed as an rvalue
// NOTE(review): operator[] presumably returns the first matching row -- confirm
row operator[](condition&& cond);
rowset find(condition&& cond);
bool exists(condition&& cond);
// order by a single item, forwards to the initializer_list version
rowset orderBy(const string& item)
{ return orderBy({ item }); }
rowset orderBy(std::initializer_list<string> items);
// The emplace overloads return a row and a bool.
// NOTE(review): the bool presumably tells whether a new row was added -- confirm
// a single item, forwards to the initializer_list version
std::tuple<row,bool> emplace(item value) { return emplace({ value }); }
// a list of items, forwards to the iterator version
std::tuple<row,bool> emplace(std::initializer_list<item> values)
{ return emplace(values.begin(), values.end()); }
std::tuple<row,bool> emplace(row r);
template<class Iter>
std::tuple<row,bool> emplace(Iter b, Iter e);
void erase(condition&& cond);
void erase(row r);
void erase(iterator ri);
void validate();
const validator& get_validator() const;
// may return nullptr, m_cat_validator is initialised with nullptr
const validate_category* get_cat_validator() const { return m_cat_validator; }
void set_validator(validator* v);
iset fields() const;
iset mandatory_fields() const;
iset key_fields() const;
void drop(const string& field);
// fills tags with the tag order of this category (output parameter)
void get_tag_order(vector<string>& tags) const;
// return index for known column, or the next available column index
size_t get_column_index(const string& name) const;
const string& get_column_name(size_t column_index) const;
void reorderByIndex();
private:
// writing is private, class datablock (a friend) is the one expected to call these
void write(std::ostream& os);
void write(std::ostream& os, const vector<string>& order);
void write(std::ostream& os, const vector<int>& order, bool includeEmptyColumns);
size_t add_column(const string& name);
datablock& m_db;
string m_name;
validator* m_validator;
const validate_category* m_cat_validator = nullptr;
vector<item_column> m_columns;
// first and last row, see front() and back()
item_row* m_head;
item_row* m_tail;
class cat_index* m_index;
};
// --------------------------------------------------------------------
// A file gives access to datablocks, starting at m_head, and can load and
// save them through streams. Files can be moved but not copied.
class file
{
public:
friend class parser;
friend class validator;
file();
file(std::istream& is, bool validate = false);
file(file&& rhs);
// copying is disabled
file(const file& rhs) = delete;
file& operator=(const file& rhs) = delete;
~file();
void load(std::istream& is);
void save(std::ostream& os);
// save with an explicit order, simply forwards to write()
void save(std::ostream& os, const vector<string>& order) { write(os, order); }
void write(std::ostream& os, const vector<string>& order);
void load_dictionary(); // load the default dictionary, that is mmcif_ddl in this case
void load_dictionary(const char* dict); // load one of the compiled in dictionaries
void load_dictionary(std::istream& is); // load dictionary from input stream
void validate();
// dereferences m_head without a check, so the file must contain a datablock
datablock& first_datablock() { return *m_head; }
// NOTE(review): takes a raw pointer; ownership presumably passes to the file -- confirm
void append(datablock* e);
datablock& operator[](const string& name);
// Forward iterator over the datablocks, wraps a datablock pointer.
struct iterator : public std::iterator<std::forward_iterator_tag, datablock>
{
typedef std::iterator<std::forward_iterator_tag, datablock> base_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::reference reference;
iterator(datablock* db) : m_current(db) {}
reference operator*() { return *m_current; }
pointer operator->() { return m_current; }
iterator& operator++();
iterator operator++(int) { iterator result(*this); this->operator++(); return result; }
bool operator==(const iterator& rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator& rhs) const { return not (m_current == rhs.m_current); }
private:
datablock* m_current;
};
iterator begin() const;
iterator end() const;
const validator& get_validator() const;
// fills tags with the tag order of this file (output parameter)
void get_tag_order(vector<string>& tags) const;
private:
void set_validator(validator* v);
// first datablock, see first_datablock()
datablock* m_head;
validator* m_validator;
};
// --------------------------------------------------------------------
// some postponed inlines
namespace detail
{
// A row satisfies the condition when at least one of the fields of the
// category, converted to the value type, equals m_value.
template<typename T>
inline
bool any_is_condition_impl<T>::test(const category& c, const row& r) const
{
    for (auto& field: c.fields())
    {
        try
        {
            if (r[field].as<value_type>() == m_value)
                return true;
        }
        catch (...)
        {
            // an exception (e.g. from the conversion in as<>) only means
            // that this field does not match, continue with the next one
        }
    }

    return false;
}
// A row satisfies the condition when the complete text of at least one of
// the fields of the category matches m_rx.
inline bool any_matches_condition_impl::test(const category& c, const row& r) const
{
    for (auto& field: c.fields())
    {
        try
        {
            if (std::regex_match(r[field].as<string>(), m_rx))
                return true;
        }
        catch (...)
        {
            // an exception only means that this field does not match,
            // continue with the next one
        }
    }

    return false;
}
}
}
namespace std
{
// Specialization of std::swap for cif::row, forwards to row::swap which
// exchanges the item_row pointers of the two rows.
template<>
inline void swap(cif::row& a, cif::row& b)
{
a.swap(b);
}
}
// cif parsing library
#include "libcif/cif++.h"
#include <boost/filesystem/path.hpp>
// the std regex of gcc is crashing....
#include <boost/regex.hpp>
#include <set>
namespace cif
{
struct validate_category;
// --------------------------------------------------------------------
// Exception type carrying a validation message; what() returns the
// message that was passed to the constructor.
class validation_error : public std::exception
{
  public:
    validation_error(const std::string& msg)
        : m_msg(msg)
    {
    }

    const char* what() const noexcept override
    {
        return m_msg.c_str();
    }

    std::string m_msg;
};
// --------------------------------------------------------------------
// The primitive types a dictionary type can be based on.
// NOTE(review): presumably these stand for the DDL codes 'char', 'uchar' and 'numb' -- confirm
enum DDL_PrimitiveType
{
ptChar, ptUChar, ptNumb
};
// maps the textual name of a primitive type to the enum value; defined elsewhere
DDL_PrimitiveType map_to_primitive_type(const std::string& s);
// Description of a type: its name, the primitive type it is based on and a
// regular expression.
// NOTE(review): m_rx presumably is the pattern valid values have to match -- confirm
struct validate_type
{
std::string m_name;
DDL_PrimitiveType m_primitive_type;
boost::regex m_rx;
// ordering is based on the name, compared using icompare
bool operator<(const validate_type& rhs) const
{
return icompare(m_name, rhs.m_name) < 0;
}
// compare values based on type
// int compare(const std::string& a, const std::string& b) const
// {
// return compare(a.c_str(), b.c_str());
// }
int compare(const char* a, const char* b) const;
};
// Description of an item: its tag, whether it is mandatory, its type, a set
// of enumeration values and links to related items and to its category.
struct validate_item
{
std::string m_tag;
bool m_mandatory;
const validate_type* m_type;
cif::iset m_enums;
// parent/child links between items, see set_parent()
validate_item* m_parent = nullptr;
std::set<validate_item*>
m_children;
validate_category* m_category = nullptr;
std::set<validate_item*>
m_foreign_keys;
void set_parent(validate_item* parent);
// ordering and equality are based on the tag, using icompare and iequals
bool operator<(const validate_item& rhs) const
{
return icompare(m_tag, rhs.m_tag) < 0;
}
bool operator==(const validate_item& rhs) const
{
return iequals(m_tag, rhs.m_tag);
}
// NOTE(review): defined elsewhere; presumably validates value and reports a failure by throwing -- confirm
void operator()(std::string value) const;
};
// Description of a category: its name, keys, groups, mandatory fields and
// the validators for its items.
struct validate_category
{
std::string m_name;
std::vector<string> m_keys;
cif::iset m_groups;
cif::iset m_mandatory_fields;
std::set<validate_item> m_item_validators;
// ordering is based on the name, compared using icompare
bool operator<(const validate_category& rhs) const
{
return icompare(m_name, rhs.m_name) < 0;
}
void add_item_validator(validate_item&& v);
// NOTE(review): defined elsewhere; returns a pointer, presumably nullptr when the tag is unknown -- confirm
const validate_item* get_validator_for_item(std::string tag) const;
// read access to the complete set of item validators
const std::set<validate_item>& item_validators() const
{
return m_item_validators;
}
};
// --------------------------------------------------------------------
// A validator holds the type and category validators of a dictionary,
// together with the name and version of that dictionary. Validators can be
// moved but not copied.
class validator
{
public:
friend class dict_parser;
validator();
~validator();
// copying is disabled, moving is allowed
validator(const validator& rhs) = delete;
validator& operator=(const validator& rhs) = delete;
validator(validator&& rhs);
validator& operator=(validator&& rhs);
void add_type_validator(validate_type&& v);
// NOTE(review): the lookups return pointers, presumably nullptr when nothing is found -- confirm
const validate_type* get_validator_for_type(std::string type_code) const;
void add_category_validator(validate_category&& v);
const validate_category* get_validator_for_category(std::string category) const;
// NOTE(review): defined elsewhere; the handling presumably depends on m_strict -- confirm
void report_error(const std::string& msg);
// name and version of the dictionary, the getters return copies
std::string dict_name() const { return m_name; }
void dict_name(const std::string& name) { m_name = name; }
std::string dict_version() const { return m_version; }
void dict_version(const std::string& version) { m_version = version; }
private:
// name is fully qualified here:
validate_item* get_validator_for_item(std::string name) const;
std::string m_name;
std::string m_version;
bool m_strict = false;
// std::set<uint32> m_sub_categories;
std::set<validate_type> m_type_validators;
std::set<validate_category> m_category_validators;
};
}
#pragma once
#include "cif++.h"
// Write the content of cifFile to the stream pdbFile.
// NOTE(review): going by the name the output is in PDB format -- confirm in the implementation
void WritePDBFile(std::ostream& pdbFile, cif::file& cifFile);
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <set>
#include <tuple>
#include <vector>
#include <map>
#include "libcif/atom_type.h"
namespace libcif
{
// --------------------------------------------------------------------
// The chemical composition of the structure in an mmCIF file is
// defined in the class composition. A composition consists of
// entities. Each entity can be either a polymer, a non-polymer,
// a macrolide or a water molecule.
// Entities themselves are made up of compounds. And compounds
// contain comp_atom records for each atom.
class composition;
class entity;
class compound;
struct comp_atom;
// --------------------------------------------------------------------
// struct containing information about an atom in a chemical compound
// This information comes from the CCP4 monomer library.
// One atom of a compound.
struct comp_atom
{
// identifier of the atom, see compound::get_atom_by_id
std::string id;
// the element of the atom
atom_type type_symbol;
// NOTE(review): presumably the energy type as used in the monomer library -- confirm
std::string type_energy;
float partial_charge;
};
// --------------------------------------------------------------------
// a class that contains information about a chemical compound.
// This information is derived from the ccp4 monomer library by default.
// To create compounds, you'd best use the factory method.
// A chemical compound: an id, a name, a group, a list of atoms and a map
// with a value for pairs of atom ids (the bonds).
class compound
{
public:
// the atoms and bonds are moved into the new object
compound(const std::string& id, const std::string& name,
const std::string& group, std::vector<comp_atom>&& atoms,
std::map<std::tuple<std::string,std::string>,float>&& bonds)
: m_id(id), m_name(name), m_group(group)
, m_atoms(std::move(atoms)), m_bonds(std::move(bonds))
{
}
~compound();
// factory method, create a compound based on the three letter code
// (for amino acids) or the one-letter code (for bases) or the
// code as it is known in the CCP4 monomer library.
static const compound* create(const std::string& id);
// this second factory method can create a compound even if it is not
// recorded in the library. It will take the values from the CCP4 lib
// unless the value passed to this function is not empty.
static const compound* create(const std::string& id, const std::string& name,
const std::string& type, const std::string& formula);
// add an additional path to the monomer library.
static void add_monomer_library_path(const std::string& dir);
// accessors
// these return copies of the stored values
std::string id() const { return m_id; }
std::string name() const { return m_name; }
std::string type() const;
// std::string group() const { return m_group; }
// returns a copy of the complete list of atoms
std::vector<comp_atom> atoms() const { return m_atoms; }
comp_atom get_atom_by_id(const std::string& atom_id) const;
// NOTE(review): defined elsewhere; presumably both look up the pair of ids in m_bonds -- confirm
bool atoms_bonded(const std::string& atom_id_1, const std::string& atom_id_2) const;
float atom_bond_value(const std::string& atom_id_1, const std::string& atom_id_2) const;
std::string formula() const;
float formula_weight() const;
int charge() const;
bool is_water() const;
private:
// entity& m_entity;
std::string m_id;
std::string m_name;
std::string m_group;
std::vector<comp_atom> m_atoms;
std::map<std::tuple<std::string,std::string>,float> m_bonds;
};
// --------------------------------------------------------------------
// an entity. This is a base class for polymer_entity and non_poly_entity
// The latter can be either a regular non-polymer (residue), a macrolide or
// water.
// Abstract base class for entities: stores an id, a type and a description
// and requires derived classes to provide formula_weight().
class entity
{
public:
entity(const std::string& id, const std::string& type, const std::string& description);
virtual ~entity();
std::string id() const;
std::string type() const;
std::string description() const;
// to be implemented by the derived classes
virtual float formula_weight() const = 0;
private:
std::string m_id;
std::string m_type;
std::string m_description;
};
// --------------------------------------------------------------------
// A polymer entity
// An entity consisting of a sequence of monomers. The monomers form a singly
// linked list starting at m_seq, begin()/end() iterate over that list.
class polymer_entity : public entity
{
public:
polymer_entity(const std::string& id, const std::string& description);
~polymer_entity();
std::string seq_one_letter_code(bool cannonical) const;
std::string pdbx_strand_id() const;
virtual float formula_weight() const;
// One position in the sequence.
class monomer
{
public:
friend class polymer_entity;
size_t num() const; // sequence number
bool hetero() const; // whether this position contains alternate compounds
const compound& comp(size_t alt_nr) const; // the chemical compound of this monomer
private:
// next monomer in the sequence, followed by the iterator below
monomer* m_next;
// NOTE(review): presumably the next alternate for this position, see hetero() -- confirm
monomer* m_alt;
size_t m_num;
compound* m_comp;
};
// Forward iterator over the monomers; it follows the m_next pointers and a
// null cursor marks the end.
class iterator : public std::iterator<std::forward_iterator_tag, const monomer>
{
public:
typedef std::iterator<std::forward_iterator_tag, const monomer> base_type;
typedef base_type::reference reference;
typedef base_type::pointer pointer;
// a default constructed iterator is the end iterator
iterator(monomer* monomer = nullptr)
: m_cursor(monomer) {}
iterator(const iterator& rhs)
: m_cursor(rhs.m_cursor)
{
}
iterator& operator=(const iterator& rhs)
{
m_cursor = rhs.m_cursor;
return *this;
}
reference operator*() { return *m_cursor; }
pointer operator->() { return m_cursor; }
// advancing dereferences the cursor, so the end iterator must not be incremented
iterator& operator++() { m_cursor = m_cursor->m_next; return *this; }
iterator operator++(int)
{
iterator tmp(*this);
operator++();
return tmp;
}
bool operator==(const iterator& rhs) const { return m_cursor == rhs.m_cursor; }
bool operator!=(const iterator& rhs) const { return m_cursor != rhs.m_cursor; }
private:
monomer* m_cursor;
};
iterator begin() const { return iterator(m_seq); }
iterator end() const { return iterator(); }
const monomer& operator[](size_t index) const;
private:
// NOTE(review): a reference to an entity in a class that already derives from entity; what it refers to is not visible here -- confirm
entity& m_entity;
// first monomer of the sequence
monomer* m_seq;
};
// --------------------------------------------------------------------
// non_poly entity
// An entity that refers to a single compound.
class non_poly_entity : public entity
{
public:
non_poly_entity(const std::string& id, const std::string& type, const std::string& description);
~non_poly_entity();
// the compound of this entity
compound& comp() const;
virtual float formula_weight() const;
private:
// NOTE(review): raw pointer, whether this class owns the compound is not visible here -- confirm
compound* m_compound;
};
}
// Copyright Maarten L. Hekkelman 2006-2010
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
#ifndef MRSRC_H
#define MRSRC_H
#include <string>
#include <list>
#include <exception>
/*
Resources are data sources for the application.
They are retrieved by name.
Basic usage:
mrsrc::rsrc rsrc("dialogs/my-cool-dialog.glade");
if (rsrc)
{
GladeXML* glade = glade_xml_new_from_buffer(rsrc.data(), rsrc.size(), NULL, "japi");
...
}
*/
namespace mrsrc {
// One entry of the resource index. All fields are offsets: m_next and
// m_child into gResourceIndex (0 meaning there is none), m_name into
// gResourceName and m_data into gResourceData; m_size is the size of
// the data.
struct rsrc_imp
{
unsigned int m_next;
unsigned int m_child;
unsigned int m_name;
unsigned int m_size;
unsigned int m_data;
};
}
// The following three variables are generated by the japi resource compiler:
// the index entries, the first entry is the root of the resource tree
extern const mrsrc::rsrc_imp gResourceIndex[];
// the data of all resources, addressed using rsrc_imp::m_data
extern const char gResourceData[];
// the names of all resources, addressed using rsrc_imp::m_name
extern const char gResourceName[];
namespace mrsrc
{
// Thrown when a resource path cannot be resolved.
class rsrc_not_found_exception : public std::exception
{
  public:
    const char* what() const noexcept override
    {
        return "resource not found";
    }
};
class rsrc;
typedef std::list<rsrc> rsrc_list;
// A handle to one entry in the resource index. Copying a rsrc copies the
// pointer to the entry, not the data.
class rsrc
{
public:
// the default constructed rsrc refers to the first index entry, the root
rsrc() : m_impl(gResourceIndex) {}
rsrc(const rsrc& other)
: m_impl(other.m_impl) {}
rsrc& operator=(const rsrc& other)
{
m_impl = other.m_impl;
return *this;
}
// look up a resource by path, throws rsrc_not_found_exception when it cannot be found
rsrc(const std::string& path);
std::string name() const { return gResourceName + m_impl->m_name; }
// pointer to the data of size() bytes
const char* data() const { return gResourceData + m_impl->m_data; }
unsigned long size() const { return m_impl->m_size; }
// a rsrc tests true when it has data, i.e. a size larger than zero
operator bool () const { return m_impl != NULL and m_impl->m_size > 0; }
// the direct children of this resource
rsrc_list children() const;
private:
rsrc(const rsrc_imp* imp)
: m_impl(imp) {}
const rsrc_imp* m_impl;
};
// Collect the direct children of this resource: start at the entry m_child
// refers to and follow the m_next links until a link is zero.
inline
rsrc_list rsrc::children() const
{
    rsrc_list kids;

    unsigned int ix = m_impl->m_child;
    while (ix != 0)
    {
        const rsrc_imp* node = gResourceIndex + ix;
        kids.push_back(rsrc(node));
        ix = node->m_next;
    }

    return kids;
}
// Locate a resource by its path. The path is split on '/' and for each
// component the children of the current entry are searched for an entry
// with that name. Throws rsrc_not_found_exception when the current entry
// has no children or none of them has the requested name.
inline
rsrc::rsrc(const std::string& path)
    : m_impl(gResourceIndex)
{
//	static_assert(sizeof(m_impl->m_next) == 4, "invalid size for unsigned int");

    // plain std::string code here instead of boost, to keep the
    // dependencies of this header minimal
    std::string::size_type pos = 0;

    while (pos < path.size())
    {
        // descend to the first child, not having children is an error
        if (m_impl->m_child == 0)
            throw rsrc_not_found_exception();
        m_impl = gResourceIndex + m_impl->m_child;

        // cut off the next path component
        const std::string::size_type slash = path.find('/', pos);
        std::string component;

        if (slash == std::string::npos)
        {
            component = path.substr(pos);
            pos = path.size();
        }
        else
        {
            component = path.substr(pos, slash - pos);
            pos = slash + 1;
        }

        // walk the siblings until the name matches
        for (;;)
        {
            if (component == gResourceName + m_impl->m_name)
                break;

            if (m_impl->m_next == 0)
                throw rsrc_not_found_exception();
            m_impl = gResourceIndex + m_impl->m_next;
        }
    }
}
}
#endif
#pragma once
#include "cif++.h"
// --------------------------------------------------------------------
// One record of a PDB file: a name, a value and the line number, with a link
// to the next record.
struct PDBRecord
{
PDBRecord* m_next;
uint32 m_line_nr;
char m_name[11];
size_t m_vlen;
// zero length array (a compiler extension).
// NOTE(review): the value text is presumably stored directly behind the struct, in the extra room requested with operator new(size, v_len) -- confirm
char m_value[0];
PDBRecord(uint32 line_nr, const std::string& name, const std::string& value);
~PDBRecord();
// class specific allocation, the second form takes the length of the value
void* operator new(size_t);
void* operator new(size_t size, size_t v_len);
void operator delete(void* p);
// NOTE(review): presumably tests whether this record has the given name -- confirm
bool is(const char* name) const;
// NOTE(review): the v_ functions presumably extract the given column(s) of the value as char, string, integer and float text respectively -- confirm
char v_c(size_t column);
std::string v_s(size_t column_first, size_t column_last = std::numeric_limits<size_t>::max());
int v_i(int column_first, int column_last);
std::string v_f(size_t column_first, size_t column_last);
};
// --------------------------------------------------------------------
// Read from the stream pdbFile and store the result in cifFile.
// NOTE(review): going by the name the input is expected to be in PDB format -- confirm in the implementation
void ReadPDBFile(std::istream& pdbFile, cif::file& cifFile);
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <boost/filesystem/operations.hpp>
#include <boost/math/quaternion.hpp>
#include <boost/any.hpp>
#include "libcif/atom_type.h"
#include "libcif/point.h"
#include "libcif/compound.h"
/*
To modify a structure, you will have to use actions.
The currently supported actions are:
// - Move atom to new location
- Remove atom
// - Add new atom that was formerly missing
// - Add alternate residue
-
Other important design principles:
- all objects here are references to the actual data, not models of
the data itself. That means that if you copy an atom, you copy the
reference to an atom in the structure. You're not creating a new
atom. This may sound obvious, but it is not if you are used to
copy semantics in the C++ world.
*/
// forward declaration
// Forward declaration only, so that this header does not have to pull in
// the cif++ headers.
namespace cif
{
class datablock;
}
namespace libcif
{
// Forward declarations of the classes in this namespace. Note that
// monomer and polymer are only declared here, their definitions further
// down in this header are commented out.
class atom;
class residue;
class monomer;
class polymer;
class structure;
class file;
// --------------------------------------------------------------------
// We do not want to introduce a dependency on cif++ here, we might want
// to change the backend storage in the future.
// So, in order to access the data we use properties based on boost::any
// Eventually this should be moved to std::variant, but that's only when
// c++17 is acceptable.
// A named value of arbitrary type. The value is kept in a boost::any so
// that this header does not depend on the storage backend.
struct property
{
	std::string name;
	boost::any value;

	property() = default;

	property(const std::string& name, const boost::any& value)
		: name(name)
		, value(value)
	{
	}
};

// A sequence of properties.
using property_list = std::vector<property>;
// --------------------------------------------------------------------
// Handle to an atom. The object only stores a pointer to an atom_impl;
// according to the design notes at the top of this header, copying an atom
// copies the reference, it does not create a new atom.
class atom
{
  public:
//	atom(const structure& s, const std::string& id);

	// Wrap an existing implementation object.
	// NOTE(review): ownership / reference counting of impl is decided in
	// the implementation file, it cannot be told from this declaration.
	atom(struct atom_impl* impl);
	// Look up the atom with identifier `id` in file `f`.
	atom(const file& f, const std::string& id);
	atom(const atom& rhs);
	~atom();

	atom& operator=(const atom& rhs);

	std::string id() const;
	atom_type type() const;

	point location() const;

	const compound& comp() const;
	const entity& ent() const;
	bool is_water() const;
	int charge() const;

	// Generic access to named values: the first overload reads the value
	// stored under `name`, the second one stores `value` under `name`.
	boost::any property(const std::string& name) const;
	void property(const std::string& name, const boost::any& value);

	// specifications
	// (the names mirror the label_* and auth_* items of the mmCIF atom_site
	// category — presumably they return exactly those items)
	std::string label_atom_id() const;
	std::string label_comp_id() const;
	std::string label_asym_id() const;
	int label_seq_id() const;
	std::string label_alt_id() const;

	std::string auth_atom_id() const;
	std::string auth_comp_id() const;
	std::string auth_asym_id() const;
	int auth_seq_id() const;
	std::string pdbx_auth_ins_code() const;
	std::string auth_alt_id() const;

	bool operator==(const atom& rhs) const;

	// The file this atom belongs to.
	const file& get_file() const;

  private:
	struct atom_impl* m_impl;
};

// A collection of atom handles.
typedef std::vector<atom> atom_view;
// --------------------------------------------------------------------
// A residue refers to the compound it is an instance of. The class is
// meant to be derived from (atoms() is virtual), so it needs a virtual
// destructor to make deleting a derived object through a residue pointer
// well defined.
class residue : public std::enable_shared_from_this<residue>
{
  public:
	// `cmp` is stored by reference and must outlive this residue.
	residue(const compound& cmp) : m_compound(cmp) {}
	virtual ~residue() = default;

	// The compound passed to the constructor.
	const compound& comp() const { return m_compound; }

	// The atoms of this residue; may be overridden by derived classes.
	virtual atom_view atoms();

  private:
	const compound& m_compound;
};
//// --------------------------------------------------------------------
//// a monomer models a single residue in a protein chain
//
//class monomer : public residue
//{
// public:
// monomer(polymer& polymer, size_t seq_id, const std::string& comp_id,
// const std::string& alt_id);
//
// int num() const { return m_num; }
//// polymer& get_polymer();
//
//// std::vector<monomer_ptr> alternates();
//
// private:
// polymer_ptr m_polymer;
// int m_num;
//};
//
//// --------------------------------------------------------------------
//
//class polymer : public std::enable_shared_from_this<polymer>
//{
// public:
// polymer(const polymer_entity& pe, const std::string& asym_id);
//
// struct iterator : public std::iterator<std::random_access_iterator_tag, monomer>
// {
// typedef std::iterator<std::bidirectional_iterator_tag, monomer> base_type;
// typedef base_type::reference reference;
// typedef base_type::pointer pointer;
//
// iterator(polymer& list, uint32 index);
// iterator(iterator&& rhs);
// iterator(const iterator& rhs);
// iterator& operator=(const iterator& rhs);
// iterator& operator=(iterator&& rhs);
//
// reference operator*();
// pointer operator->();
//
// iterator& operator++();
// iterator operator++(int);
//
// iterator& operator--();
// iterator operator--(int);
//
// bool operator==(const iterator& rhs) const;
// bool operator!=(const iterator& rhs) const;
// };
//
// iterator begin();
// iterator end();
//
// private:
// polymer_entity m_entity;
// std::string m_asym_id;
// std::vector<residue_ptr> m_monomers;
//};
// --------------------------------------------------------------------
// file is a reference to the data stored in e.g. the cif file.
// This object is not copyable.
// Handle to the data of one file on disk. All state lives in a file_impl
// object reachable through m_impl; copying is explicitly disabled.
class file : public std::enable_shared_from_this<file>
{
  public:
	file();
	// Construct from path `p` — presumably this loads the file right away,
	// confirm in the implementation.
	file(boost::filesystem::path p);
	~file();

	// not copyable
	file(const file&) = delete;
	file& operator=(const file&) = delete;

	void load(boost::filesystem::path p);
	void save(boost::filesystem::path p);

	// Returns the structure for model number `nr`, the default is 1.
	// NOTE(review): ownership of the returned pointer is not visible here.
	structure* model(size_t nr = 1);

	// Direct access to the implementation object; m_impl is dereferenced
	// without a check.
	struct file_impl& impl() const { return *m_impl; }

	std::vector<const entity*> entities();

	// The underlying cif datablock.
	cif::datablock& data();

  private:
	struct file_impl* m_impl;
};
// --------------------------------------------------------------------
// A structure gives access to the atoms of one model in a file. Its state
// lives in a structure_impl object reachable through m_impl.
class structure
{
  public:
	// Construct for model `model_nr` (default 1) of file `p`.
	structure(file& p, uint32 model_nr = 1);
	structure(const structure&);
	structure& operator=(const structure&);
	~structure();

	file& get_file() const;

	atom_view atoms() const;
	atom_view waters() const;

	// Atom lookup, by id or by location within `max_distance`.
	// NOTE(review): what happens when nothing matches is not visible in
	// this header.
	atom get_atom_by_id(std::string id) const;
	atom get_atom_by_location(point pt, float max_distance) const;

	// Atom lookup based on the label_* respectively the auth_* fields.
	atom get_atom_for_label(const std::string& atom_id, const std::string& asym_id,
		const std::string& comp_id, int seq_id, const std::string& alt_id = "");

	atom get_atom_for_auth(const std::string& atom_id, const std::string& asym_id,
		const std::string& comp_id, int seq_id, const std::string& alt_id = "",
		const std::string& pdbx_auth_ins_code = "");

	// map between auth and label locations
	// NOTE(review): the order of the tuple elements is only documented for
	// some of the functions below, check the implementation for the others.
	std::tuple<std::string,int,std::string> MapAuthToLabel(const std::string& asym_id,
		const std::string& seq_id, const std::string& comp_id, const std::string& ins_code = "");

	std::tuple<std::string,std::string,std::string,std::string> MapLabelToAuth(
		const std::string& asym_id, int seq_id, const std::string& comp_id);

	// returns chain, seqnr
	std::tuple<std::string,std::string> MapLabelToAuth(
		const std::string& asym_id, int seq_id);

	// returns chain,seqnr,comp,iCode
	std::tuple<std::string,int,std::string,std::string> MapLabelToPDB(
		const std::string& asym_id, int seq_id, const std::string& comp_id);

	std::tuple<std::string,int,std::string,std::string> MapPDBToLabel(
		const std::string& asym_id, int seq_id, const std::string& comp_id, const std::string& iCode);

	// Actions
	// Currently the only supported modification, see the notes at the top
	// of this header.
	void remove_atom(atom& a);

  private:
	friend class action;

	struct structure_impl* m_impl;
};
}
// Lib for working with structures as contained in mmCIF and PDB files
#include "libcif/atom_type.h"
#include "libcif/cif++.h"
#include "cif++/AtomType.h"
#include "cif++/Cif++.h"
using namespace std;
......@@ -10,7 +10,7 @@ namespace libcif
const float kNA = nan("1");
const atom_type_info kKnownAtoms[] =
const AtomTypeInfo kKnownAtoms[] =
{
{ Nn, "Unknown", "Nn", 0, false, { kNA, kNA, kNA, kNA, kNA, kNA, kNA } },
{ H, "Hydro­gen", "H", 1.008, false, { 53, 25, 37, 32, kNA, kNA, 120 } },
......@@ -133,35 +133,35 @@ const atom_type_info kKnownAtoms[] =
{ Lr, "Lawren­cium", "Lr", 266, true, { kNA, kNA, kNA, 161, 141, kNA, kNA } }
};
// Number of entries in kKnownAtoms.
uint32 kKnownAtomsCount = sizeof(kKnownAtoms) / sizeof(AtomTypeInfo);
// --------------------------------------------------------------------
// atom_type_traits
// AtomTypeTraits
atom_type_traits::atom_type_traits(const string& symbol)
: m_info(nullptr)
AtomTypeTraits::AtomTypeTraits(const string& symbol)
: mInfo(nullptr)
{
for (auto& i: kKnownAtoms)
{
if (cif::iequals(i.symbol, symbol))
{
m_info = &i;
mInfo = &i;
break;
}
}
if (m_info == nullptr)
if (mInfo == nullptr)
throw invalid_argument("Not a known element: " + symbol);
}
atom_type_traits::atom_type_traits(atom_type t)
AtomTypeTraits::AtomTypeTraits(AtomType t)
{
if (t < H or t > Lr)
throw invalid_argument("atom_type out of range");
m_info = &kKnownAtoms[t];
throw invalid_argument("atomType out of range");
mInfo = &kKnownAtoms[t];
}
bool atom_type_traits::is_element(const string& symbol)
bool AtomTypeTraits::isElement(const string& symbol)
{
bool result = false;
......@@ -177,7 +177,7 @@ bool atom_type_traits::is_element(const string& symbol)
return result;
}
bool atom_type_traits::is_metal(const std::string& symbol)
bool AtomTypeTraits::isMetal(const string& symbol)
{
bool result = false;
......
// cif parsing library
#include <cassert>
#include <memory>
#include <new>
#include <regex>
#include <set>
#include <stack>
#include <tuple>
#include <unordered_map>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/fstream.hpp>
#if defined(USE_RSRC)
#include "cif++/mrsrc.h"
#endif
#include "cif++/Cif++.h"
#include "cif++/CifParser.h"
#include "cif++/CifValidator.h"
#include "cif++/CifUtils.h"
using namespace std;
namespace ba = boost::algorithm;
namespace fs = boost::filesystem;
extern int VERBOSE;
namespace cif
{
static const char* kEmptyResult = "";
// --------------------------------------------------------------------
// most internal data structures are stored as linked lists
// Item values are stored in a simple struct. They should be const anyway
// One value of a row: the text of the item plus the index of the column it
// belongs to. The values of a row form a singly linked list via mNext.
struct ItemValue
{
	ItemValue* mNext;			// next value in the same row, or nullptr
	uint32 mColumnIndex;		// index of the column this value belongs to
	char mText[0];				// zero-length trailing array (compiler extension); the text
								// is copied into the extra bytes reserved by operator new

	ItemValue(const char* v, uint32 columnIndex);
	~ItemValue();

	// Placement form: dataSize is the number of bytes needed for the text,
	// the terminating zero byte is added by the implementation.
	void* operator new(size_t size, size_t dataSize);
	void operator delete(void* p);
};
// --------------------------------------------------------------------
// Copy `value`, including its terminating zero, into the text buffer that
// trails this object. The caller must have allocated the object with
// operator new(size, dataSize) and a dataSize of at least strlen(value).
ItemValue::ItemValue(const char* value, uint32 columnIndex)
	: mNext(nullptr)
	, mColumnIndex(columnIndex)
{
	memcpy(mText, value, strlen(value) + 1);
}
// Delete all values following this one. Each successor is unlinked before
// it is deleted, so the destructor never recurses deeper than one level.
// A node that points to itself is skipped (paranoia).
ItemValue::~ItemValue()
{
	for (ItemValue* victim = mNext; victim != nullptr and victim != this; victim = mNext)
	{
		mNext = victim->mNext;
		victim->mNext = nullptr;
		delete victim;
	}
}
void* ItemValue::operator new(size_t size, size_t dataSize)
{
return malloc(size + dataSize + 1);
}
// Counterpart of operator new: the storage came from malloc, so it is
// returned with free.
void ItemValue::operator delete(void* p)
{
	free(p);
}
// --------------------------------------------------------------------
// itemColumn contains info about a column or field in a Category
// Description of one column of a Category: its name and the validator for
// the values in that column.
struct ItemColumn
{
	string mName; // store lower-case, for optimization
	const ValidateItem* mValidator;	// nullptr when no validator is known for this column
};
// itemRow contains the actual values for a Row in a Category
// The data of one row in a Category. The rows of a category form a singly
// linked list via mNext, the values of the row hang off mValues.
struct ItemRow
{
	~ItemRow();

	// Remove the value stored for column columnIx, if any.
	void drop(uint32 columnIx);

	// The text stored for column columnIx, or an empty string if the row
	// has no value for that column.
	const char* c_str(uint32 columnIx) const;

	// Textual representation of the form {name:value, name:value}, the
	// names are looked up in mCategory.
	string str() const
	{
		stringstream s;

		s << '{';
		for (auto v = mValues; v != nullptr; v = v->mNext)
		{
			s << mCategory->getColumnName(v->mColumnIndex)
			  << ':'
			  << v->mText;
			if (v->mNext != nullptr)
				s << ", ";
		}
		s << '}';

		return s.str();
	}

	ItemRow* mNext;			// next row in the category, or nullptr
	Category* mCategory;	// the category this row belongs to
	ItemValue* mValues;		// head of the list of values, or nullptr
};
// Write a row as the name of its category followed by the comma separated
// values between square brackets.
ostream& operator<<(ostream& os, const ItemRow& r)
{
	os << r.mCategory->name() << '[';

	const ItemValue* value = r.mValues;
	while (value != nullptr)
	{
		os << value->mText;

		value = value->mNext;
		if (value != nullptr)
			os << ',';
	}

	os << ']';
	return os;
}
// --------------------------------------------------------------------
// Delete all rows following this one, and the values of this row. The
// successors are unlinked one by one before they are deleted to avoid deep
// recursion on long lists. A row pointing to itself is skipped.
ItemRow::~ItemRow()
{
	for (ItemRow* victim = mNext; victim != nullptr and victim != this; victim = mNext)
	{
		mNext = victim->mNext;
		victim->mNext = nullptr;
		delete victim;
	}

	delete mValues;
}
// Remove the first value stored for column columnIx from this row. Does
// nothing when the row has no value for that column, which includes the
// case of a row without any values at all.
void ItemRow::drop(uint32 columnIx)
{
	// Walk the list by the links pointing to the nodes, that way the head
	// and the other nodes are treated alike and an empty list is harmless.
	for (ItemValue** link = &mValues; *link != nullptr; link = &(*link)->mNext)
	{
		if ((*link)->mColumnIndex != columnIx)
			continue;

		ItemValue* victim = *link;
		*link = victim->mNext;

		// detach first, the destructor of ItemValue deletes its successors
		victim->mNext = nullptr;
		delete victim;
		break;
	}

#if DEBUG
	for (auto iv = mValues; iv != nullptr; iv = iv->mNext)
		assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
#endif
}
// Return the text of the first value stored for column columnIx. When the
// row has no such value the shared kEmptyResult string is returned.
const char* ItemRow::c_str(uint32 columnIx) const
{
	for (const ItemValue* item = mValues; item != nullptr; item = item->mNext)
	{
		if (item->mColumnIndex == columnIx)
			return item->mText;
	}

	return kEmptyResult;
}
// --------------------------------------------------------------------
namespace detail
{

// Assigning a string to an item reference stores the value in the row the
// reference points to, by way of Row::assign.
template<>
ItemReference& ItemReference::operator=(const string& value)
{
	Row(mRow).assign(mName, value, false);
	return *this;
}

// Return the text of the referenced item. The shared kEmptyResult string
// is returned when there is no row, when the row has no value for this
// item, or when the stored text is exactly "." .
const char*
ItemReference::c_str() const
{
	const char* result = kEmptyResult;

	if (mRow != nullptr /* and mRow->mCategory != nullptr*/)
	{
//		assert(mRow->mCategory);

		auto cix = mRow->mCategory->getColumnIndex(mName);
		for (auto iv = mRow->mValues; iv != nullptr; iv = iv->mNext)
		{
			if (iv->mColumnIndex == cix)
			{
				// a single dot is reported as empty
				if (iv->mText[0] != '.' or iv->mText[1] != 0)
					result = iv->mText;

				break;
			}
		}
	}

	return result;
}

// True when c_str() returned the kEmptyResult sentinel. Note that this
// compares pointers, not the contents of the strings.
bool ItemReference::empty() const
{
	return c_str() == kEmptyResult;
}

}
// --------------------------------------------------------------------
// Datablock implementation
// Create an empty datablock called `name`, without validator and without
// a successor.
Datablock::Datablock(const string& name)
	: mName(name)
	, mValidator(nullptr)
	, mNext(nullptr)
{
}
// Deleting a datablock deletes the datablock linked via mNext as well.
Datablock::~Datablock()
{
	delete mNext;
}
// Return, as a string, the item `tag` as found in the first row of its
// category. The tag is split into a category and an item name, the
// category is located ignoring case. An empty string is returned when the
// datablock has no such category.
string Datablock::firstItem(const string& tag) const
{
	string catName, itemName;
	std::tie(catName, itemName) = splitTagName(tag);

	for (auto& cat: mCategories)
	{
		if (not iequals(cat.name(), catName))
			continue;

		return cat.getFirstItem(itemName.c_str()).as<string>();
	}

	return string();
}
// Return an iterator to the category called `name` (compared ignoring
// case), the category is appended to this datablock when it does not exist
// yet. The bool in the result tells whether a new category was created.
auto Datablock::emplace(const string& name) -> tuple<iterator,bool>
{
	iterator pos = find_if(begin(), end(), [&name](const Category& cat) -> bool
		{ return iequals(cat.name(), name); });

	if (pos != end())
		return make_tuple(pos, false);

	pos = mCategories.emplace(end(), *this, name, mValidator);
	return make_tuple(pos, true);
}
// Return the category called `name`, it is created when it does not exist.
Category& Datablock::operator[](const string& name)
{
	// std:: is needed here, a plain get would find Datablock::get
	return *std::get<0>(emplace(name));
}
// Return a pointer to the category called `name` (compared ignoring case)
// or nullptr when this datablock does not contain such a category.
Category* Datablock::get(const string& name)
{
	for (auto& cat: *this)
	{
		if (iequals(cat.name(), name))
			return &cat;
	}

	return nullptr;
}
// Validate every category in this datablock. Throws std::runtime_error
// when no validator has been set.
void Datablock::validate()
{
	if (mValidator == nullptr)
		throw runtime_error("Validator not specified");

	for (auto& category: *this)
	{
		category.validate();
	}
}
// Store the validator and hand it to every category in this datablock.
void Datablock::setValidator(Validator* v)
{
	mValidator = v;

	for (auto& category: *this)
	{
		category.setValidator(v);
	}
}
// Append the tags of all categories, in the order of the categories, to
// `tags`.
void Datablock::getTagOrder(vector<string>& tags) const
{
	for (auto& category: *this)
	{
		category.getTagOrder(tags);
	}
}
// Write this datablock to `os`: the data_ header first, followed by the
// categories.
void Datablock::write(ostream& os)
{
	os << "data_" << mName << endl
	   << "# " << endl;

	// mmcif support, sort of. First write the 'entry' Category
	// and if it exists, _AND_ we have a Validator, write out the
	// audit_conform record.

	auto entry = find_if(mCategories.begin(), mCategories.end(),
		[](Category& cat) -> bool { return cat.name() == "entry"; });

	if (entry != mCategories.end())
	{
		entry->write(os);

		if (mValidator != nullptr)
		{
			Category auditConform(*this, "audit_conform", nullptr);
			auditConform.emplace({
				{ "dict_name", mValidator->dictName() },
				{ "dict_version", mValidator->dictVersion() }
			});
			auditConform.write(os);
		}
	}

	// and then all the other categories, in their stored order
	for (auto& cat: mCategories)
	{
		if (cat.name() == "entry" or cat.name() == "audit_conform")
			continue;

		cat.write(os);
	}
}
// Write this datablock to `os` using the tag order given in `order`.
// The categories are written in the order in which they first appear in
// `order`, each with its items in the order given. Categories that are not
// mentioned in `order` are written afterwards.
// Category names are compared ignoring case, both when collecting the
// categories and when collecting the items for a category.
// Unlike write(ostream&), this overload does not emit an audit_conform
// record.
void Datablock::write(ostream& os, const vector<string>& order)
{
	os << "data_" << mName << endl
	   << "# " << endl;

	// the category names in order of first appearance
	vector<string> catOrder;
	for (auto& o: order)
	{
		string cat, Item;
		std::tie(cat, Item) = splitTagName(o);
		if (find_if(catOrder.rbegin(), catOrder.rend(), [cat](const string& s) -> bool { return iequals(cat, s); }) == catOrder.rend())
			catOrder.push_back(cat);
	}

	for (auto& c: catOrder)
	{
		auto cat = get(c);
		if (cat == nullptr)
			continue;

		vector<string> items;
		for (auto& o: order)
		{
			string catName, Item;
			std::tie(catName, Item) = splitTagName(o);

			// catOrder was built ignoring case, so the items have to be
			// collected ignoring case as well or tags that differ in case
			// only would be lost.
			if (iequals(catName, c))
				items.push_back(Item);
		}

		cat->write(os, items);
	}

	// for any Category we missed in the catOrder
	for (auto& cat: mCategories)
	{
		if (find_if(catOrder.begin(), catOrder.end(), [&](const string& s) -> bool { return iequals(cat.name(), s); }) != catOrder.end())
			continue;

		cat.write(os);
	}
}
// --------------------------------------------------------------------
//
// class to compare two rows based on their keys.
// Three-way comparison of rows. For each key a column index and a compare
// function are stored, rows are compared key by key. The result is an int
// that is negative, zero or positive; it is not a boolean less-than.
class RowComparator
{
  public:

	// Compare on the keys of the category as defined by its category
	// validator, which therefore must exist.
	RowComparator(Category* cat)
		: RowComparator(cat, cat->getCatValidator()->mKeys.begin(), cat->getCatValidator()->mKeys.end())
	{
	}

	// Compare on the item names in the range [b, e).
	template<typename KeyIter>
	RowComparator(Category* cat, KeyIter b, KeyIter e);

	int operator()(const ItemRow* a, const ItemRow* b) const;

	int operator()(const Row& a, const Row& b) const
	{
		return operator()(a.mData, b.mData);
	}

  private:
	typedef function<int(const char*,const char*)> compareFunc;

	// column index and the function used to compare values in that column
	typedef tuple<size_t,compareFunc> keyComp;

	vector<keyComp> mComp;
};
// Set up the comparator for the item names in [b, e). For each name the
// column index in `cat` and the compare function of the type validator of
// the item are recorded.
// Throws std::runtime_error when the category has no category validator,
// or when the dictionary has no item or type validator for one of the
// names.
template<typename KeyIter>
RowComparator::RowComparator(Category* cat, KeyIter b, KeyIter e)
{
	auto cv = cat->getCatValidator();

	// A category without validator cannot be compared, the compare
	// functions come from the dictionary.
	if (cv == nullptr)
		throw runtime_error("No Validator available for Category " + cat->name());

	for (auto ki = b; ki != e; ++ki)
	{
		string k = *ki;

		size_t ix = cat->getColumnIndex(k);

		auto iv = cv->getValidatorForItem(k);
		if (iv == nullptr)
			throw runtime_error("Incomplete dictionary, no Item Validator for Key " + k);

		auto tv = iv->mType;
		if (tv == nullptr)
			throw runtime_error("Incomplete dictionary, no type Validator for Item " + k);

		using namespace placeholders;

		mComp.emplace_back(ix, bind(&ValidateType::compare, tv, _1, _2));
	}
}
// Compare rows a and b key by key. The result of the first key that
// differs is returned, zero is returned when all keys compare equal.
int RowComparator::operator()(const ItemRow* a, const ItemRow* b) const
{
	assert(a);
	assert(b);

	for (const keyComp& kc: mComp)
	{
		const size_t column = std::get<0>(kc);
		const compareFunc& compare = std::get<1>(kc);

		const char* ka = a->c_str(column);
		const char* kb = b->c_str(column);

		const int d = compare(ka, kb);
		if (d != 0)
			return d;
	}

	return 0;
}
// --------------------------------------------------------------------
//
// class to keep an index on the keys of a Category. This is a red/black
// tree implementation.
// Index on the keys of a Category, implemented as a red/black tree of
// entries that each point to a row. Rows are ordered using a
// RowComparator constructed from the keys of the category.
// NOTE(review): the rotate/flip/fixUp helpers look like Sedgewick's
// left-leaning red/black tree — confirm before relying on that.
class catIndex
{
  public:
	catIndex(Category* cat);
	~catIndex();

	// Return the row in the index with the same keys as k, or nullptr.
	ItemRow* find(ItemRow* k) const;

	void insert(ItemRow* r);
	void erase(ItemRow* r);

	// batch create
	void reconstruct();

	// reorder the ItemRow's and returns new head and tail
	// The mNext fields of the rows are rewritten to follow the order of
	// the index. Both pointers are nullptr when the index is empty.
	tuple<ItemRow*,ItemRow*> reorder()
	{
		tuple<ItemRow*,ItemRow*> result = make_tuple(nullptr, nullptr);

		if (mRoot != nullptr)
		{
			entry* head = findMin(mRoot);
			entry* tail = reorder(mRoot);

			tail->mRow->mNext = nullptr;

			result = make_tuple(head->mRow, tail->mRow);
		}

		return result;
	}

	// Number of entries in the tree.
	size_t size() const;

	// Check the red/black properties of the tree using assert.
	void validate() const;

  private:

	// A node in the tree. Deleting an entry deletes both subtrees, the
	// row it points to is not deleted.
	struct entry
	{
		entry(ItemRow* r)
			: mRow(r), mLeft(nullptr), mRight(nullptr), mRed(true) {}

		~entry()
		{
			delete mLeft;
			delete mRight;
		}

		ItemRow* mRow;
		entry* mLeft;
		entry* mRight;
		bool mRed;			// new entries start out red
	};

	entry* insert(entry* h, ItemRow* v);
	entry* erase(entry* h, ItemRow* k);

	void validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const;

	// Rotate the right child of h into the place of h. The new top takes
	// over the colour of h, h itself becomes red. h->mRight must exist.
	entry* rotateLeft(entry* h)
	{
		entry* x = h->mRight;
		h->mRight = x->mLeft;
		x->mLeft = h;
		x->mRed = h->mRed;
		h->mRed = true;
		return x;
	}

	// Mirror image of rotateLeft, h->mLeft must exist.
	entry* rotateRight(entry* h)
	{
		entry* x = h->mLeft;
		h->mLeft = x->mRight;
		x->mRight = h;
		x->mRed = h->mRed;
		h->mRed = true;
		return x;
	}

	// Invert the colour of h and of the children it has.
	void flipColour(entry* h)
	{
		h->mRed = not h->mRed;

		if (h->mLeft != nullptr)
			h->mLeft->mRed = not h->mLeft->mRed;

		if (h->mRight != nullptr)
			h->mRight->mRed = not h->mRight->mRed;
	}

	// A missing entry counts as black.
	bool isRed(entry* h) const
	{
		return h != nullptr and h->mRed;
	}

	// Used while erasing, before descending to the left.
	entry* moveRedLeft(entry* h)
	{
		flipColour(h);

		if (h->mRight != nullptr and isRed(h->mRight->mLeft))
		{
			h->mRight = rotateRight(h->mRight);
			h = rotateLeft(h);
			flipColour(h);
		}

		return h;
	}

	// Used while erasing, before descending to the right.
	entry* moveRedRight(entry* h)
	{
		flipColour(h);

		if (h->mLeft != nullptr and isRed(h->mLeft->mLeft))
		{
			h = rotateRight(h);
			flipColour(h);
		}

		return h;
	}

	// Restore the invariants for h on the way back up after erasing.
	entry* fixUp(entry* h)
	{
		if (isRed(h->mRight))
			h = rotateLeft(h);

		if (isRed(h->mLeft) and isRed(h->mLeft->mLeft))
			h = rotateRight(h);

		if (isRed(h->mLeft) and isRed(h->mRight))
			flipColour(h);

		return h;
	}

	// The leftmost entry of the subtree at h, h must not be nullptr.
	entry* findMin(entry* h)
	{
		while (h->mLeft != nullptr)
			h = h->mLeft;

		return h;
	}

	// Delete the leftmost entry of the subtree at h and return the new
	// root of that subtree.
	entry* eraseMin(entry* h)
	{
		if (h->mLeft == nullptr)
		{
			// note that deleting h deletes its right subtree as well
			delete h;
			h = nullptr;
		}
		else
		{
			if (not isRed(h->mLeft) and not isRed(h->mLeft->mLeft))
				h = moveRedLeft(h);

			h->mLeft = eraseMin(h->mLeft);

			h = fixUp(h);
		}

		return h;
	}

	// Fix mNext fields for rows in order of this index
	// Returns the last entry of the subtree at e in index order. The
	// mNext of the row of that entry is left for the caller to set.
	entry* reorder(entry* e)
	{
		auto result = e;

		if (e->mLeft != nullptr)
		{
			// the last row of the left subtree precedes the row of e
			auto l = reorder(e->mLeft);
			l->mRow->mNext = e->mRow;
		}

		if (e->mRight != nullptr)
		{
			// the row of e is followed by the first row of the right subtree
			auto mr = findMin(e->mRight);
			e->mRow->mNext = mr->mRow;

			result = reorder(e->mRight);
		}

		return result;
	}

	Category& mCat;
	RowComparator mComp;
	entry* mRoot;
};
// Create an empty index for category `cat`. The comparator is constructed
// from the category as well.
catIndex::catIndex(Category* cat)
	: mCat(*cat)
	, mComp(cat)
	, mRoot(nullptr)
{
}
// Deleting the root deletes all entries, see the destructor of entry.
catIndex::~catIndex()
{
	delete mRoot;
}
// Search the tree for a row that compares equal to k. Returns that row or
// nullptr when there is none.
ItemRow* catIndex::find(ItemRow* k) const
{
	for (const entry* node = mRoot; node != nullptr; )
	{
		const int d = mComp(k, node->mRow);

		if (d == 0)
			return node->mRow;

		node = d < 0 ? node->mLeft : node->mRight;
	}

	return nullptr;
}
// Add row k to the index. The root is coloured black afterwards.
void catIndex::insert(ItemRow* k)
{
	entry* root = insert(mRoot, k);
	root->mRed = false;
	mRoot = root;
}
// Insert row v into the subtree at h and return the new root of that
// subtree. Throws std::runtime_error when the subtree already contains a
// row that compares equal to v.
catIndex::entry* catIndex::insert(entry* h, ItemRow* v)
{
	if (h == nullptr)
		return new entry(v);

	const int d = mComp(v, h->mRow);

	if (d == 0)
		throw runtime_error("Duplicate Key violation, cat: " + mCat.name() + " values: " + v->str());

	if (d < 0)
		h->mLeft = insert(h->mLeft, v);
	else
		h->mRight = insert(h->mRight, v);

	// rebalance on the way back up
	if (isRed(h->mRight) and not isRed(h->mLeft))
		h = rotateLeft(h);

	if (isRed(h->mLeft) and isRed(h->mLeft->mLeft))
		h = rotateRight(h);

	if (isRed(h->mLeft) and isRed(h->mRight))
		flipColour(h);

	return h;
}
// Remove the entry for row k from the index. The root, if any is left, is
// coloured black afterwards. Erasing from an empty index does nothing.
void catIndex::erase(ItemRow* k)
{
	// the recursive erase dereferences the entry it is given
	if (mRoot == nullptr)
		return;

	mRoot = erase(mRoot, k);
	if (mRoot != nullptr)
		mRoot->mRed = false;
}
// Remove the entry for row k from the subtree at h and return the new root
// of that subtree, nullptr when the subtree has become empty.
// h must not be nullptr.
catIndex::entry* catIndex::erase(entry* h, ItemRow* k)
{
	if (mComp(k, h->mRow) < 0)
	{
		// k is in the left subtree, if it is in the tree at all
		if (h->mLeft != nullptr)
		{
			if (not isRed(h->mLeft) and not isRed(h->mLeft->mLeft))
				h = moveRedLeft(h);

			h->mLeft = erase(h->mLeft, k);
		}
	}
	else
	{
		if (isRed(h->mLeft))
			h = rotateRight(h);

		// found, and there is no right subtree to take care of
		if (mComp(k, h->mRow) == 0 and h->mRight == nullptr)
		{
			delete h;
			return nullptr;
		}

		if (h->mRight != nullptr)
		{
			if (not isRed(h->mRight) and not isRed(h->mRight->mLeft))
				h = moveRedRight(h);

			if (mComp(k, h->mRow) == 0)
			{
				// found: this entry takes over the smallest row of the
				// right subtree and that entry is removed instead
				h->mRow = findMin(h->mRight)->mRow;
				h->mRight = eraseMin(h->mRight);
			}
			else
				h->mRight = erase(h->mRight, k);
		}
	}

	return fixUp(h);
}
// Throw away the current tree and build a new one by inserting all rows of
// the category, one by one. An exception thrown by insert (duplicate key)
// is passed on to the caller.
void catIndex::reconstruct()
{
	delete mRoot;
	mRoot = nullptr;

	for (auto r: mCat)
		insert(r.mData);

// maybe reconstruction can be done quicker by using the following commented code.
// however, I've not had the time to think of a way to set the red/black flag correctly in that case.

//	vector<ItemRow*> rows;
//	transform(mCat.begin(), mCat.end(), backInserter(rows),
//		[](Row r) -> ItemRow* { assert(r.mData); return r.mData; });
//
//	assert(std::find(rows.begin(), rows.end(), nullptr) == rows.end());
//
//	// don't use sort here, it will run out of the stack or something.
//	// quicksort is notorious for using excessive recursion.
//	// Besides, most of the time, the data is ordered already anyway.
//
//	stable_sort(rows.begin(), rows.end(), [this](ItemRow* a, ItemRow* b) -> bool { return this->mComp(a, b) < 0; });
//
//	for (size_t i = 0; i < rows.size() - 1; ++i)
//		assert(mComp(rows[i], rows[i + 1]) < 0);
//
//	deque<entry*> e;
//	transform(rows.begin(), rows.end(), back_inserter(e),
//		[](ItemRow* r) -> entry* { return new entry(r); });
//
//	while (e.size() > 1)
//	{
//		deque<entry*> ne;
//
//		while (not e.empty())
//		{
//			entry* a = e.front();
//			e.pop_front();
//
//			if (e.empty())
//				ne.push_back(a);
//			else
//			{
//				entry* b = e.front();
//				b->mLeft = a;
//
//				assert(mComp(a->mRow, b->mRow) < 0);
//
//				e.pop_front();
//
//				if (not e.empty())
//				{
//					entry* c = e.front();
//					e.pop_front();
//
//					assert(mComp(b->mRow, c->mRow) < 0);
//
//					b->mRight = c;
//				}
//
//				ne.push_back(b);
//
//				if (not e.empty())
//				{
//					ne.push_back(e.front());
//					e.pop_front();
//				}
//			}
//		}
//
//		swap (e, ne);
//	}
//
//	assert(e.size() == 1);
//	mRoot = e.front();
}
size_t catIndex::size() const
{
stack<entry*> s;
s.push(mRoot);
size_t result = 0;
while (not s.empty())
{
entry* e = s.top();
s.pop();
if (e == nullptr)
continue;
++result;
s.push(e->mLeft);
s.push(e->mRight);
}
return result;
}
// Check the tree using assert: the root has to be black and every path
// down the tree has to contain the same number of black entries. An empty
// tree is valid.
void catIndex::validate() const
{
	if (mRoot == nullptr)
		return;

	uint32 minBlack = numeric_limits<uint32>::max();
	uint32 maxBlack = 0;

	assert(not mRoot->mRed);
	validate(mRoot, false, 0, minBlack, maxBlack);
	assert(minBlack == maxBlack);
}
// Check the subtree at h using assert.
//   isParentRed  whether the parent of h is red, a red entry may not have
//                a red parent
//   blackDepth   the number of black entries on the path from the root to
//                the parent of h
//   minBlack,
//   maxBlack     updated with the black depth of every path that ends in
//                this subtree
// The ordering of the rows of h and its children is checked as well.
void catIndex::validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const
{
	if (h->mRed)
		assert(not isParentRed);
	else
		++blackDepth;

	if (isParentRed)
		assert(not h->mRed);

	// the children may not both be red
	if (h->mLeft != nullptr and h->mRight != nullptr)
	{
		if (isRed(h->mLeft))
			assert(not isRed(h->mRight));
		if (isRed(h->mRight))
			assert(not isRed(h->mLeft));
	}

	if (h->mLeft != nullptr)
	{
		assert(mComp(h->mLeft->mRow, h->mRow) < 0);
		validate(h->mLeft, h->mRed, blackDepth, minBlack, maxBlack);
	}
	else
	{
		if (minBlack > blackDepth)
			minBlack = blackDepth;
		if (maxBlack < blackDepth)
			maxBlack = blackDepth;
	}

	if (h->mRight != nullptr)
	{
		assert(mComp(h->mRight->mRow, h->mRow) > 0);

		// pass on the colour of h, as is done for the left child
		validate(h->mRight, h->mRed, blackDepth, minBlack, maxBlack);
	}
	else
	{
		if (minBlack > blackDepth)
			minBlack = blackDepth;
		if (maxBlack < blackDepth)
			maxBlack = blackDepth;
	}
}
// --------------------------------------------------------------------
// Create an empty set of rows of category `cat`.
RowSet::RowSet(Category& cat)
	: mCat(cat)
{
}
// Sort the rows in this set on the items named in `items`, rows that
// compare equal keep their relative order. Returns *this.
// The constructor of RowComparator may throw std::runtime_error, e.g. when
// the dictionary has no validator for one of the items.
RowSet& RowSet::orderBy(initializer_list<string> items)
{
	RowComparator c(&mCat, items.begin(), items.end());

	// RowComparator is a three-way compare returning an int, stable_sort
	// needs a less-than predicate. Passing the comparator itself would
	// turn both 'less' and 'greater' into true.
	stable_sort(begin(), end(),
		[&c](const Row& a, const Row& b) -> bool { return c(a, b) < 0; });

	return *this;
}
// --------------------------------------------------------------------
// Create an empty category called `name` in datablock `db`.
// When a validator is given and it knows this category, the key and the
// mandatory fields are added as columns and an index on the keys is
// created.
// Throws ValidationError when `name` is empty.
Category::Category(Datablock& db, const string& name, Validator* Validator)
	: mDb(db), mName(name), mValidator(Validator)
	, mHead(nullptr), mTail(nullptr), mIndex(nullptr)
{
	// Make sure mCatValidator has a defined value on every path, the rest
	// of this class tests it against nullptr. (Harmless in case the class
	// definition already provides an initializer.)
	mCatValidator = nullptr;

	if (mName.empty())
		throw ValidationError("invalid empty name for Category");

	if (mValidator != nullptr)
	{
		mCatValidator = mValidator->getValidatorForCategory(mName);
		if (mCatValidator != nullptr)
		{
			// make sure all required columns are added

			for (auto& k: mCatValidator->mKeys)
				addColumn(k);

			for (auto& k: mCatValidator->mMandatoryFields)
				addColumn(k);

			mIndex = new catIndex(this);
		}
	}
}
// Delete the rows (deleting the head deletes the whole list) and the
// index.
Category::~Category()
{
	delete mHead;
	delete mIndex;
}
// Replace the validator. The existing index is discarded; when the new
// validator knows this category a new index is built from the rows
// currently in the category.
void Category::setValidator(Validator* v)
{
	mValidator = v;

	// deleting a nullptr is fine
	delete mIndex;
	mIndex = nullptr;

	if (mValidator == nullptr)
	{
		mCatValidator = nullptr;
		return;
	}

	mCatValidator = mValidator->getValidatorForCategory(mName);
	if (mCatValidator == nullptr)
		return;

	mIndex = new catIndex(this);
	mIndex->reconstruct();

#if DEBUG
	assert(mIndex->size() == size());
	mIndex->validate();
#endif
}
// Return the index of the column called `name`, compared ignoring case.
// When there is no such column the number of columns is returned.
size_t Category::getColumnIndex(const string& name) const
{
	size_t ix = 0;

	while (ix < mColumns.size() and not iequals(name, mColumns[ix].mName))
		++ix;

	return ix;
}
// Return the name of the column at index columnIx. An invalid index
// results in the exception thrown by at().
const string& Category::getColumnName(size_t columnIx) const
{
	const ItemColumn& column = mColumns.at(columnIx);
	return column.mName;
}
// Return the index of the column called `name`, the column is appended
// when it does not exist yet. For a category with a validator, an error is
// reported when the dictionary does not know the new column; the column is
// added nevertheless unless reporting the error throws.
size_t Category::addColumn(const string& name)
{
	const size_t ix = getColumnIndex(name);

	// known already?
	if (ix != mColumns.size())
		return ix;

	const ValidateItem* itemValidator = nullptr;

	if (mCatValidator != nullptr)
	{
		itemValidator = mCatValidator->getValidatorForItem(name);
		if (itemValidator == nullptr)
			mValidator->reportError("tag " + name + " not allowed in Category " + mName);
	}

	mColumns.push_back({name, itemValidator});

	return ix;
}
// Relink the rows in the order of the index. Does nothing for a category
// without index.
void Category::reorderByIndex()
{
	if (mIndex == nullptr)
		return;

	std::tie(mHead, mTail) = mIndex->reorder();
}
size_t Category::size() const
{
size_t result = 0;
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
++result;
return result;
}
// A category is empty when it has no rows, or when its first row has no
// values.
bool Category::empty() const
{
	return not (mHead != nullptr and mHead->mValues != nullptr);
}
void Category::drop(const string& field)
{
using namespace placeholders;
auto ci = find_if(mColumns.begin(), mColumns.end(),
[field](ItemColumn& c) -> bool { return iequals(c.mName, field); });
if (ci != mColumns.end())
{
uint32 columnIx = ci - mColumns.begin();
for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
pi->drop(columnIx);
mColumns.erase(ci);
}
}
// Return the first row for which `cond` holds, or a default constructed
// Row when there is no such row.
Row Category::operator[](Condition&& cond)
{
	for (auto row: *this)
	{
		if (cond(*this, row))
			return row;
	}

	return Row();
}
// Return all rows for which `cond` holds, in the order of the rows in this
// category.
RowSet Category::find(Condition&& cond)
{
	RowSet matches(*this);

	for (auto row: *this)
	{
		if (not cond(*this, row))
			continue;

		matches.push_back(row);
	}

	return matches;
}
// Tell whether `cond` holds for at least one row. The search stops at the
// first match.
bool Category::exists(Condition&& cond)
{
	for (auto row: *this)
	{
		if (cond(*this, row))
			return true;
	}

	return false;
}
// Return all rows of this category, ordered on the items named in `items`.
// The order of the rows in the category itself is not changed.
RowSet Category::orderBy(std::initializer_list<string> items)
{
	RowSet rows(*this);
	rows.insert(rows.begin(), begin(), end());

	rows.orderBy(items);

	return rows;
}
// Remove all rows. A category that had an index gets a new, empty one.
void Category::clear()
{
	delete mHead;
	mHead = nullptr;
	mTail = nullptr;

	if (mIndex == nullptr)
		return;

	delete mIndex;
	mIndex = new catIndex(this);
}
// Add a new row with the items in the range [b, e) to this category.
// Returns the row and a flag telling whether it was newly inserted. When
// the category has an index and it already contains a row with the same
// keys, nothing is inserted; the existing row is returned and the flag is
// false.
// Throws std::runtime_error when the category has a validator and a
// mandatory field is not supplied.
template<class Iter>
tuple<Row,bool> Category::emplace(Iter b, Iter e)
{
	// First, make sure all mandatory fields are supplied
	tuple<Row,bool> result = make_tuple(Row(), true);

	if (mCatValidator != nullptr and b != e)
	{
		for (auto& col: mColumns)
		{
			auto iv = mCatValidator->getValidatorForItem(col.mName);

			if (iv == nullptr)
				continue;

			bool seen = false;

			for (auto v = b; v != e; ++v)
			{
				if (iequals(v->name(), col.mName))
				{
					seen = true;
					break;
				}
			}

			if (not seen and iv->mMandatory)
				throw runtime_error("missing mandatory field " + col.mName + " for Category " + mName);
		}

		if (mIndex != nullptr)
		{
			// Build a temporary row containing the key fields only and
			// look that up in the index. The unique_ptr takes care of
			// deleting this row again.
			unique_ptr<ItemRow> nr(new ItemRow{nullptr, this, nullptr});
			Row r(nr.get());
			auto keys = keyFields();

			for (auto v = b; v != e; ++v)
			{
				if (keys.count(v->name()))
					r.assign(v->name(), v->value(), true);
			}

			auto test = mIndex->find(nr.get());
			if (test != nullptr)
			{
				if (VERBOSE > 1)
					cerr << "Not inserting new record in " << mName << " (duplicate Key)" << endl;
				result = make_tuple(Row(test), false);
			}
		}
	}

	if (get<1>(result))
	{
		auto nr = new ItemRow{nullptr, this, nullptr};

		// append the new, still empty, row to the list of rows
		if (mHead == nullptr)
		{
			assert(mTail == nullptr);
			mHead = mTail = nr;
		}
		else
		{
			assert(mTail != nullptr);
			assert(mHead != nullptr);
			mTail->mNext = nr;
			mTail = nr;
		}

		Row r(nr);

		for (auto v = b; v != e; ++v)
			r.assign(*v, true);

		get<0>(result) = r;

		// the row can only be added to the index after its values are set
		if (mIndex != nullptr)
			mIndex->insert(nr);
	}

	return result;
}
// Add a new row containing the items of row r, see the iterator based
// emplace for the result.
tuple<Row,bool> Category::emplace(Row r)
{
	auto first = r.begin();
	auto last = r.end();

	return emplace(first, last);
}
// Erase all rows for which `cond` holds. The rows are collected first and
// erased afterwards, so the list is not modified while it is traversed.
void Category::erase(Condition&& cond)
{
	RowSet doomed(*this);

	for (auto row: *this)
	{
		if (not cond(*this, row))
			continue;

		doomed.push_back(row);
	}

	for (auto row: doomed)
		erase(row);
}
// Erase the row the iterator points at.
void Category::erase(iterator p)
{
	Row row = *p;
	erase(row);
}
// Erase row r from this category.
// For each key column whose item validator lists children, the rows in the
// child categories that refer to the value of r are erased first. Then the
// row is removed from the index, unlinked from the list of rows and
// deleted.
// Throws std::runtime_error when the category has no rows.
void Category::erase(Row r)
{
	iset keys;
	if (mCatValidator)
		keys = iset(mCatValidator->mKeys.begin(), mCatValidator->mKeys.end());

	for (auto& col: mColumns)
	{
		auto iv = col.mValidator;
		if (iv == nullptr or iv->mChildren.empty())
			continue;

		// only key columns are followed into the child categories
		if (not keys.count(col.mName))
			continue;

		const char* value = r[col.mName].c_str();

		for (auto child: iv->mChildren)
		{
			if (child->mCategory == nullptr)
				continue;

			auto childCat = mDb.get(child->mCategory->mName);
			if (childCat == nullptr)
				continue;

			auto rows = childCat->find(Key(child->mTag) == value);
			for (auto& cr: rows)
				childCat->erase(cr);
		}
	}

	if (mHead == nullptr)
		throw runtime_error("erase");

	if (mIndex != nullptr)
		mIndex->erase(r.mData);

	if (r == mHead)
	{
		mHead = mHead->mNext;

		// that was the only row, there is no tail anymore
		if (mHead == nullptr)
			mTail = nullptr;

		// detach first, the destructor of ItemRow deletes its successors
		r.mData->mNext = nullptr;
		delete r.mData;
	}
	else
	{
		for (auto pi = mHead; pi != nullptr; pi = pi->mNext)
		{
			if (pi->mNext == r.mData)
			{
				pi->mNext = r.mData->mNext;

				// When the last row is erased its predecessor becomes the
				// tail, otherwise the next emplace would write to the
				// deleted row.
				if (mTail == r.mData)
					mTail = pi;

				r.mData->mNext = nullptr;
				delete r.mData;
				break;
			}
		}
	}
}
// Append the tags of the columns of this category to `tags`. A tag has the
// form _category.column .
void Category::getTagOrder(vector<string>& tags) const
{
	for (const ItemColumn& column: mColumns)
	{
		string tag = "_" + mName + "." + column.mName;
		tags.push_back(tag);
	}
}
// Return a reference to the item called itemName in the first row of this
// category. The reference is created for an empty category as well, mHead
// is nullptr in that case.
const detail::ItemReference Category::getFirstItem(const char* itemName) const
{
	return detail::ItemReference{
		itemName,
		mHead
	};
}
// Iterator to the first row of this category.
Category::iterator Category::begin()
{
	iterator first(mHead);
	return first;
}
// Iterator past the last row, this is an iterator constructed from a
// nullptr.
Category::iterator Category::end()
{
	iterator last(nullptr);
	return last;
}
// Validate this category against the dictionary. Problems are reported
// through the validator's reportError.
// Throws std::runtime_error when no validator was set. An empty category
// is skipped; a category not known to the dictionary is reported and not
// checked any further.
void Category::validate()
{
	if (mValidator == nullptr)
		throw runtime_error("no Validator specified");

	if (empty())
	{
		if (VERBOSE > 2)
			cerr << "Skipping validation of empty Category " << mName << endl;
		return;
	}

	if (mCatValidator == nullptr)
	{
		mValidator->reportError("undefined Category " + mName);
		return;
	}

	// Check the columns: each should be known in the dictionary and all
	// mandatory fields should be present as a column. The item validators
	// stored with the columns are refreshed in passing.
	auto mandatory = mCatValidator->mMandatoryFields;

	for (auto& col: mColumns)
	{
		auto iv = mCatValidator->getValidatorForItem(col.mName);
		if (iv == nullptr)
			mValidator->reportError("Field " + col.mName + " is not valid in Category " + mName);

		col.mValidator = iv;

		mandatory.erase(col.mName);
	}

	if (not mandatory.empty())
		mValidator->reportError("In Category " + mName + " the following mandatory fields are missing: " + ba::join(mandatory, ", "));

	// check index?
	// (only in builds without NDEBUG)
	if (mIndex)
	{
#if not defined(NDEBUG)
		mIndex->validate();
		for (auto r: *this)
		{
			if (mIndex->find(r.mData) != r.mData)
				mValidator->reportError("Key not found in index for Category " + mName);
		}
#endif
	}

	// validate all values
	// NOTE(review): the value assigned to mandatory here is not used in
	// the remainder of this function.
	mandatory = mCatValidator->mMandatoryFields;

	for (auto ri = mHead; ri != nullptr; ri = ri->mNext)
	{
		for (size_t cix = 0; cix < mColumns.size(); ++cix)
		{
			bool seen = false;
			auto iv = mColumns[cix].mValidator;

			if (iv == nullptr)
			{
				mValidator->reportError("invalid field " + mColumns[cix].mName + " for Category " + mName);
				continue;
			}

			// run the item validator on each value stored for this column
			for (auto vi = ri->mValues; vi != nullptr; vi = vi->mNext)
			{
				if (vi->mColumnIndex == cix)
				{
					seen = true;
					(*iv)(vi->mText);
				}
			}

			if (seen)
				continue;

			// the row has no value for this column
			if (iv != nullptr and iv->mMandatory)
				mValidator->reportError("missing mandatory field " + mColumns[cix].mName + " for Category " + mName);
		}
	}
}
// Return the validator used by this category.
// Throws runtime_error when no validator has been set.
const Validator& Category::getValidator() const
{
	if (mValidator != nullptr)
		return *mValidator;

	throw runtime_error("no Validator defined yet");
}
// Return the tags of all items the category validator defines for this
// category.
//
// Throws runtime_error when no validator has been set. When the category
// itself is unknown to the validator this is reported with reportError and,
// should that call return, an empty set is returned instead of dereferencing
// the missing category validator.
iset Category::fields() const
{
	if (mValidator == nullptr)
		throw runtime_error("No Validator specified");

	iset result;

	if (mCatValidator == nullptr)
	{
		mValidator->reportError("undefined Category");
		return result;
	}

	for (auto& iv: mCatValidator->mItemValidators)
		result.insert(iv.mTag);

	return result;
}
// Return the set of fields that are mandatory for this category according to
// the category validator.
//
// Throws runtime_error when no validator has been set. When the category
// itself is unknown to the validator this is reported with reportError and,
// should that call return, an empty set is returned instead of dereferencing
// the missing category validator.
iset Category::mandatoryFields() const
{
	if (mValidator == nullptr)
		throw runtime_error("No Validator specified");

	if (mCatValidator == nullptr)
	{
		mValidator->reportError("undefined Category");
		return iset();
	}

	return mCatValidator->mMandatoryFields;
}
// Return the names of the fields that make up the key of this category
// according to the category validator.
//
// Throws runtime_error when no validator has been set. When the category
// itself is unknown to the validator this is reported with reportError and,
// should that call return, an empty set is returned instead of dereferencing
// the missing category validator.
iset Category::keyFields() const
{
	if (mValidator == nullptr)
		throw runtime_error("No Validator specified");

	if (mCatValidator == nullptr)
	{
		mValidator->reportError("undefined Category");
		return iset();
	}

	return iset{ mCatValidator->mKeys.begin(), mCatValidator->mKeys.end() };
}
// Advance to the next row by following the mNext link of the current row.
// The current row's data pointer is dereferenced without a check here.
auto Category::iterator::operator++() -> iterator&
{
	auto next = mCurrent.data()->mNext;
	mCurrent = Row(next);

	return *this;
}
namespace detail
{

// Write a single value to os and return the resulting offset in the current
// output line.
//
//  value   the text to write
//  offset  the number of characters already written on the current line
//  width   the column width to pad to; a width of zero forces a text field
//
// Three notations are used:
//  - a text field delimited by lines starting with ';' when the value
//    contains a newline, when width is zero, or when the value is 132
//    characters or longer; the returned offset is 0 since the output ends
//    with a newline,
//  - the bare value when isUnquotedString accepts it,
//  - the value enclosed in single or double quotes otherwise; when neither
//    quote character can be used a text field is written instead.
// Bare and quoted values are padded with spaces up to width, or followed by
// a single space when they do not fit in width.
size_t writeValue(ostream& os, string value, size_t offset, size_t width)
{
	if (value.find('\n') != string::npos or width == 0 or value.length() >= 132)	// write as text field
	{
		// a line inside the value starting with ';' would end the text field
		ba::replace_all(value, "\n;", "\n\\;");

		if (offset > 0)
			os << endl;
		os << ';' << value;
		if (not ba::ends_with(value, "\n"))
			os << endl;
		os << ';' << endl;
		offset = 0;
	}
	else if (isUnquotedString(value.c_str()))
	{
		os << value;

		if (value.length() < width)
		{
			os << string(width - value.length(), ' ');
			offset += width;
		}
		else
		{
			os << ' ';
			offset += value.length() + 1;
		}
	}
	else
	{
		bool done = false;
		for (char q: { '\'', '"'})
		{
			// See if we can use this quote character: skip the occurrences
			// that are followed by a non blank character other than the
			// quote itself. Any occurrence that remains rules this quote out.
			auto p = value.find(q);
			while (p != string::npos and isNonBlank(value[p + 1]) and value[p + 1] != q)
				p = value.find(q, p + 1);

			if (p != string::npos)
				continue;

			os << q << value << q;

			if (value.length() + 2 < width)
			{
				os << string(width - value.length() - 2, ' ');
				offset += width;
			}
			else
			{
				// the value, both quote characters and the trailing space
				os << ' ';
				offset += value.length() + 3;
			}

			done = true;
			break;
		}

		if (not done)
		{
			// neither quote character is usable, fall back to a text field
			if (offset > 0)
				os << endl;
			os << ';' << value << endl
			   << ';' << endl;
			offset = 0;
		}
	}

	return offset;
}

}
// Write this category to os, using the columns listed in order (a list of
// column indices) and in that sequence.
//
// Nothing is written for an empty category. A category with more than one
// row is written as a loop_ with padded columns, a category with a single
// row is written as a list of tag/value pairs. The output is terminated with
// a line containing "# ".
//
// The includeEmptyColumns parameter is not used by this implementation.
void Category::write(ostream& os, const vector<int>& order, bool includeEmptyColumns)
{
	if (empty())
		return;

	// If the first Row has a next, we need a loop_
	bool needLoop = (mHead->mNext != nullptr);

	if (needLoop)
	{
		os << "loop_" << endl;

		for (auto cix: order)
		{
			auto& col = mColumns[cix];
			os << '_' << mName << '.' << col.mName << ' ' << endl;
		}

		// The widths are looked up by column index, so there has to be an
		// entry for every column, regardless of what is listed in order.
		vector<size_t> columnWidths(mColumns.size(), 2);

		// Find the width needed for each column. Values containing a newline
		// and values of 132 characters or more are not taken into account.
		for (auto row = mHead; row != nullptr; row = row->mNext)
		{
			for (auto v = row->mValues; v != nullptr; v = v->mNext)
			{
				if (strchr(v->mText, '\n') == nullptr)
				{
					size_t l = strlen(v->mText);

					if (not isUnquotedString(v->mText))
						l += 2;		// room for the quotes

					if (l >= 132)
						continue;

					if (columnWidths[v->mColumnIndex] < l + 1)
						columnWidths[v->mColumnIndex] = l + 1;
				}
			}
		}

		for (auto row = mHead; row != nullptr; row = row->mNext)	// loop over rows
		{
			size_t offset = 0;

			for (size_t cix: order)
			{
				size_t w = columnWidths[cix];

				// look up the value for this column, missing values are written as '?'
				string s;
				for (auto iv = row->mValues; iv != nullptr; iv = iv->mNext)
				{
					if (iv->mColumnIndex == cix)
					{
						s = iv->mText;
						break;
					}
				}

				if (s.empty())
					s = "?";

				size_t l = s.length();
				if (not isUnquotedString(s.c_str()))
					l += 2;
				if (l < w)
					l = w;

				// start a new line when this value would make the line too long
				if (offset + l >= 132 and offset > 0)
				{
					os << endl;
					offset = 0;
				}

				offset = detail::writeValue(os, s, offset, w);

				if (offset >= 132)
				{
					os << endl;
					offset = 0;
				}
			}

			if (offset > 0)
				os << endl;
		}
	}
	else
	{
		// first find the indent level
		size_t l = 0;

		for (auto& col: mColumns)
		{
			string tag = '_' + mName + '.' + col.mName;

			if (l < tag.length())
				l = tag.length();
		}

		l += 3;

		for (size_t cix: order)
		{
			auto& col = mColumns[cix];

			// the tag, padded with spaces up to the indent level
			os << '_' << mName << '.' << col.mName << string(l - col.mName.length() - mName.length() - 2, ' ');

			string s;
			for (auto iv = mHead->mValues; iv != nullptr; iv = iv->mNext)
			{
				if (iv->mColumnIndex == cix)
				{
					s = iv->mText;
					break;
				}
			}

			if (s.empty())
				s = "?";

			size_t offset = l;
			if (s.length() + l >= kMaxLineLength)
			{
				os << endl;
				offset = 0;
			}

			// a non zero offset means the line has not been terminated yet
			if (detail::writeValue(os, s, offset, 1) != 0)
				os << endl;
		}
	}

	os << "# " << endl;
}
// Write this category to os with the columns in their current order.
void Category::write(ostream& os)
{
	vector<int> order;
	order.reserve(mColumns.size());

	for (size_t ix = 0; ix < mColumns.size(); ++ix)
		order.push_back(static_cast<int>(ix));

	write(os, order, false);
}
// Write this category to os, starting with the columns named in columns (in
// that order) followed by the remaining columns in their current order.
// addColumn is called for each requested name before the indices are looked
// up.
void Category::write(ostream& os, const vector<string>& columns)
{
	// make sure all columns are present
	for (auto& name: columns)
		addColumn(name);

	vector<int> order;
	order.reserve(mColumns.size());

	for (auto& name: columns)
		order.push_back(getColumnIndex(name));

	// append the columns that were not asked for explicitly
	for (size_t ix = 0; ix < mColumns.size(); ++ix)
	{
		bool listed = false;
		for (int o: order)
		{
			if (static_cast<size_t>(o) == ix)
			{
				listed = true;
				break;
			}
		}

		if (not listed)
			order.push_back(static_cast<int>(ix));
	}

	write(os, order, true);
}
// --------------------------------------------------------------------
// Copy constructor: the new Row refers to the same row data as rhs, the
// data itself is not copied.
Row::Row(const Row& rhs)
	: mData(rhs.mData)
{
}
// Assignment: make this Row refer to the same row data as rhs, the data
// itself is not copied.
Row& Row::operator=(const Row& rhs)
{
	this->mData = rhs.mData;

	return *this;
}
// Set the value of the item called name in this row to value.
//
//  name       the name of the item (column); the column index is obtained
//             with Category::addColumn
//  value      the new value; when empty the old value is removed and no new
//             value is stored
//  emplacing  true when called while the row is being constructed, in which
//             case the index of the category is left alone
//
// Nothing happens when the row already stores exactly this value. Otherwise
// the value is first passed to the validator of the column, if there is one.
// When an existing row is updated (emplacing is false), the category has an
// index and name is one of the key fields, the row is taken out of the index
// before the change and inserted again afterwards.
//
// Throws logic_error when the Row does not refer to any data.
void Row::assign(const string& name, const string& value, bool emplacing)
{
if (mData == nullptr)
throw logic_error("invalid Row, no data");
auto cat = mData->mCategory;
auto cix = cat->addColumn(name);
auto& col = cat->mColumns[cix];
// auto& db = cat->mDb;
// look up the value currently stored for this column, if any
const char* oldValue = nullptr;
for (auto iv = mData->mValues; iv != nullptr; iv = iv->mNext)
{
// sanity check: the list of values should not contain a short cycle
assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
if (iv->mColumnIndex == cix)
{
oldValue = iv->mText;
break;
}
}
if (oldValue != nullptr and value == oldValue) // no need to update
return;
// check the value
if (col.mValidator)
(*col.mValidator)(value);
// If the field is part of the Key for this Category, remove it from the index
// before updating
bool reinsert = false;
if (not emplacing) // an update of an Item's value
{
////#if DEBUG
//// if (VERBOSE)
//// cerr << "reassigning the value of Key field _" << cat->mName << '.' << name << endl;
////#endif
// // see if we need to update any child categories that depend on this value
// auto iv = col.mValidator;
// if (iv != nullptr and not iv->mChildren.empty())
// {
// for (auto child: iv->mChildren)
// {
// if (child->mCategory == nullptr)
// continue;
//
// auto childCat = db.get(child->mCategory->mName);
// if (childCat == nullptr)
// continue;
//
// auto rows = childCat->find(Key(child->mTag) == oldValue);
// for (auto& cr: rows)
// cr.assign(child->mTag, value, false);
// }
// }
if (cat->mIndex != nullptr and cat->keyFields().count(name))
{
// only rows that are currently in the index are re-inserted afterwards
reinsert = cat->mIndex->find(mData);
if (reinsert)
cat->mIndex->erase(mData);
}
}
// first remove old value with cix
if (mData->mValues == nullptr)
; // nothing to do
else if (mData->mValues->mColumnIndex == cix)
{
// the old value is the first in the list
auto iv = mData->mValues;
mData->mValues = iv->mNext;
// NOTE(review): mNext is cleared before the delete, presumably because
// deleting an ItemValue also deletes its successors - confirm
iv->mNext = nullptr;
delete iv;
}
else
{
// find the predecessor of the old value and unlink the old value
for (auto iv = mData->mValues; iv->mNext != nullptr; iv = iv->mNext)
{
if (iv->mNext->mColumnIndex == cix)
{
auto nv = iv->mNext;
iv->mNext = nv->mNext;
nv->mNext = nullptr;
delete nv;
break;
}
}
}
#if DEBUG
for (auto iv = mData->mValues; iv != nullptr; iv = iv->mNext)
assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
#endif
if (not value.empty())
{
// the length of the text is handed to the placement form of operator new
// of ItemValue; the new value is appended at the end of the list
auto nv = new(value.length()) ItemValue(value.c_str(), cix);
if (mData->mValues == nullptr)
mData->mValues = nv;
else
{
auto iv = mData->mValues;
while (iv->mNext != nullptr)
iv = iv->mNext;
iv->mNext = nv;
}
}
#if DEBUG
for (auto iv = mData->mValues; iv != nullptr; iv = iv->mNext)
assert(iv != iv->mNext and (iv->mNext == nullptr or iv != iv->mNext->mNext));
#endif
// put the row back into the index, now using the new value
if (reinsert)
cat->mIndex->insert(mData);
}
// Assign an Item to this row: the name and the value of the item are passed
// on to the name/value overload of assign.
void Row::assign(const Item& value, bool emplacing)
{
	const auto& itemName = value.name();
	const auto& itemValue = value.value();

	assign(itemName, itemValue, emplacing);
}
// A Row is empty when it does not refer to any row data or when the row
// data does not contain any values.
bool Row::empty() const
{
	if (mData == nullptr)
		return true;

	return mData->mValues == nullptr;
}
// Iterator positioned at the first item of this row.
// For a Row that does not refer to any row data (see Row::empty) the result
// is equal to what Row::end returns, instead of dereferencing a null pointer.
auto Row::begin() const -> const_iterator
{
	return const_iterator(mData, mData != nullptr ? mData->mValues : nullptr);
}
// Past-the-end iterator for the items of this row: an iterator for this
// row's data with a null value pointer.
auto Row::end() const -> const_iterator
{
	const_iterator sentinel(mData, nullptr);
	return sentinel;
}
// Construct an iterator for the values of the row data, positioned at ptr.
// When ptr is not null the corresponding Item is fetched right away.
Row::const_iterator::const_iterator(ItemRow* data, ItemValue* ptr)
	: mData(data)
	, mPtr(ptr)
{
	if (mPtr == nullptr)
		return;

	fetch();
}
// Advance to the next value of the row and fetch its Item. Incrementing an
// iterator that is already at the end leaves it unchanged.
Row::const_iterator& Row::const_iterator::operator++()
{
	if (mPtr == nullptr)
		return *this;

	mPtr = mPtr->mNext;

	if (mPtr != nullptr)
		fetch();

	return *this;
}
// Store an Item for the value the iterator points at in mCurrent; the name
// is the name of the value's column as reported by the category. Both mData
// and mPtr are dereferenced without a check.
void Row::const_iterator::fetch()
{
	const auto& columnName = mData->mCategory->getColumnName(mPtr->mColumnIndex);

	mCurrent = Item(columnName, mPtr->mText);
}
// --------------------------------------------------------------------
// Construct an empty File: no datablocks and no validator.
File::File()
	: mHead(nullptr), mValidator(nullptr)
{
}
// Construct a File from the data read from is.
// Note that the validate parameter is not used by this constructor.
File::File(istream& is, bool validate)
	: File()
{
	this->load(is);
}
// Move constructor: take over the datablocks and the validator of rhs,
// leaving rhs without either.
File::File(File&& rhs)
	: mHead(rhs.mHead), mValidator(rhs.mValidator)
{
	rhs.mHead = nullptr;
	rhs.mValidator = nullptr;
}
// The File owns both the first datablock and the validator.
File::~File()
{
	delete this->mHead;
	delete this->mValidator;
}
// Append datablock e to the end of the list of datablocks, after giving it
// the validator of this File.
// Throws ValidationError when a datablock with the same name (compared case
// insensitively) is already present.
void File::append(Datablock* e)
{
	e->setValidator(mValidator);

	if (mHead == nullptr)
	{
		mHead = e;
		return;
	}

	// walk to the tail, checking the names along the way
	for (auto db = mHead; ; db = db->mNext)
	{
		if (iequals(db->getName(), e->getName()))
			throw ValidationError("Datablock " + e->getName() + " already defined in File");

		if (db->mNext == nullptr)
		{
			db->mNext = e;
			break;
		}
	}
}
// Parse the data in is and add it to this File.
//
// The validator is detached while parsing. When the File had a validator it
// is attached again afterwards and the File is validated. Should parsing
// fail with an exception, the validator is attached again as well before the
// exception is passed on, so that the File does not lose (and leak) it.
void File::load(istream& is)
{
	Validator* saved = mValidator;
	setValidator(nullptr);

	try
	{
		Parser p(is, *this);
		p.parseFile();
	}
	catch (...)
	{
		if (saved != nullptr)
			setValidator(saved);
		throw;
	}

	if (saved != nullptr)
	{
		setValidator(saved);
		validate();
	}
}
// Write all datablocks of this File to os, in order.
void File::save(ostream& os)
{
	for (Datablock* db = mHead; db != nullptr; db = db->mNext)
		db->write(os);
}
// Write all datablocks of this File to os, in order, passing order on to
// each datablock.
void File::write(ostream& os, const vector<string>& order)
{
	for (Datablock* db = mHead; db != nullptr; db = db->mNext)
		db->write(os, order);
}
// Return the first datablock whose name equals name, compared case
// insensitively. Throws runtime_error when there is no such datablock.
Datablock& File::operator[](const string& name)
{
	for (Datablock* db = mHead; db != nullptr; db = db->mNext)
	{
		if (iequals(db->mName, name))
			return *db;
	}

	throw runtime_error("Datablock " + name + " does not exist");
}
void File::validate()
{
if (mValidator == nullptr)
{
if (VERBOSE)
cerr << "No dictionary loaded explicitly, loading default" << endl;
loadDictionary();
}
for (auto d = mHead; d != nullptr; d = d->mNext)
d->validate();
}
// Return the validator of this File.
// Throws runtime_error when no validator has been set.
const Validator& File::getValidator() const
{
	if (mValidator != nullptr)
		return *mValidator;

	throw runtime_error("no Validator defined yet");
}
// Load the default dictionary, which is the one called "mmcif_ddl".
void File::loadDictionary()
{
	const char* defaultDictionary = "mmcif_ddl";
	loadDictionary(defaultDictionary);
}
// Load the dictionary called dict and use it as validator for this File.
//
// The dictionary is looked up as "dictionaries/<dict>.dic". When compiled
// with USE_RSRC defined the data is taken from an mrsrc resource with that
// name, otherwise the file is opened from the file system, relative to the
// current directory since the path is not absolute.
//
// Throws invalid_argument when the resource does not exist (USE_RSRC) and
// runtime_error when the file does not exist (no USE_RSRC).
void File::loadDictionary(const char* dict)
{
fs::path dictFile = string("dictionaries/") + dict + ".dic";
#if defined(USE_RSRC)
mrsrc::rsrc dictData(dictFile.string());
if (not dictData)
throw invalid_argument("no such dictionary");
// a minimal streambuf whose get area is the resource data itself, so the
// data can be read through an istream
struct membuf : public streambuf
{
membuf(char* dict, size_t length)
{
this->setg(dict, dict, dict + length);
}
} buffer(const_cast<char*>(dictData.data()), dictData.size());
istream is(&buffer);
#else
if (not fs::exists(dictFile))
throw runtime_error("Dictionary not found (" + dictFile.string() + ")");
fs::ifstream is(dictFile);
#endif
// both variants end up with a stream called is
loadDictionary(is);
}
// Read a dictionary from is into a new Validator and make that the
// validator of this File. The new Validator is held in a unique_ptr until
// the dictionary has been parsed, so it is released again when parsing
// throws.
void File::loadDictionary(istream& is)
{
	unique_ptr<Validator> validator(new Validator());

	DictParser parser(*validator, is);
	parser.loadDictionary();

	Validator* loaded = validator.release();
	setValidator(loaded);
}
// Make v the validator of this File and of all its datablocks. The previous
// validator, if any, is not deleted here.
void File::setValidator(Validator* v)
{
	mValidator = v;

	Datablock* db = mHead;
	while (db != nullptr)
	{
		db->setValidator(mValidator);
		db = db->mNext;
	}
}
// Collect the tags of all datablocks of this File in tags, by calling
// getTagOrder on each datablock in order.
void File::getTagOrder(vector<string>& tags) const
{
	auto db = mHead;
	while (db != nullptr)
	{
		db->getTagOrder(tags);
		db = db->mNext;
	}
}
// Advance to the next datablock by following the mNext link; the current
// datablock pointer is dereferenced without a check.
auto File::iterator::operator++() -> iterator&
{
	auto next = mCurrent->mNext;
	mCurrent = next;

	return *this;
}
// Iterator positioned at the first datablock (mHead) of this File.
auto File::begin() const -> iterator
{
	iterator first(mHead);
	return first;
}
// Past-the-end iterator: an iterator constructed from a null datablock
// pointer.
auto File::end() const -> iterator
{
	iterator sentinel(nullptr);
	return sentinel;
}
}
#include "libpr.h"
#include "cif++/Config.h"
#include <map>
#include <set>
......@@ -13,18 +13,17 @@
#include <boost/iostreams/concepts.hpp> // output_filter
#include <boost/iostreams/operations.hpp> // put
#include "peptidedb.h"
#include "cif2pdb.h"
#include "libcif/atom_type.h"
//#include "libcif/compound.h"
#include "cif++/PeptideDB.h"
#include "cif++/Cif2PDB.h"
#include "cif++/AtomType.h"
using namespace std;
namespace ba = boost::algorithm;
namespace io = boost::iostreams;
using cif::datablock;
using cif::category;
using cif::row;
using cif::Datablock;
using cif::Category;
using cif::Row;
// --------------------------------------------------------------------
// FillOutLineFilter is used to make sure all lines in PDB files
......@@ -81,7 +80,7 @@ class FillOutLineFilter : public io::output_filter
// --------------------------------------------------------------------
// conversion routines between cif and pdb format
string cif2pdb_date(const string& d)
string cif2pdbDate(const string& d)
{
const regex rx(R"((\d{4})-(\d{2})(?:-(\d{2}))?)");
const char* kMonths[12] = {
......@@ -105,7 +104,7 @@ string cif2pdb_date(const string& d)
return result;
}
string cif2pdb_auth(string name)
string cif2pdbAuth(string name)
{
const regex rx(R"(([^,]+), (\S+))");
......@@ -116,7 +115,7 @@ string cif2pdb_auth(string name)
return name;
}
string cif2pdb_symmetry(string s)
string cif2pdbSymmetry(string s)
{
auto i = s.rfind('_');
if (i != string::npos)
......@@ -124,11 +123,11 @@ string cif2pdb_symmetry(string s)
return s;
}
string cif2pdb_atomName(string name, string resName, datablock& db)
string cif2pdbAtomName(string name, string resName, Datablock& db)
{
if (name.length() < 4)
{
for (auto r: db["atom_site"].find(cif::key("label_atom_id") == name and cif::key("label_comp_id") == resName))
for (auto r: db["atom_site"].find(cif::Key("label_atom_id") == name and cif::Key("label_comp_id") == resName))
{
string element = r["type_symbol"].as<string>();
......@@ -144,7 +143,7 @@ string cif2pdb_atomName(string name, string resName, datablock& db)
enum SoftwareType { eRefinement, eDataScaling, eDataExtraction, eDataReduction, ePhasing };
string cif_software(datablock& db, SoftwareType sw)
string cifSoftware(Datablock& db, SoftwareType sw)
{
string result = "NULL";
......@@ -152,9 +151,9 @@ string cif_software(datablock& db, SoftwareType sw)
{
switch (sw)
{
case eRefinement: result = db["computing"][cif::key("entry_id") == db.name()]["structure_refinement"].as<string>(); break;
case eDataScaling: result = db["computing"][cif::key("entry_id") == db.name()]["pdbx_data_reduction_ds"].as<string>(); break;
case eDataReduction: result = db["computing"][cif::key("entry_id") == db.name()]["pdbx_data_reduction_ii"].as<string>(); break;
case eRefinement: result = db["computing"][cif::Key("entry_id") == db.getName()]["structure_refinement"].as<string>(); break;
case eDataScaling: result = db["computing"][cif::Key("entry_id") == db.getName()]["pdbx_data_reduction_ds"].as<string>(); break;
case eDataReduction: result = db["computing"][cif::Key("entry_id") == db.getName()]["pdbx_data_reduction_ii"].as<string>(); break;
default: break;
}
......@@ -162,15 +161,15 @@ string cif_software(datablock& db, SoftwareType sw)
{
auto& software = db["software"];
row r;
Row r;
switch (sw)
{
case eRefinement: r = software[cif::key("classification") == "refinement"]; break;
case eDataScaling: r = software[cif::key("classification") == "data scaling"]; break;
case eDataExtraction: r = software[cif::key("classification") == "data extraction"]; break;
case eDataReduction: r = software[cif::key("classification") == "data reduction"]; break;
case ePhasing: r = software[cif::key("classification") == "phasing"]; break;
case eRefinement: r = software[cif::Key("classification") == "refinement"]; break;
case eDataScaling: r = software[cif::Key("classification") == "data scaling"]; break;
case eDataExtraction: r = software[cif::Key("classification") == "data extraction"]; break;
case eDataReduction: r = software[cif::Key("classification") == "data reduction"]; break;
case ePhasing: r = software[cif::Key("classification") == "phasing"]; break;
}
result = r["name"].as<string>() + " " + r["version"].as<string>();
......@@ -188,19 +187,19 @@ string cif_software(datablock& db, SoftwareType sw)
}
// Map asym ID's back to PDB Chain ID's
vector<string> MapAsymIDs2ChainIDs(const vector<string>& asymIDs, datablock& db)
vector<string> MapAsymIDs2ChainIDs(const vector<string>& asymIDs, Datablock& db)
{
set<string> result;
for (auto asym: asymIDs)
{
for (auto r: db["pdbx_poly_seq_scheme"].find(cif::key("asym_id") == asym))
for (auto r: db["pdbx_poly_seq_scheme"].find(cif::Key("asym_id") == asym))
{
result.insert(r["pdb_strand_id"].as<string>());
break;
}
for (auto r: db["pdbx_nonpoly_scheme"].find(cif::key("asym_id") == asym))
for (auto r: db["pdbx_nonpoly_scheme"].find(cif::Key("asym_id") == asym))
{
result.insert(r["pdb_strand_id"].as<string>());
break;
......@@ -223,7 +222,7 @@ int WriteContinuedLine(ostream& pdbFile, string header, int& count, int cLen, st
int maxLength = 80 - lStart - 1;
vector<string> lines = cif::word_wrap(text, maxLength);
vector<string> lines = cif::wordWrap(text, maxLength);
for (auto& line: lines)
{
......@@ -253,7 +252,7 @@ int WriteOneContinuedLine(ostream& pdbFile, string header, int cLen, string line
return WriteContinuedLine(pdbFile, header, count, cLen, line, lStart);
}
int WriteCitation(ostream& pdbFile, datablock& db, row r, int reference)
int WriteCitation(ostream& pdbFile, Datablock& db, Row r, int reference)
{
int result = 0;
......@@ -268,16 +267,16 @@ int WriteCitation(ostream& pdbFile, datablock& db, row r, int reference)
else
s1 = "JRNL ";
string id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, page_first, page_last, year;
string id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, pageFirst, pageLast, year;
cif::tie(id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, page_first, page_last, year) =
cif::tie(id, title, pubname, volume, astm, country, issn, csd, publ, pmid, doi, pageFirst, pageLast, year) =
r.get("id", "title", "journal_abbrev", "journal_volume", "journal_id_ASTM", "country", "journal_id_ISSN",
"journal_id_CSD", "book_publisher", "pdbx_database_id_PubMed", "pdbx_database_id_DOI",
"page_first", "page_last", "year");
vector<string> authors;
for (auto r1: db["citation_author"].find(cif::key("citation_id") == id))
authors.push_back(cif2pdb_auth(r1["name"].as<string>()));
for (auto r1: db["citation_author"].find(cif::Key("citation_id") == id))
authors.push_back(cif2pdbAuth(r1["name"].as<string>()));
if (not authors.empty())
result += WriteOneContinuedLine(pdbFile, s1 + "AUTH", 2, ba::join(authors, ","), 19);
......@@ -294,7 +293,7 @@ int WriteCitation(ostream& pdbFile, datablock& db, row r, int reference)
% pubname
% (volume.empty() ? "" : "V.")
% volume
% page_first
% pageFirst
% year).str()
<< endl;
++result;
......@@ -339,7 +338,7 @@ int WriteCitation(ostream& pdbFile, datablock& db, row r, int reference)
return result;
}
void WriteTitle(ostream& pdbFile, datablock& db)
void WriteTitle(ostream& pdbFile, Datablock& db)
{
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
......@@ -364,7 +363,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
date = r["recvd_initial_deposition_date"].as<string>();
if (date.empty())
continue;
date = cif2pdb_date(date);
date = cif2pdbDate(date);
break;
}
......@@ -375,12 +374,12 @@ void WriteTitle(ostream& pdbFile, datablock& db)
date = r["date_original"].as<string>();
if (date.empty())
continue;
date = cif2pdb_date(date);
date = cif2pdbDate(date);
break;
}
}
pdbFile << (boost::format(kHeader) % keywords % date % db.name()).str() << endl;
pdbFile << (boost::format(kHeader) % keywords % date % db.getName()).str() << endl;
// TODO: implement
// OBSLTE (skip for now)
......@@ -405,7 +404,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
if (r["type"] != "polymer")
continue;
string entity_id = r["id"].as<string>();
string entityId = r["id"].as<string>();
++molID;
cmpnd.push_back("MOL_ID: " + to_string(molID));
......@@ -413,7 +412,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
string molecule = r["pdbx_description"].as<string>();
cmpnd.push_back("MOLECULE: " + molecule);
auto poly = db["entity_poly"].find(cif::key("entity_id") == entity_id);
auto poly = db["entity_poly"].find(cif::Key("entity_id") == entityId);
if (not poly.empty())
{
string chains = poly.front()["pdbx_strand_id"].as<string>();
......@@ -425,7 +424,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
if (not fragment.empty())
cmpnd.push_back("FRAGMENT: " + fragment);
for (auto sr: db["entity_name_com"].find(cif::key("entity_id") == entity_id))
for (auto sr: db["entity_name_com"].find(cif::Key("entity_id") == entityId))
{
string syn = sr["name"].as<string>();
if (not syn.empty())
......@@ -460,7 +459,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
if (r["type"] != "polymer")
continue;
string entity_id = r["id"].as<string>();
string entityId = r["id"].as<string>();
++molID;
source.push_back("MOL_ID: " + to_string(molID));
......@@ -490,7 +489,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
{ "details", "OTHER_DETAILS" }
};
for (auto gr: gen.find(cif::key("entity_id") == entity_id))
for (auto gr: gen.find(cif::Key("entity_id") == entityId))
{
for (auto m: kGenSourceMapping)
{
......@@ -515,7 +514,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
{ "details", "OTHER_DETAILS" }
};
for (auto nr: nat.find(cif::key("entity_id") == entity_id))
for (auto nr: nat.find(cif::Key("entity_id") == entityId))
{
for (auto m: kNatSourceMapping)
{
......@@ -562,27 +561,27 @@ void WriteTitle(ostream& pdbFile, datablock& db)
// AUTHOR
vector<string> authors;
for (auto r: db["audit_author"])
authors.push_back(cif2pdb_auth(r["name"].as<string>()));
authors.push_back(cif2pdbAuth(r["name"].as<string>()));
if (not authors.empty())
WriteOneContinuedLine(pdbFile, "AUTHOR ", 2, ba::join(authors, ","));
// REVDAT
boost::format kRevDat("REVDAT %3.3d%2.2s %9.9s %4.4s %1.1d ");
auto& cat2 = db["database_PDB_rev"];
vector<row> rev(cat2.begin(), cat2.end());
sort(rev.begin(), rev.end(), [](row a, row b) -> bool { return a["num"].as<int>() > b["num"].as<int>(); });
vector<Row> rev(cat2.begin(), cat2.end());
sort(rev.begin(), rev.end(), [](Row a, Row b) -> bool { return a["num"].as<int>() > b["num"].as<int>(); });
for (auto r: rev)
{
int rev_num, mod_type;
int revNum, modType;
string date, replaces;
cif::tie(rev_num, mod_type, date, replaces) = r.get("num", "mod_type", "date", "replaces");
cif::tie(revNum, modType, date, replaces) = r.get("num", "mod_type", "date", "replaces");
date = cif2pdb_date(date);
date = cif2pdbDate(date);
vector<string> types;
for (auto r1: db["database_PDB_rev_record"].find(cif::key("rev_num") == rev_num))
for (auto r1: db["database_PDB_rev_record"].find(cif::Key("rev_num") == revNum))
types.push_back(r1["type"].as<string>());
int continuation = 0;
......@@ -590,7 +589,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
{
string cs = ++continuation > 1 ? to_string(continuation) : string();
pdbFile << (kRevDat % rev_num % cs % date % db.name() % mod_type).str();
pdbFile << (kRevDat % revNum % cs % date % db.getName() % modType).str();
for (size_t i = 0; i < 4; ++i)
pdbFile << (boost::format(" %-6.6s") % (i < types.size() ? types[i] : string())).str();
pdbFile << endl;
......@@ -611,7 +610,7 @@ void WriteTitle(ostream& pdbFile, datablock& db)
}
}
void WriteRemark1(ostream& pdbFile, datablock& db)
void WriteRemark1(ostream& pdbFile, Datablock& db)
{
int reference = 0;
......@@ -629,7 +628,7 @@ void WriteRemark1(ostream& pdbFile, datablock& db)
}
}
void WriteRemark2(ostream& pdbFile, datablock& db)
void WriteRemark2(ostream& pdbFile, Datablock& db)
{
auto& refine = db["refine"];
if (refine.empty())
......@@ -663,9 +662,9 @@ class FBase
virtual void out(ostream& os) = 0;
protected:
FBase(row r, const char* f)
FBase(Row r, const char* f)
: mRow(r), mField(f) {}
FBase(category& cat, cif::condition&& cond, const char* f)
FBase(Category& cat, cif::Condition&& cond, const char* f)
: mField(f)
{
auto r = cat.find(move(cond));
......@@ -673,15 +672,15 @@ class FBase
mRow = r.front();
}
row mRow;
Row mRow;
const char* mField;
};
class Fi : public FBase
{
public:
Fi(row r, const char* f) : FBase(r, f) {}
Fi(category& cat, cif::condition&& cond, const char* f) : FBase(cat, move(cond), f) {}
Fi(Row r, const char* f) : FBase(r, f) {}
Fi(Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, move(cond), f) {}
virtual void out(ostream& os)
{
......@@ -700,8 +699,8 @@ class Fi : public FBase
class Ff : public FBase
{
public:
Ff(row r, const char* f) : FBase(r, f) {}
Ff(category& cat, cif::condition&& cond, const char* f) : FBase(cat, move(cond), f) {}
Ff(Row r, const char* f) : FBase(r, f) {}
Ff(Category& cat, cif::Condition&& cond, const char* f) : FBase(cat, move(cond), f) {}
virtual void out(ostream& os)
{
......@@ -720,8 +719,8 @@ class Ff : public FBase
class Fs : public FBase
{
public:
Fs(row r, const char* f, int remarkNr = 3) : FBase(r, f), mNr(remarkNr) {}
Fs(category& cat, cif::condition&& cond, const char* f, int remarkNr = 3) : FBase(cat, move(cond), f), mNr(remarkNr) {}
Fs(Row r, const char* f, int remarkNr = 3) : FBase(r, f), mNr(remarkNr) {}
Fs(Category& cat, cif::Condition&& cond, const char* f, int remarkNr = 3) : FBase(cat, move(cond), f), mNr(remarkNr) {}
virtual void out(ostream& os)
{
......@@ -787,7 +786,7 @@ ostream& operator<<(ostream& os, SEP&& sep)
// --------------------------------------------------------------------
void WriteRemark3BusterTNT(ostream& pdbFile, datablock& db)
void WriteRemark3BusterTNT(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
......@@ -874,59 +873,59 @@ void WriteRemark3BusterTNT(ostream& pdbFile, datablock& db)
<< RM3("") << endl
<< RM3(" NUMBER OF GEOMETRIC FUNCTION TERMS DEFINED : 15") << endl
<< RM3(" TERM COUNT WEIGHT FUNCTION.") << endl
<< RM3(" BOND LENGTHS : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_bond_d", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_bond_d", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_bond_d", "pdbx_restraint_function") << endl
<< RM3(" BOND ANGLES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_angle_deg", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_angle_deg", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_angle_deg", "pdbx_restraint_function") << endl
<< RM3(" TORSION ANGLES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_dihedral_angle_d", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_dihedral_angle_d", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_dihedral_angle_d", "pdbx_restraint_function") << endl
<< RM3(" TRIGONAL CARBON PLANES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_trig_c_planes", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_trig_c_planes", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_trig_c_planes", "pdbx_restraint_function") << endl
<< RM3(" GENERAL PLANES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_gen_planes", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_gen_planes", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_gen_planes", "pdbx_restraint_function") << endl
<< RM3(" ISOTROPIC THERMAL FACTORS : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_it", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_it", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_it", "pdbx_restraint_function") << endl
<< RM3(" BAD NON-BONDED CONTACTS : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_nbd", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_nbd", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_nbd", "pdbx_restraint_function") << endl
<< RM3(" IMPROPER TORSIONS : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_improper_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_improper_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_improper_torsion", "pdbx_restraint_function") << endl
<< RM3(" PSEUDOROTATION ANGLES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_pseud_angle", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_pseud_angle", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_pseud_angle", "pdbx_restraint_function") << endl
<< RM3(" CHIRAL IMPROPER TORSION : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_chiral_improper_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_chiral_improper_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_chiral_improper_torsion", "pdbx_restraint_function") << endl
<< RM3(" SUM OF OCCUPANCIES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_sum_occupancies", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_sum_occupancies", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_sum_occupancies", "pdbx_restraint_function") << endl
<< RM3(" UTILITY DISTANCES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_utility_distance", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_utility_distance", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_utility_distance", "pdbx_restraint_function") << endl
<< RM3(" UTILITY ANGLES : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_utility_angle", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_utility_angle", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_utility_angle", "pdbx_restraint_function") << endl
<< RM3(" UTILITY TORSION : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_utility_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_utility_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_utility_torsion", "pdbx_restraint_function") << endl
<< RM3(" IDEAL-DIST CONTACT TERM : ", 7, 0) << Ff(ls_restr, cif::key("type") == "t_ideal_dist_contact", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_ideal_dist_contact", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::key("type") == "t_ideal_dist_contact", "pdbx_restraint_function") << endl
<< RM3(" BOND LENGTHS : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_bond_d", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_bond_d", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_bond_d", "pdbx_restraint_function") << endl
<< RM3(" BOND ANGLES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_angle_deg", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_angle_deg", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_angle_deg", "pdbx_restraint_function") << endl
<< RM3(" TORSION ANGLES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_dihedral_angle_d", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_dihedral_angle_d", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_dihedral_angle_d", "pdbx_restraint_function") << endl
<< RM3(" TRIGONAL CARBON PLANES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_trig_c_planes", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_trig_c_planes", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_trig_c_planes", "pdbx_restraint_function") << endl
<< RM3(" GENERAL PLANES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_gen_planes", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_gen_planes", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_gen_planes", "pdbx_restraint_function") << endl
<< RM3(" ISOTROPIC THERMAL FACTORS : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_it", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_it", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_it", "pdbx_restraint_function") << endl
<< RM3(" BAD NON-BONDED CONTACTS : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_nbd", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_nbd", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_nbd", "pdbx_restraint_function") << endl
<< RM3(" IMPROPER TORSIONS : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_improper_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_improper_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_improper_torsion", "pdbx_restraint_function") << endl
<< RM3(" PSEUDOROTATION ANGLES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_pseud_angle", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_pseud_angle", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_pseud_angle", "pdbx_restraint_function") << endl
<< RM3(" CHIRAL IMPROPER TORSION : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_chiral_improper_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_chiral_improper_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_chiral_improper_torsion", "pdbx_restraint_function") << endl
<< RM3(" SUM OF OCCUPANCIES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_sum_occupancies", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_sum_occupancies", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_sum_occupancies", "pdbx_restraint_function") << endl
<< RM3(" UTILITY DISTANCES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_utility_distance", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_utility_distance", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_utility_distance", "pdbx_restraint_function") << endl
<< RM3(" UTILITY ANGLES : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_utility_angle", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_utility_angle", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_utility_angle", "pdbx_restraint_function") << endl
<< RM3(" UTILITY TORSION : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_utility_torsion", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_utility_torsion", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_utility_torsion", "pdbx_restraint_function") << endl
<< RM3(" IDEAL-DIST CONTACT TERM : ", 7, 0) << Ff(ls_restr, cif::Key("type") == "t_ideal_dist_contact", "number")
<< SEP("; ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_ideal_dist_contact", "weight")
<< SEP("; ", 12) << Fs(ls_restr, cif::Key("type") == "t_ideal_dist_contact", "pdbx_restraint_function") << endl
<< RM3("") << endl
<< RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << endl
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "t_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "t_angle_deg", "dev_ideal") << endl
<< RM3(" PEPTIDE OMEGA TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "t_omega_torsion", "dev_ideal") << endl
<< RM3(" OTHER TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "t_other_torsion", "dev_ideal") << endl;
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "t_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "t_angle_deg", "dev_ideal") << endl
<< RM3(" PEPTIDE OMEGA TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "t_omega_torsion", "dev_ideal") << endl
<< RM3(" OTHER TORSION ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "t_other_torsion", "dev_ideal") << endl;
auto& tls = db["pdbx_refine_tls"];
......@@ -937,7 +936,7 @@ void WriteRemark3BusterTNT(ostream& pdbFile, datablock& db)
for (auto t: tls)
{
string id = t["id"].as<string>();
auto g = db["pdbx_refine_tls_group"][cif::key("refine_tls_id") == id];
auto g = db["pdbx_refine_tls_group"][cif::Key("refine_tls_id") == id];
pdbFile << RM3("") << endl
<< RM3(" TLS GROUP : ") << id << endl
......@@ -965,7 +964,7 @@ void WriteRemark3BusterTNT(ostream& pdbFile, datablock& db)
// --------------------------------------------------------------------
void WriteRemark3CNS(ostream& pdbFile, datablock& db)
void WriteRemark3CNS(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
......@@ -1056,24 +1055,24 @@ void WriteRemark3CNS(ostream& pdbFile, datablock& db)
<< RM3("") << endl
<< RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << endl
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "c_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "c_angle_deg", "dev_ideal") << endl
<< RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "c_dihedral_angle_d", "dev_ideal") << endl
<< RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "c_improper_angle_d", "dev_ideal") << endl
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "c_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "c_angle_deg", "dev_ideal") << endl
<< RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "c_dihedral_angle_d", "dev_ideal") << endl
<< RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "c_improper_angle_d", "dev_ideal") << endl
<< RM3("") << endl
<< RM3(" ISOTROPIC THERMAL MODEL : ") << Fs(refine, "pdbx_isotropic_thermal_model") << endl
<< RM3("") << endl
<< RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << endl
<< RM3(" MAIN-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "c_mcbond_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::key("type") == "c_mcbond_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "c_mcangle_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::key("type") == "c_mcangle_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "c_scbond_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::key("type") == "c_scbond_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "c_scangle_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::key("type") == "c_scangle_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "c_mcbond_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::Key("type") == "c_mcbond_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "c_mcangle_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::Key("type") == "c_mcangle_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN BOND (A**2) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "c_scbond_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::Key("type") == "c_scbond_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN ANGLE (A**2) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "c_scangle_it", "dev_ideal") << SEP("; ", 7, 3)
<< Ff(ls_restr, cif::Key("type") == "c_scangle_it", "dev_ideal_target") << endl
<< RM3("") << endl
<< RM3(" BULK SOLVENT MODELING.") << endl
......@@ -1105,7 +1104,7 @@ void WriteRemark3CNS(ostream& pdbFile, datablock& db)
// --------------------------------------------------------------------
void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
void WriteRemark3Refmac(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
......@@ -1115,7 +1114,7 @@ void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
auto& ls_restr = db["refine_ls_restr"];
// auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
auto c = [](const char* t) -> cif::condition { return cif::key("type") == t; };
auto c = [](const char* t) -> cif::Condition { return cif::Key("type") == t; };
pdbFile << RM3("") << endl
<< RM3("REFINEMENT TARGET : ") << Fs(refine, "pdbx_stereochemistry_target_values") << endl
......@@ -1321,7 +1320,7 @@ void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
for (auto ens_id: ncs_groups)
{
auto lim = db["struct_ncs_dom_lim"].find(cif::key("pdbx_ens_id") == ens_id);
auto lim = db["struct_ncs_dom_lim"].find(cif::Key("pdbx_ens_id") == ens_id);
set<string> chains;
set<int> component_ids;
......@@ -1350,7 +1349,7 @@ void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
}
pdbFile << RM3(" GROUP CHAIN COUNT RMS WEIGHT") << endl;
for (auto l: db["refine_ls_restr_ncs"].find(cif::key("pdbx_ens_id") == ens_id))
for (auto l: db["refine_ls_restr_ncs"].find(cif::Key("pdbx_ens_id") == ens_id))
{
string type = l["pdbx_type"].as<string>();
ba::to_upper(type);
......@@ -1388,7 +1387,7 @@ void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
for (auto t: tls)
{
string id = t["id"].as<string>();
auto g = db["pdbx_refine_tls_group"].find(cif::key("refine_tls_id") == id);
auto g = db["pdbx_refine_tls_group"].find(cif::Key("refine_tls_id") == id);
pdbFile << RM3("") << endl
<< RM3(" TLS GROUP : ") << id << endl
......@@ -1432,7 +1431,7 @@ void WriteRemark3Refmac(ostream& pdbFile, datablock& db)
<< RM3("") << endl;
}
void WriteRemark3Shelxl(ostream& pdbFile, datablock& db)
void WriteRemark3Shelxl(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
// auto ls_shell = db["refine_ls_shell"].front();
......@@ -1443,7 +1442,7 @@ void WriteRemark3Shelxl(ostream& pdbFile, datablock& db)
// auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
auto pdbx_refine = db["pdbx_refine"].front();
auto c = [](const char* t) -> cif::condition { return cif::key("type") == t; };
auto c = [](const char* t) -> cif::Condition { return cif::Key("type") == t; };
pdbFile << RM3(" AUTHORS : G.M.SHELDRICK") << endl
<< RM3("") << endl
......@@ -1512,7 +1511,7 @@ void WriteRemark3Shelxl(ostream& pdbFile, datablock& db)
<< RM3("") << endl;
}
void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
void WriteRemark3Phenix(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
// auto ls_shell = db["refine_ls_shell"].front();
......@@ -1523,7 +1522,7 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
// auto pdbx_xplor_file = db["pdbx_xplor_file"].front();
auto pdbx_reflns_twin = db["pdbx_reflns_twin"].front();
auto c = [](const char* t) -> cif::condition { return cif::key("type") == t; };
auto c = [](const char* t) -> cif::Condition { return cif::Key("type") == t; };
pdbFile << RM3("") << endl
<< RM3(" REFINEMENT TARGET : ") << Fs(refine, "pdbx_stereochemistry_target_values") << endl
......@@ -1547,13 +1546,13 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
<< RM3(" BIN RESOLUTION RANGE COMPL. NWORK NFREE RWORK RFREE") << endl;
int bin = 1;
vector<row> bins;
vector<Row> bins;
for (auto r: db["refine_ls_shell"])
bins.push_back(r);
// reverse(bins.begin(), bins.end());
try
{
sort(bins.begin(), bins.end(), [](row a, row b) -> bool { return a["d_res_high"].as<float>() > b["d_res_high"].as<float>(); });
sort(bins.begin(), bins.end(), [](Row a, Row b) -> bool { return a["d_res_high"].as<float>() > b["d_res_high"].as<float>(); });
}
catch (...) {}
......@@ -1635,7 +1634,7 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
{
string id = t["id"].as<string>();
auto pdbx_refine_tls_group = db["pdbx_refine_tls_group"][cif::key("refine_tls_id") == id];
auto pdbx_refine_tls_group = db["pdbx_refine_tls_group"][cif::Key("refine_tls_id") == id];
pdbFile << RM3(" TLS GROUP : ") << id << endl
<< RM3(" SELECTION: ") << Fs(pdbx_refine_tls_group, "selection_details") << endl
......@@ -1673,7 +1672,7 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
//
// for (auto ens_id: ncs_groups)
// {
// auto lim = db["struct_ncs_dom_lim"].find(cif::key("pdbx_ens_id") == ens_id);
// auto lim = db["struct_ncs_dom_lim"].find(cif::Key("pdbx_ens_id") == ens_id);
//
// set<string> chains;
// set<int> component_ids;
......@@ -1702,7 +1701,7 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
// }
//
// pdbFile << RM3(" GROUP CHAIN COUNT RMS WEIGHT") << endl;
// for (auto l: db["refine_ls_restr_ncs"].find(cif::key("pdbx_ens_id") == ens_id))
// for (auto l: db["refine_ls_restr_ncs"].find(cif::Key("pdbx_ens_id") == ens_id))
// {
// string type = l["pdbx_type"];
// ba::to_upper(type);
......@@ -1739,7 +1738,7 @@ void WriteRemark3Phenix(ostream& pdbFile, datablock& db)
pdbFile << RM3("") << endl;
}
void WriteRemark3XPlor(ostream& pdbFile, datablock& db)
void WriteRemark3XPlor(ostream& pdbFile, Datablock& db)
{
auto refine = db["refine"].front();
auto ls_shell = db["refine_ls_shell"].front();
......@@ -1816,24 +1815,24 @@ void WriteRemark3XPlor(ostream& pdbFile, datablock& db)
<< RM3("") << endl
<< RM3(" RMS DEVIATIONS FROM IDEAL VALUES.") << endl
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::key("type") == "x_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "x_angle_deg", "dev_ideal") << endl
<< RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "x_dihedral_angle_d", "dev_ideal") << endl
<< RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::key("type") == "x_improper_angle_d", "dev_ideal") << endl
<< RM3(" BOND LENGTHS (A) : ", 7, 3) << Ff(ls_restr, cif::Key("type") == "x_bond_d", "dev_ideal") << endl
<< RM3(" BOND ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "x_angle_deg", "dev_ideal") << endl
<< RM3(" DIHEDRAL ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "x_dihedral_angle_d", "dev_ideal") << endl
<< RM3(" IMPROPER ANGLES (DEGREES) : ", 7, 2) << Ff(ls_restr, cif::Key("type") == "x_improper_angle_d", "dev_ideal") << endl
<< RM3("") << endl
<< RM3(" ISOTROPIC THERMAL MODEL : ") << Fs(refine, "pdbx_isotropic_thermal_model") << endl
<< RM3("") << endl
<< RM3(" ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA") << endl
<< RM3(" MAIN-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, cif::key("type") == "x_mcbond_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::key("type") == "x_mcbond_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, cif::key("type") == "x_mcangle_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::key("type") == "x_mcangle_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, cif::key("type") == "x_scbond_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::key("type") == "x_scbond_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, cif::key("type") == "x_scangle_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::key("type") == "x_scangle_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, cif::Key("type") == "x_mcbond_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::Key("type") == "x_mcbond_it", "dev_ideal_target") << endl
<< RM3(" MAIN-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, cif::Key("type") == "x_mcangle_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::Key("type") == "x_mcangle_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN BOND (A**2) : ", 6, 2) << Ff(ls_restr, cif::Key("type") == "x_scbond_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::Key("type") == "x_scbond_it", "dev_ideal_target") << endl
<< RM3(" SIDE-CHAIN ANGLE (A**2) : ", 6, 2) << Ff(ls_restr, cif::Key("type") == "x_scangle_it", "dev_ideal") << SEP("; ", 6, 2)
<< Ff(ls_restr, cif::Key("type") == "x_scangle_it", "dev_ideal_target") << endl
<< RM3("") << endl
<< RM3(" NCS MODEL : ") << Fs(ls_restr_ncs, "ncs_model_details") << endl
......@@ -1854,13 +1853,13 @@ void WriteRemark3XPlor(ostream& pdbFile, datablock& db)
<< RM3("") << endl;
}
void WriteRemark3(ostream& pdbFile, datablock& db)
void WriteRemark3(ostream& pdbFile, Datablock& db)
{
string program, authors;
if (not db["pdbx_nmr_software"].empty())
{
auto software = db["pdbx_nmr_software"].find(cif::key("classification") == "refinement");
auto software = db["pdbx_nmr_software"].find(cif::Key("classification") == "refinement");
if (software.size() == 1)
cif::tie(program, authors) = software.front().get("name", "authors");
else if (software.size() > 1)
......@@ -1880,7 +1879,7 @@ void WriteRemark3(ostream& pdbFile, datablock& db)
}
if (program.empty())
program = cif_software(db, eRefinement);
program = cifSoftware(db, eRefinement);
if (authors.empty())
authors = "NULL";
......@@ -1891,11 +1890,11 @@ void WriteRemark3(ostream& pdbFile, datablock& db)
<< RM3("REFINEMENT.") << endl;
int l = 0;
for (auto s: cif::word_wrap(program, 52))
for (auto s: cif::wordWrap(program, 52))
pdbFile << RM3(++l == 1 ? " PROGRAM : " : " ") << s << endl;
l = 0;
for (auto s: cif::word_wrap(authors, 52))
for (auto s: cif::wordWrap(authors, 52))
pdbFile << RM3(++l == 1 ? " AUTHORS : " : " ") << s << endl;
}
......@@ -1930,7 +1929,7 @@ void WriteRemark3(ostream& pdbFile, datablock& db)
}
}
void WriteRemark200(ostream& pdbFile, datablock& db)
void WriteRemark200(ostream& pdbFile, Datablock& db)
{
typedef RM<200> RM;
......@@ -1941,29 +1940,29 @@ void WriteRemark200(ostream& pdbFile, datablock& db)
string diffrn_id = diffrn["id"].as<string>();
string crystal_id = diffrn["crystal_id"].as<string>();
auto diffrn_radiation = db["diffrn_radiation"][cif::key("diffrn_id") == diffrn_id];
auto diffrn_radiation_wavelength = db["diffrn_radiation_wavelength"][cif::key("id") == diffrn_radiation["wavelength_id"].as<string>()];
auto diffrn_source = db["diffrn_source"][cif::key("diffrn_id") == diffrn_id];
auto diffrn_detector = db["diffrn_detector"][cif::key("diffrn_id") == diffrn_id];
auto exptl = db["exptl"][cif::key("entry_id") == db.name()];
auto exptl_crystal = db["exptl_crystal"][cif::key("id") == crystal_id];
auto exptl_crystal_grow = db["exptl_crystal_grow"][cif::key("crystal_id") == crystal_id];
auto computing = db["computing"][cif::key("entry_id") == db.name()];
auto reflns = db["reflns"][cif::key("entry_id") == db.name()];
auto diffrn_radiation = db["diffrn_radiation"][cif::Key("diffrn_id") == diffrn_id];
auto diffrn_radiation_wavelength = db["diffrn_radiation_wavelength"][cif::Key("id") == diffrn_radiation["wavelength_id"].as<string>()];
auto diffrn_source = db["diffrn_source"][cif::Key("diffrn_id") == diffrn_id];
auto diffrn_detector = db["diffrn_detector"][cif::Key("diffrn_id") == diffrn_id];
auto exptl = db["exptl"][cif::Key("entry_id") == db.getName()];
auto exptl_crystal = db["exptl_crystal"][cif::Key("id") == crystal_id];
auto exptl_crystal_grow = db["exptl_crystal_grow"][cif::Key("crystal_id") == crystal_id];
auto computing = db["computing"][cif::Key("entry_id") == db.getName()];
auto reflns = db["reflns"][cif::Key("entry_id") == db.getName()];
string pdbx_diffrn_id = reflns["pdbx_diffrn_id"].as<string>();
auto reflns_shell = db["reflns_shell"][cif::key("pdbx_diffrn_id") == pdbx_diffrn_id];
auto refine = db["refine"][cif::key("pdbx_diffrn_id") == pdbx_diffrn_id];
auto reflns_shell = db["reflns_shell"][cif::Key("pdbx_diffrn_id") == pdbx_diffrn_id];
auto refine = db["refine"][cif::Key("pdbx_diffrn_id") == pdbx_diffrn_id];
string date = diffrn_detector["pdbx_collection_date"].as<string>();
if (date.empty())
date = "NULL";
else
date = cif2pdb_date(date);
date = cif2pdbDate(date);
string iis = cif_software(db, eDataReduction);
string dss = cif_software(db, eDataScaling);
string iis = cifSoftware(db, eDataReduction);
string dss = cifSoftware(db, eDataScaling);
string source = diffrn_source["source"].as<string>();
string synchrotron, type;
......@@ -2031,7 +2030,7 @@ void WriteRemark200(ostream& pdbFile, datablock& db)
<< RM(" <I/SIGMA(I)> FOR SHELL : ", 7, 3) << Ff(reflns_shell, "meanI_over_sigI_obs") << endl
<< RM("") << endl;
struct { row r; const char* field; const char* dst; }
struct { Row r; const char* field; const char* dst; }
kTail[] = {
{ diffrn_radiation, "pdbx_diffrn_protocol", "DIFFRACTION PROTOCOL: "},
{ refine, "pdbx_method_to_determine_struct", "METHOD USED TO DETERMINE THE STRUCTURE: "},
......@@ -2047,7 +2046,7 @@ void WriteRemark200(ostream& pdbFile, datablock& db)
if (s.empty())
{
if (strcmp(t.field, "structure_solution") == 0)
s = cif_software(db, ePhasing);
s = cifSoftware(db, ePhasing);
else
s = "NULL";
}
......@@ -2064,7 +2063,7 @@ void WriteRemark200(ostream& pdbFile, datablock& db)
}
}
void WriteRemark280(ostream& pdbFile, datablock& db)
void WriteRemark280(ostream& pdbFile, Datablock& db)
{
typedef RM<280> RM;
......@@ -2073,7 +2072,7 @@ void WriteRemark280(ostream& pdbFile, datablock& db)
for (auto exptl_crystal: db["exptl_crystal"])
{
string crystal_id = exptl_crystal["id"].as<string>();
auto exptl_crystal_grow = db["exptl_crystal_grow"][cif::key("crystal_id") == crystal_id];
auto exptl_crystal_grow = db["exptl_crystal_grow"][cif::Key("crystal_id") == crystal_id];
pdbFile
<< RM("") << endl
......@@ -2125,7 +2124,7 @@ void WriteRemark280(ostream& pdbFile, datablock& db)
}
}
void WriteRemark350(ostream& pdbFile, datablock& db)
void WriteRemark350(ostream& pdbFile, Datablock& db)
{
auto& c1 = db["pdbx_struct_assembly"];
if (c1.empty())
......@@ -2137,7 +2136,7 @@ void WriteRemark350(ostream& pdbFile, datablock& db)
string id = bm["id"].as<string>();
biomolecules.push_back(id);
for (auto r: db["struct_biol"].find(cif::key("id") == id))
for (auto r: db["struct_biol"].find(cif::Key("id") == id))
{
string s = r["details"].as<string>();
if (not s.empty())
......@@ -2192,7 +2191,7 @@ void WriteRemark350(ostream& pdbFile, datablock& db)
for (string type: { "ABSA (A^2)", "SSA (A^2)", "MORE" })
{
for (auto prop: db["pdbx_struct_assembly_prop"].find(cif::key("biol_id") == id and cif::key("type") == type))
for (auto prop: db["pdbx_struct_assembly_prop"].find(cif::Key("biol_id") == id and cif::Key("type") == type))
{
string value = prop["value"].as<string>();
......@@ -2205,7 +2204,7 @@ void WriteRemark350(ostream& pdbFile, datablock& db)
}
}
auto gen = db["pdbx_struct_assembly_gen"][cif::key("assembly_id") == id];
auto gen = db["pdbx_struct_assembly_gen"][cif::Key("assembly_id") == id];
vector<string> asyms;
string asym_id_list, oper_id_list;
......@@ -2221,7 +2220,7 @@ void WriteRemark350(ostream& pdbFile, datablock& db)
{
string oper_id{ i->begin(), i->end() };
auto r = db["pdbx_struct_oper_list"][cif::key("id") == oper_id];
auto r = db["pdbx_struct_oper_list"][cif::Key("id") == oper_id];
pdbFile << RM(" BIOMT1 ", -3) << Fi(r, "id")
<< SEP(" ", -9, 6) << Ff(r, "matrix[1][1]")
......@@ -2245,7 +2244,7 @@ void WriteRemark350(ostream& pdbFile, datablock& db)
}
}
void WriteRemark400(ostream& pdbFile, datablock& db)
void WriteRemark400(ostream& pdbFile, Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
......@@ -2255,7 +2254,7 @@ void WriteRemark400(ostream& pdbFile, datablock& db)
}
}
void WriteRemark450(ostream& pdbFile, datablock& db)
void WriteRemark450(ostream& pdbFile, Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
......@@ -2266,15 +2265,15 @@ void WriteRemark450(ostream& pdbFile, datablock& db)
}
}
void WriteRemark465(ostream& pdbFile, datablock& db)
void WriteRemark465(ostream& pdbFile, Datablock& db)
{
bool first = true;
typedef RM<465> RM;
boost::format fmt("REMARK 465 %3.3s %3.3s %1.1s %5.5d%1.1s");
auto& c = db["pdbx_unobs_or_zero_occ_residues"];
vector<row> missing(c.begin(), c.end());
stable_sort(missing.begin(), missing.end(), [](row a, row b) -> bool
vector<Row> missing(c.begin(), c.end());
stable_sort(missing.begin(), missing.end(), [](Row a, Row b) -> bool
{
int modelNrA, seqIDA, modelNrB, seqIDB;
string asymIDA, asymIDB;
......@@ -2315,7 +2314,7 @@ void WriteRemark465(ostream& pdbFile, datablock& db)
}
}
void WriteRemark470(ostream& pdbFile, datablock& db)
void WriteRemark470(ostream& pdbFile, Datablock& db)
{
typedef RM<470> RM;
boost::format fmt("REMARK 470 %3.3s %3.3s %1.1s%4.4d%1.1s ");
......@@ -2363,7 +2362,7 @@ void WriteRemark470(ostream& pdbFile, datablock& db)
for (size_t i = 0; i < 6 and not a.second.empty(); ++i)
{
pdbFile << cif2pdb_atomName(a.second.front(), resName, db) << ' ';
pdbFile << cif2pdbAtomName(a.second.front(), resName, db) << ' ';
a.second.pop_front();
}
......@@ -2374,12 +2373,12 @@ void WriteRemark470(ostream& pdbFile, datablock& db)
}
}
void WriteRemark610(ostream& pdbFile, datablock& db)
void WriteRemark610(ostream& pdbFile, Datablock& db)
{
// Placeholder: this writer has no implementation yet. Neither pdbFile nor db
// is touched, so calling it writes nothing to the output stream.
// The directive below makes the compiler report a diagnostic when this file
// is built, as a reminder that the function body is still missing.
#warning("unimplemented!");
}
void WriteRemark800(ostream& pdbFile, datablock& db)
void WriteRemark800(ostream& pdbFile, Datablock& db)
{
int nr = 0;
for (auto r: db["struct_site"])
......@@ -2398,13 +2397,13 @@ void WriteRemark800(ostream& pdbFile, datablock& db)
for (auto l: { "SITE_IDENTIFIER: " + ident, "EVIDENCE_CODE: " + code, "SITE_DESCRIPTION: " + desc })
{
for (auto s: cif::word_wrap(l, 69))
for (auto s: cif::wordWrap(l, 69))
pdbFile << "REMARK 800 " << s << endl;
};
}
}
void WriteRemark999(ostream& pdbFile, datablock& db)
void WriteRemark999(ostream& pdbFile, Datablock& db)
{
for (auto& r: db["pdbx_entry_details"])
{
......@@ -2415,7 +2414,7 @@ void WriteRemark999(ostream& pdbFile, datablock& db)
}
}
void WriteRemarks(ostream& pdbFile, datablock& db)
void WriteRemarks(ostream& pdbFile, Datablock& db)
{
WriteRemark1(pdbFile, db);
WriteRemark2(pdbFile, db);
......@@ -2437,7 +2436,7 @@ void WriteRemarks(ostream& pdbFile, datablock& db)
WriteRemark999(pdbFile, db);
}
int WritePrimaryStructure(ostream& pdbFile, datablock& db)
int WritePrimaryStructure(ostream& pdbFile, Datablock& db)
{
int numSeq = 0;
......@@ -2448,7 +2447,7 @@ int WritePrimaryStructure(ostream& pdbFile, datablock& db)
string id, db_name, db_code;
cif::tie(id, db_name, db_code) = r.get("id", "db_name", "db_code");
for (auto r1: db["struct_ref_seq"].find(cif::key("ref_id") == id))
for (auto r1: db["struct_ref_seq"].find(cif::Key("ref_id") == id))
{
string idCode, chainID, insertBegin, insertEnd, dbAccession, dbinsBeg, dbinsEnd;
string seqBegin, seqEnd, dbseqBegin, dbseqEnd;
......@@ -2578,7 +2577,7 @@ int WritePrimaryStructure(ostream& pdbFile, datablock& db)
pdbFile << (boost::format(
"MODRES %4.4s %3.3s %1.1s %4.4s%1.1s %3.3s %-41.41s")
% db.name()
% db.getName()
% resName
% chainID
% seqNum
......@@ -2591,12 +2590,12 @@ int WritePrimaryStructure(ostream& pdbFile, datablock& db)
return numSeq;
}
int WriteHeterogen(ostream& pdbFile, datablock& db)
int WriteHeterogen(ostream& pdbFile, Datablock& db)
{
int numHet = 0;
string water_entity_id, water_comp_id;
for (auto r: db["entity"].find(cif::key("type") == string("water")))
for (auto r: db["entity"].find(cif::Key("type") == string("water")))
{
water_entity_id = r["id"].as<string>();
break;
......@@ -2656,7 +2655,7 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
// }
// count the HETATM's
// for (auto r: db["atom_site"].find(cif::key("group_PDB") == string("HETATM")))
// for (auto r: db["atom_site"].find(cif::Key("group_PDB") == string("HETATM")))
set<string> missingHetNames;
for (auto r: db["atom_site"])
......@@ -2759,7 +2758,7 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
if (id == water_comp_id)
continue;
string syn = db["chem_comp"][cif::key("id") == id]["pdbx_synonyms"].as<string>();
string syn = db["chem_comp"][cif::Key("id") == id]["pdbx_synonyms"].as<string>();
if (syn.empty())
continue;
......@@ -2777,7 +2776,7 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
int componentNr = 0;
string first_het_asym_id;
for (auto p: db["pdbx_poly_seq_scheme"].find(cif::key("mon_id") == hetID))
for (auto p: db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == hetID))
{
first_het_asym_id = p["asym_id"].as<string>();
break;
......@@ -2785,7 +2784,7 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
if (first_het_asym_id.empty())
{
for (auto p: db["pdbx_nonpoly_scheme"].find(cif::key("mon_id") == hetID))
for (auto p: db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == hetID))
{
first_het_asym_id = p["asym_id"].as<string>();
break;
......@@ -2804,7 +2803,7 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
int nr = count_if(hets.begin(), hets.end(), [hetID](auto& h) -> bool { return h.hetID == hetID; });
for (auto r: db["chem_comp"].find(cif::key("id") == hetID))
for (auto r: db["chem_comp"].find(cif::Key("id") == hetID))
{
string formula = r["formula"].as<string>();
if (nr > 1)
......@@ -2860,13 +2859,13 @@ int WriteHeterogen(ostream& pdbFile, datablock& db)
return numHet;
}
tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, Datablock& db)
{
int numHelix = 0, numSheet = 0;
// HELIX
boost::format kHELIX("HELIX %3.3d %3.3s %3.3s %c %4.4d%1.1s %3.3s %c %4.4d%1.1s%2.2d%-30.30s %5.5d");
for (auto r: db["struct_conf"].find(cif::key("conf_type_id") == "HELX_P"))
for (auto r: db["struct_conf"].find(cif::Key("conf_type_id") == "HELX_P"))
{
string pdbx_PDB_helix_id, beg_label_comp_id, pdbx_beg_PDB_ins_code,
end_label_comp_id, pdbx_end_PDB_ins_code, beg_auth_comp_id,
......@@ -2904,7 +2903,7 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
bool first = true;
for (auto o: db["struct_sheet_order"].find(cif::key("sheet_id") == sheetID))
for (auto o: db["struct_sheet_order"].find(cif::Key("sheet_id") == sheetID))
{
int sense = 0;
string s, rangeID1, rangeID2;
......@@ -2920,7 +2919,7 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
string initResName, initChainID, initICode, endResName, endChainID, endICode;
int initSeqNum, endSeqNum;
auto r1 = db["struct_sheet_range"][cif::key("sheet_id") == sheetID and cif::key("id") == rangeID1];
auto r1 = db["struct_sheet_range"][cif::Key("sheet_id") == sheetID and cif::Key("id") == rangeID1];
cif::tie(initResName, initICode, endResName, endICode,
initResName, initChainID, initSeqNum, endResName, endChainID, endSeqNum)
......@@ -2948,7 +2947,7 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
string initResName, initChainID, initICode, endResName, endChainID, endICode, curAtom, curResName, curChainId, curICode, prevAtom, prevResName, prevChainId, prevICode;
int initSeqNum, endSeqNum, curResSeq, prevResSeq;
auto r2 = db["struct_sheet_range"][cif::key("sheet_id") == sheetID and cif::key("id") == rangeID2];
auto r2 = db["struct_sheet_range"][cif::Key("sheet_id") == sheetID and cif::Key("id") == rangeID2];
cif::tie(initResName, initICode, endResName, endICode,
initResName, initChainID, initSeqNum, endResName, endChainID, endSeqNum)
......@@ -2956,7 +2955,7 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
"pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
"end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");
auto h = db["pdbx_struct_sheet_hbond"].find(cif::key("sheet_id") == sheetID and cif::key("range_id_1") == rangeID1 and cif::key("range_id_2") == rangeID2);
auto h = db["pdbx_struct_sheet_hbond"].find(cif::Key("sheet_id") == sheetID and cif::Key("range_id_1") == rangeID1 and cif::Key("range_id_2") == rangeID2);
if (h.empty())
{
......@@ -2983,8 +2982,8 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
= h.front().get("range_2_auth_atom_id", "range_2_auth_comp_id", "range_2_auth_seq_id", "range_2_auth_asym_id", "range_2_PDB_ins_code",
"range_1_auth_atom_id", "range_1_auth_comp_id", "range_1_auth_seq_id", "range_1_auth_asym_id", "range_1_PDB_ins_code");
curAtom = cif2pdb_atomName(curAtom, compID[0], db);
prevAtom = cif2pdb_atomName(prevAtom, compID[1], db);
curAtom = cif2pdbAtomName(curAtom, compID[0], db);
prevAtom = cif2pdbAtomName(prevAtom, compID[1], db);
pdbFile << (kSHEET1
% rangeID2
......@@ -3018,7 +3017,7 @@ tuple<int,int> WriteSecondaryStructure(ostream& pdbFile, datablock& db)
return make_tuple(numHelix, numSheet);
}
void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
void WriteConnectivity(ostream& pdbFile, cif::Datablock& db)
{
// SSBOND
// have to filter out alts
......@@ -3026,7 +3025,7 @@ void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
int nr = 1;
boost::format kSSBOND("SSBOND %3.3d CYS %1.1s %4.4d%1.1s CYS %1.1s %4.4d%1.1s %6.6s %6.6s %5.2f");
for (auto r: db["struct_conn"].find(cif::key("conn_type_id") == "disulf"))
for (auto r: db["struct_conn"].find(cif::Key("conn_type_id") == "disulf"))
{
string chainID1, icode1, chainID2, icode2, sym1, sym2;
float Length;
......@@ -3042,8 +3041,8 @@ void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
if (n.second == false)
continue;
sym1 = cif2pdb_symmetry(sym1);
sym2 = cif2pdb_symmetry(sym2);
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << (kSSBOND
% nr
......@@ -3063,7 +3062,7 @@ void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
// LINK
boost::format kLINK("LINK %-4.4s%1.1s%3.3s %1.1s%4.4d%1.1s %-4.4s%1.1s%3.3s %1.1s%4.4d%1.1s %6.6s %6.6s %5.2f");
for (auto r: db["struct_conn"].find(cif::key("conn_type_id") == "metalc" or cif::key("conn_type_id") == "covale"))
for (auto r: db["struct_conn"].find(cif::Key("conn_type_id") == "metalc" or cif::Key("conn_type_id") == "covale"))
{
string name1, altLoc1, resName1, chainID1, iCode1, name2, altLoc2, resName2, chainID2, iCode2, sym1, sym2;
int resSeq1, resSeq2;
......@@ -3078,11 +3077,11 @@ void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
cif::tie(compID[0], compID[1]) = r.get("ptnr1_label_comp_id", "ptnr2_label_comp_id");
name1 = cif2pdb_atomName(name1, compID[0], db);
name2 = cif2pdb_atomName(name2, compID[1], db);
name1 = cif2pdbAtomName(name1, compID[0], db);
name2 = cif2pdbAtomName(name2, compID[1], db);
sym1 = cif2pdb_symmetry(sym1);
sym2 = cif2pdb_symmetry(sym2);
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << (kLINK
% name1
......@@ -3131,7 +3130,7 @@ void WriteConnectivity(ostream& pdbFile, cif::datablock& db)
}
}
int WriteMiscellaneousFeatures(ostream& pdbFile, datablock& db)
int WriteMiscellaneousFeatures(ostream& pdbFile, Datablock& db)
{
int numSite = 0;
......@@ -3184,12 +3183,12 @@ int WriteMiscellaneousFeatures(ostream& pdbFile, datablock& db)
return numSite;
}
void WriteCrystallographic(ostream& pdbFile, datablock& db)
void WriteCrystallographic(ostream& pdbFile, Datablock& db)
{
auto r = db["symmetry"][cif::key("entry_id") == db.name()];
auto r = db["symmetry"][cif::Key("entry_id") == db.getName()];
string symmetry = r["space_group_name_H-M"].as<string>();
r = db["cell"][cif::key("entry_id") == db.name()];
r = db["cell"][cif::Key("entry_id") == db.getName()];
boost::format kCRYST1("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4.4d");
......@@ -3204,7 +3203,7 @@ void WriteCrystallographic(ostream& pdbFile, datablock& db)
% r["Z_PDB"]) << endl;
}
int WriteCoordinateTransformation(ostream& pdbFile, datablock& db)
int WriteCoordinateTransformation(ostream& pdbFile, Datablock& db)
{
int result = 0;
......@@ -3245,7 +3244,7 @@ int WriteCoordinateTransformation(ostream& pdbFile, datablock& db)
return result;
}
tuple<int,int> WriteCoordinatesForModel(ostream& pdbFile, datablock& db,
tuple<int,int> WriteCoordinatesForModel(ostream& pdbFile, Datablock& db,
const map<string,tuple<string,int,string>>& last_resseq_for_chain_map,
set<string>& TERminatedChains, int model_nr)
{
......@@ -3344,10 +3343,10 @@ tuple<int,int> WriteCoordinatesForModel(ostream& pdbFile, datablock& db,
++numCoord;
auto ai = atom_site_anisotrop[cif::key("id") == id];
auto ai = atom_site_anisotrop[cif::Key("id") == id];
if (not ai.empty())
//
// auto ai = find_if(atom_site_anisotrop.begin(), atom_site_anisotrop.end(), [id](row r) -> bool { return r["id"] == id; });
// auto ai = find_if(atom_site_anisotrop.begin(), atom_site_anisotrop.end(), [id](Row r) -> bool { return r["id"] == id; });
// if (ai != atom_site_anisotrop.end())
{
float u11, u22, u33, u12, u13, u23;
......@@ -3379,7 +3378,7 @@ tuple<int,int> WriteCoordinatesForModel(ostream& pdbFile, datablock& db,
return make_tuple(numCoord, numTer);
}
tuple<int,int> WriteCoordinate(ostream& pdbFile, datablock& db)
tuple<int,int> WriteCoordinate(ostream& pdbFile, Datablock& db)
{
// residues known from seqres
// map<tuple<string,int,string>,string> res2chain_map;
......@@ -3439,7 +3438,7 @@ tuple<int,int> WriteCoordinate(ostream& pdbFile, datablock& db)
return result;
}
void WritePDBFile(ostream& pdbFile, cif::file& cifFile)
void WritePDBFile(ostream& pdbFile, cif::File& cifFile)
{
io::filtering_ostream out;
out.push(FillOutLineFilter());
......@@ -3448,7 +3447,7 @@ void WritePDBFile(ostream& pdbFile, cif::file& cifFile)
auto filter = out.component<FillOutLineFilter>(0);
assert(filter);
auto& db = cifFile.first_datablock();
auto& db = cifFile.firstDatablock();
int numRemark = 0, numHet = 0, numHelix = 0, numSheet = 0, numTurn = 0, numSite = 0, numXform = 0, numCoord = 0, numTer = 0, numConect = 0, numSeq = 0;
......
......@@ -4,9 +4,9 @@
#include <boost/algorithm/string.hpp>
#include "libcif/cif++.h"
#include "libcif/cif-parser.h"
#include "libcif/cif-validator.h"
#include "cif++/Cif++.h"
#include "cif++/CifParser.h"
#include "cif++/CifValidator.h"
using namespace std;
namespace ba = boost::algorithm;
......@@ -30,14 +30,14 @@ const uint8 kCharTraitsTable[128] = {
// --------------------------------------------------------------------
// Parser exception constructor: composes the text
// `parse error at line <line number>: <message>` and forwards it to the
// runtime_error base. The pre-rename (cif_parser_error) and post-rename
// (CifParserError) spellings of the same constructor are both listed here.
cif_parser_error::cif_parser_error(uint32 line_nr, const string& message)
: runtime_error("parse error at line " + to_string(line_nr) + ": " + message)
CifParserError::CifParserError(uint32 lineNr, const string& message)
: runtime_error("parse error at line " + to_string(lineNr) + ": " + message)
{
}
// --------------------------------------------------------------------
const char* sac_parser::kTokenName[] = {
const char* SacParser::kTokenName[] = {
"unknown",
"EOF",
"DATA",
......@@ -49,7 +49,7 @@ const char* sac_parser::kTokenName[] = {
"Value"
};
const char* sac_parser::kValueName[] = {
const char* SacParser::kValueName[] = {
"Int",
"Float",
"Numeric",
......@@ -61,52 +61,52 @@ const char* sac_parser::kValueName[] = {
// --------------------------------------------------------------------
// Constructs the tokenizer on top of the input stream `is`: validation is
// switched on, the line counter starts at 1, the begin-of-line flag is set and
// the first token is read immediately so that the lookahead is primed.
// NOTE(review): getNextToken clears the begin-of-line flag on entry, so the
// `true` assigned here looks like it is overwritten before the first
// character is examined - confirm whether that matters for input whose very
// first character is a semicolon.
sac_parser::sac_parser(std::istream& is)
: m_data(is)
SacParser::SacParser(std::istream& is)
: mData(is)
{
m_validate = true;
m_line_nr = 1;
m_bol = true;
m_lookahead = get_next_token();
mValidate = true;
mLineNr = 1;
mBol = true;
mLookahead = getNextToken();
}
// Reports a parse failure: throws the parser exception carrying `msg` and the
// line number the tokenizer is currently on. The body is a single throw, so
// control never returns to the caller.
void sac_parser::error(const string& msg)
void SacParser::error(const string& msg)
{
throw cif_parser_error(m_line_nr, msg);
throw CifParserError(mLineNr, msg);
}
// get_next_char takes a char from the buffer, or if it is empty
// getNextChar takes a char from the buffer, or if it is empty
// from the istream. This function also does carriage/linefeed
// translation.
int sac_parser::get_next_char()
int SacParser::getNextChar()
{
int result;
if (m_buffer.empty())
result = m_data.get();
if (mBuffer.empty())
result = mData.get();
else
{
result = m_buffer.top();
m_buffer.pop();
result = mBuffer.top();
mBuffer.pop();
}
// very simple CR/LF translation into LF
if (result == '\r')
{
int lookahead = m_data.get();
int lookahead = mData.get();
if (lookahead != '\n')
m_buffer.push(lookahead);
mBuffer.push(lookahead);
result = '\n';
}
m_token_value += static_cast<char>(result);
mTokenValue += static_cast<char>(result);
if (result == '\n')
++m_line_nr;
++mLineNr;
if (VERBOSE >= 6)
{
cerr << "get_next_char => ";
cerr << "getNextChar => ";
if (iscntrl(result) or not isprint(result))
cerr << int(result) << endl;
else
......@@ -116,95 +116,95 @@ int sac_parser::get_next_char()
return result;
}
// Un-reads the most recently consumed character. The character is taken off
// the end of the token text and pushed onto the look-back buffer, from which
// the character reader serves it again before touching the stream.
// Precondition (asserted): the token text is not empty.
void sac_parser::retract()
void SacParser::retract()
{
assert(not m_token_value.empty());
assert(not mTokenValue.empty());
char ch = m_token_value.back();
char ch = mTokenValue.back();
// undo the line count increment that was done when this newline was read
if (ch == '\n')
--m_line_nr;
--mLineNr;
m_buffer.push(ch);
m_token_value.pop_back();
mBuffer.push(ch);
mTokenValue.pop_back();
}
// Abandons the current scan attempt and retries the same input under the next
// start state. Every character consumed so far is pushed back, then the start
// state advances Start -> Float -> Int -> Value. Being asked to restart from
// any other start state is reported through error(). The begin-of-line flag is
// cleared afterwards.
void sac_parser::restart()
void SacParser::restart()
{
// give back everything read for this token so the retry sees it again
while (not m_token_value.empty())
while (not mTokenValue.empty())
retract();
switch (m_start)
switch (mStart)
{
case eStateStart:
m_state = m_start = eStateFloat;
mState = mStart = eStateFloat;
break;
case eStateFloat:
m_state = m_start = eStateInt;
mState = mStart = eStateInt;
break;
case eStateInt:
m_state = m_start = eStateValue;
mState = mStart = eStateValue;
break;
default:
error("Invalid state in sac_parser");
error("Invalid state in SacParser");
}
m_bol = false;
mBol = false;
}
// Requires the lookahead token to be `t`, then advances to the next token.
// A mismatch is reported through error() (which throws) with the names of the
// expected and the actual token taken from kTokenName.
void sac_parser::match(sac_parser::CIFToken t)
void SacParser::match(SacParser::CIFToken t)
{
if (m_lookahead != t)
error(string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[m_lookahead]);
if (mLookahead != t)
error(string("Unexpected token, expected ") + kTokenName[t] + " but found " + kTokenName[mLookahead]);
m_lookahead = get_next_token();
mLookahead = getNextToken();
}
sac_parser::CIFToken sac_parser::get_next_token()
SacParser::CIFToken SacParser::getNextToken()
{
const auto kEOF = char_traits<char>::eof();
CIFToken result = eCIFTokenUnknown;
int quoteChar = 0;
m_state = m_start = eStateStart;
m_bol = false;
mState = mStart = eStateStart;
mBol = false;
m_token_value.clear();
m_token_type = eCIFValueUnknown;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
while (result == eCIFTokenUnknown)
{
auto ch = get_next_char();
auto ch = getNextChar();
switch (m_state)
switch (mState)
{
case eStateStart:
if (ch == kEOF)
result = eCIFTokenEOF;
else if (ch == '\n')
{
m_bol = true;
m_state = eStateWhite;
mBol = true;
mState = eStateWhite;
}
else if (ch == ' ' or ch == '\t')
m_state = eStateWhite;
mState = eStateWhite;
else if (ch == '#')
m_state = eStateComment;
mState = eStateComment;
else if (ch == '.')
m_state = eStateDot;
mState = eStateDot;
else if (ch == '_')
m_state = eStateTag;
else if (ch == ';' and m_bol)
m_state = eStateTextField;
mState = eStateTag;
else if (ch == ';' and mBol)
mState = eStateTextField;
else if (ch == '\'' or ch == '"')
{
quoteChar = ch;
m_state = eStateQuotedString;
mState = eStateQuotedString;
}
else if (ch == '?')
m_state = eStateQuestionMark;
mState = eStateQuestionMark;
else
restart();
break;
......@@ -214,70 +214,70 @@ sac_parser::CIFToken sac_parser::get_next_token()
result = eCIFTokenEOF;
else if (not isspace(ch))
{
m_state = eStateStart;
mState = eStateStart;
retract();
m_token_value.clear();
mTokenValue.clear();
}
else
m_bol = (ch == '\n');
mBol = (ch == '\n');
break;
case eStateComment:
if (ch == '\n')
{
m_state = eStateStart;
m_bol = true;
m_token_value.clear();
mState = eStateStart;
mBol = true;
mTokenValue.clear();
}
else if (ch == kEOF)
result = eCIFTokenEOF;
else if (not is_any_print(ch))
else if (not isAnyPrint(ch))
error("invalid character in comment");
break;
case eStateQuestionMark:
if (is_non_blank(ch))
m_state = eStateValue;
if (isNonBlank(ch))
mState = eStateValue;
else
{
retract();
result = eCIFTokenValue;
m_token_value.clear();
m_token_type = eCIFValueUnknown;
mTokenValue.clear();
mTokenType = eCIFValueUnknown;
}
break;
case eStateDot:
if (isdigit(ch))
m_state = eStateFloat + 2;
mState = eStateFloat + 2;
else if (isspace(ch))
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueInapplicable;
mTokenType = eCIFValueInapplicable;
}
else
m_state = eStateValue;
mState = eStateValue;
break;
case eStateTextField:
if (ch == '\n')
m_state = eStateTextField + 1;
mState = eStateTextField + 1;
else if (ch == kEOF)
error("unterminated textfield");
else if (not is_any_print(ch))
else if (not isAnyPrint(ch))
// error("invalid character in text field '" + string({ static_cast<char>(ch) }) + "' (" + to_string((int)ch) + ")");
cerr << "invalid character in text field '" << string({ static_cast<char>(ch) }) << "' (" << ch << ") line: " << m_line_nr << endl;
cerr << "invalid character in text field '" << string({ static_cast<char>(ch) }) << "' (" << ch << ") line: " << mLineNr << endl;
break;
case eStateTextField + 1:
if (is_text_lead(ch) or ch == ' ' or ch == '\t')
m_state = eStateTextField;
if (isTextLead(ch) or ch == ' ' or ch == '\t')
mState = eStateTextField;
else if (ch == ';')
{
assert(m_token_value.length() >= 2);
m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
m_token_type = eCIFValueTextField;
assert(mTokenValue.length() >= 2);
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 3);
mTokenType = eCIFValueTextField;
result = eCIFTokenValue;
}
else if (ch == kEOF)
......@@ -290,25 +290,25 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (ch == kEOF)
error("unterminated quoted string");
else if (ch == quoteChar)
m_state = eStateQuotedStringQuote;
else if (not is_any_print(ch))
mState = eStateQuotedStringQuote;
else if (not isAnyPrint(ch))
error("invalid character in quoted string");
break;
case eStateQuotedStringQuote:
if (is_white(ch))
if (isWhite(ch))
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueString;
mTokenType = eCIFValueString;
assert(m_token_value.length() >= 3);
m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
assert(mTokenValue.length() >= 3);
mTokenValue = mTokenValue.substr(1, mTokenValue.length() - 2);
}
else if (ch == quoteChar)
;
else if (is_any_print(ch))
m_state = eStateQuotedString;
else if (isAnyPrint(ch))
mState = eStateQuotedString;
else if (ch == kEOF)
error("unterminated quoted string");
else
......@@ -316,7 +316,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
break;
case eStateTag:
if (not is_non_blank(ch))
if (not isNonBlank(ch))
{
retract();
result = eCIFTokenTag;
......@@ -326,27 +326,27 @@ sac_parser::CIFToken sac_parser::get_next_token()
case eStateFloat:
if (ch == '+' or ch == '-')
{
m_state = eStateFloat + 1;
mState = eStateFloat + 1;
}
else if (isdigit(ch))
m_state = eStateFloat + 1;
mState = eStateFloat + 1;
else
restart();
break;
case eStateFloat + 1:
// if (ch == '(') // numeric???
// m_state = eStateNumericSuffix;
// mState = eStateNumericSuffix;
// else
if (ch == '.')
m_state = eStateFloat + 2;
mState = eStateFloat + 2;
else if (tolower(ch) == 'e')
m_state = eStateFloat + 3;
else if (is_white(ch) or ch == kEOF)
mState = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueInt;
mTokenType = eCIFValueInt;
}
else
restart();
......@@ -355,15 +355,15 @@ sac_parser::CIFToken sac_parser::get_next_token()
// parsed '.'
case eStateFloat + 2:
// if (ch == '(') // numeric???
// m_state = eStateNumericSuffix;
// mState = eStateNumericSuffix;
// else
if (tolower(ch) == 'e')
m_state = eStateFloat + 3;
else if (is_white(ch) or ch == kEOF)
mState = eStateFloat + 3;
else if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueFloat;
mTokenType = eCIFValueFloat;
}
else
restart();
......@@ -372,29 +372,29 @@ sac_parser::CIFToken sac_parser::get_next_token()
// parsed 'e'
case eStateFloat + 3:
if (ch == '-' or ch == '+')
m_state = eStateFloat + 4;
mState = eStateFloat + 4;
else if (isdigit(ch))
m_state = eStateFloat + 5;
mState = eStateFloat + 5;
else
restart();
break;
case eStateFloat + 4:
if (isdigit(ch))
m_state = eStateFloat + 5;
mState = eStateFloat + 5;
else
restart();
break;
case eStateFloat + 5:
// if (ch == '(')
// m_state = eStateNumericSuffix;
// mState = eStateNumericSuffix;
// else
if (is_white(ch) or ch == kEOF)
if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueFloat;
mTokenType = eCIFValueFloat;
}
else
restart();
......@@ -402,17 +402,17 @@ sac_parser::CIFToken sac_parser::get_next_token()
case eStateInt:
if (isdigit(ch) or ch == '+' or ch == '-')
m_state = eStateInt + 1;
mState = eStateInt + 1;
else
restart();
break;
case eStateInt + 1:
if (is_white(ch) or ch == kEOF)
if (isWhite(ch) or ch == kEOF)
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueInt;
mTokenType = eCIFValueInt;
}
else
restart();
......@@ -420,7 +420,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
// case eStateNumericSuffix:
// if (isdigit(ch))
// m_state = eStateNumericSuffix + 1;
// mState = eStateNumericSuffix + 1;
// else
// restart();
// break;
......@@ -429,15 +429,15 @@ sac_parser::CIFToken sac_parser::get_next_token()
// if (ch == ')')
// {
// result = eCIFTokenValue;
// m_token_type = eCIFValueNumeric;
// mTokenType = eCIFValueNumeric;
// }
// else if (not isdigit(ch))
// restart();
// break;
case eStateValue:
if (is_non_blank(ch))
m_state = eStateValue + 1;
if (isNonBlank(ch))
mState = eStateValue + 1;
else
error("invalid character at this position");
break;
......@@ -445,7 +445,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
case eStateValue + 1:
if (ch == '_') // first _, check for keywords
{
string s = to_lower_copy(m_token_value);
string s = toLowerCopy(mTokenValue);
if (s == "global_")
result = eCIFTokenGLOBAL;
......@@ -454,33 +454,33 @@ sac_parser::CIFToken sac_parser::get_next_token()
else if (s == "loop_")
result = eCIFTokenLOOP;
else if (s == "data_" or s == "save_")
m_state = eStateValue + 2;
mState = eStateValue + 2;
}
else if (not is_non_blank(ch))
else if (not isNonBlank(ch))
{
retract();
result = eCIFTokenValue;
m_token_type = eCIFValueString;
mTokenType = eCIFValueString;
}
break;
case eStateValue + 2:
if (not is_non_blank(ch))
if (not isNonBlank(ch))
{
retract();
if (tolower(m_token_value[0]) == 'd')
if (tolower(mTokenValue[0]) == 'd')
result = eCIFTokenDATA;
else
result = eCIFTokenSAVE;
m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
mTokenValue.erase(mTokenValue.begin(), mTokenValue.begin() + 5);
}
break;
default:
assert(false);
error("Invalid state in get_next_token");
error("Invalid state in getNextToken");
break;
}
}
......@@ -488,33 +488,33 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (VERBOSE >= 5)
{
cerr << kTokenName[result];
if (m_token_type != eCIFValueUnknown)
cerr << ' ' << kValueName[m_token_type];
if (mTokenType != eCIFValueUnknown)
cerr << ' ' << kValueName[mTokenType];
if (result != eCIFTokenEOF)
cerr << " '" << m_token_value << '\'';
cerr << " '" << mTokenValue << '\'';
cerr << endl;
}
return result;
}
void sac_parser::parse_file()
void SacParser::parseFile()
{
try
{
while (m_lookahead != eCIFTokenEOF)
while (mLookahead != eCIFTokenEOF)
{
switch (m_lookahead)
switch (mLookahead)
{
case eCIFTokenGLOBAL:
parse_global();
parseGlobal();
break;
case eCIFTokenDATA:
produce_datablock(m_token_value);
produceDatablock(mTokenValue);
match(eCIFTokenDATA);
parse_data_block();
parseDataBlock();
break;
default:
......@@ -529,23 +529,23 @@ void sac_parser::parse_file()
}
}
// Consumes a global section: the GLOBAL token followed by zero or more
// tag/value pairs. The pairs are only matched; none of the produce callbacks
// is invoked, so their content is discarded.
void sac_parser::parse_global()
void SacParser::parseGlobal()
{
match(eCIFTokenGLOBAL);
while (m_lookahead == eCIFTokenTag)
while (mLookahead == eCIFTokenTag)
{
match(eCIFTokenTag);
match(eCIFTokenValue);
}
}
void sac_parser::parse_data_block()
void SacParser::parseDataBlock()
{
string cat;
while (m_lookahead == eCIFTokenLOOP or m_lookahead == eCIFTokenTag or m_lookahead == eCIFTokenSAVE)
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag or mLookahead == eCIFTokenSAVE)
{
switch (m_lookahead)
switch (mLookahead)
{
case eCIFTokenLOOP:
{
......@@ -555,31 +555,31 @@ void sac_parser::parse_data_block()
vector<string> tags;
while (m_lookahead == eCIFTokenTag)
while (mLookahead == eCIFTokenTag)
{
string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(m_token_value);
string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat.empty())
{
produce_category(cat_name);
cat = cat_name;
produceCategory(catName);
cat = catName;
}
else if (not iequals(cat, cat_name))
else if (not iequals(cat, catName))
error("inconsistent categories in loop_");
tags.push_back(item_name);
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (m_lookahead == eCIFTokenValue)
while (mLookahead == eCIFTokenValue)
{
produce_row();
produceRow();
for (auto tag: tags)
{
produce_item(cat, tag, m_token_value);
produceItem(cat, tag, mTokenValue);
match(eCIFTokenValue);
}
}
......@@ -590,26 +590,26 @@ void sac_parser::parse_data_block()
case eCIFTokenTag:
{
string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(m_token_value);
string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (not iequals(cat, cat_name))
if (not iequals(cat, catName))
{
produce_category(cat_name);
cat = cat_name;
produce_row();
produceCategory(catName);
cat = catName;
produceRow();
}
match(eCIFTokenTag);
produce_item(cat, item_name, m_token_value);
produceItem(cat, itemName, mTokenValue);
match(eCIFTokenValue);
break;
}
case eCIFTokenSAVE:
parse_save_frame();
parseSaveFrame();
break;
default:
......@@ -619,118 +619,118 @@ void sac_parser::parse_data_block()
}
}
// Save frame handling for plain CIF input: always reports an error.
// DictParser defines its own parseSaveFrame later in this file; presumably it
// replaces this one for dictionary files - confirm against the class
// declaration, which is not part of this file.
void sac_parser::parse_save_frame()
void SacParser::parseSaveFrame()
{
error("A regular CIF file should not contain a save frame");
}
// --------------------------------------------------------------------
// Creates a parser that tokenizes `is` and uses `f` as the file that receives
// the datablocks it produces. No datablock exists yet, so the
// current-datablock pointer starts out null.
parser::parser(std::istream& is, file& f)
: sac_parser(is), m_file(f), m_data_block(nullptr)
Parser::Parser(std::istream& is, File& f)
: SacParser(is), mFile(f), mDataBlock(nullptr)
{
}
// Starts a new datablock called `name`: allocates it, makes it the current
// datablock and appends it to the file.
// NOTE(review): the datablock is created with a raw new and never deleted
// here; presumably the file takes ownership in append - confirm.
void parser::produce_datablock(const string& name)
void Parser::produceDatablock(const string& name)
{
m_data_block = new datablock(name);
m_file.append(m_data_block);
mDataBlock = new Datablock(name);
mFile.append(mDataBlock);
}
// Makes `name` the current category of the current datablock, going through
// the datablock emplace call. Only the iterator half of the emplace result is
// kept; the other half is dropped via `ignore`.
// Writes a trace line to cerr when VERBOSE is 4 or higher.
void parser::produce_category(const string& name)
void Parser::produceCategory(const string& name)
{
if (VERBOSE >= 4)
cerr << "producing category " << name << endl;
std::tie(m_cat, ignore) = m_data_block->emplace(name);
std::tie(mCat, ignore) = mDataBlock->emplace(name);
}
// Emplaces an empty row in the current category and makes the last row of
// that category the current row, which later produceItem calls fill in.
// Writes a trace line to cerr when VERBOSE is 4 or higher.
void parser::produce_row()
void Parser::produceRow()
{
if (VERBOSE >= 4)
cerr << "producing row for category " << m_cat->name() << endl;
cerr << "producing row for category " << mCat->name() << endl;
m_cat->emplace({});
m_row = m_cat->back();
mCat->emplace({});
mRow = mCat->back();
}
// Stores one item in the current row under the name `item`.
// `category` is compared with the current category name using iequals; a
// mismatch is reported through error().
// NOTE(review): `value` is only used for the trace output. What gets stored is
// the current token text of the parser, not the argument. The callers visible
// in this file pass that same token text, so both coincide there - confirm
// that storing the member rather than the parameter is intended.
void parser::produce_item(const string& category, const string& item, const string& value)
void Parser::produceItem(const string& category, const string& item, const string& value)
{
if (VERBOSE >= 4)
cerr << "producing _" << category << '.' << item << " -> " << value << endl;
if (not iequals(category, m_cat->name()))
if (not iequals(category, mCat->name()))
error("inconsistent categories in loop_");
m_row[item] = m_token_value;
mRow[item] = mTokenValue;
}
// --------------------------------------------------------------------
// Scratch storage owned by DictParser while a dictionary is being read.
// Category validators are collected in a flat list; item validators are
// collected per category name (the map key) until they are handed over to the
// validator.
struct dict_parser_data_impl
struct DictParserDataImpl
{
// temporary values for constructing dictionaries
vector<validate_category> m_category_validators;
map<string,vector<validate_item>> m_item_validators;
vector<ValidateCategory> mCategoryValidators;
map<string,vector<ValidateItem>> mItemValidators;
};
// Sets up a dictionary parser that reads from `is` and feeds `validator`.
// The Parser base receives the mFile member as its output file, and a fresh
// DictParserDataImpl is allocated to hold validators while parsing.
// NOTE(review): mFile is named in the base initializer, i.e. before any member
// of this object has been constructed. That is only safe if the base merely
// binds a reference to it and does not use it during construction - confirm
// against the class declarations.
dict_parser::dict_parser(validator& validator, std::istream& is)
: parser(is, m_file), m_validator(validator), m_impl(new dict_parser_data_impl)
DictParser::DictParser(Validator& validator, std::istream& is)
: Parser(is, mFile), mValidator(validator), mImpl(new DictParserDataImpl)
{
}
// Frees the DictParserDataImpl that the constructor allocated with new.
dict_parser::~dict_parser()
DictParser::~DictParser()
{
delete m_impl;
delete mImpl;
}
void dict_parser::parse_save_frame()
void DictParser::parseSaveFrame()
{
if (not m_collected_item_types)
m_collected_item_types = collect_item_types();
if (not mCollectedItemTypes)
mCollectedItemTypes = collectItemTypes();
string saveFrameName = m_token_value;
string saveFrameName = mTokenValue;
if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here");
bool isCategorySaveFrame = m_token_value[0] != '_';
bool isCategorySaveFrame = mTokenValue[0] != '_';
datablock dict(m_token_value);
datablock::iterator cat = dict.end();
Datablock dict(mTokenValue);
Datablock::iterator cat = dict.end();
match(eCIFTokenSAVE);
while (m_lookahead == eCIFTokenLOOP or m_lookahead == eCIFTokenTag)
while (mLookahead == eCIFTokenLOOP or mLookahead == eCIFTokenTag)
{
if (m_lookahead == eCIFTokenLOOP)
if (mLookahead == eCIFTokenLOOP)
{
cat = dict.end(); // should start a new category
match(eCIFTokenLOOP);
vector<string> tags;
while (m_lookahead == eCIFTokenTag)
while (mLookahead == eCIFTokenTag)
{
string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(m_token_value);
string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end())
std::tie(cat, ignore) = dict.emplace(cat_name);
else if (not iequals(cat->name(), cat_name))
std::tie(cat, ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_");
tags.push_back(item_name);
tags.push_back(itemName);
match(eCIFTokenTag);
}
while (m_lookahead == eCIFTokenValue)
while (mLookahead == eCIFTokenValue)
{
cat->emplace({});
auto row = cat->back();
for (auto tag: tags)
{
row[tag] = m_token_value;
row[tag] = mTokenValue;
match(eCIFTokenValue);
}
}
......@@ -739,17 +739,17 @@ void dict_parser::parse_save_frame()
}
else
{
string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(m_token_value);
string catName, itemName;
std::tie(catName, itemName) = splitTagName(mTokenValue);
if (cat == dict.end() or not iequals(cat->name(), cat_name))
std::tie(cat, ignore) = dict.emplace(cat_name);
if (cat == dict.end() or not iequals(cat->name(), catName))
std::tie(cat, ignore) = dict.emplace(catName);
match(eCIFTokenTag);
if (cat->empty())
cat->emplace({});
cat->back()[item_name] = m_token_value;
cat->back()[itemName] = mTokenValue;
match(eCIFTokenValue);
}
......@@ -759,84 +759,84 @@ void dict_parser::parse_save_frame()
// Category save frame: build a category validator from the category id, the
// item-name part of every key tag and the ids of the groups it belongs to, and
// queue it in the temporary validator list.
// NOTE(review): the identifier rename in this commit also rewrote two lookup
// strings below (category_key became categoryKey, category_group became
// categoryGroup). Those strings are category names looked up in the parsed
// dictionary data, not C++ identifiers; other lookups in this function such as
// item_enumeration were left untouched. With the renamed strings the key and
// group loops will presumably iterate over nothing - confirm and restore the
// original spellings.
if (isCategorySaveFrame)
{
string category = dict.first_item("_category.id");
string category = dict.firstItem("_category.id");
vector<string> keys;
for (auto k: dict["category_key"])
keys.push_back(get<1>(split_tag_name(k["name"].as<string>())));
for (auto k: dict["categoryKey"])
keys.push_back(get<1>(splitTagName(k["name"].as<string>())));
iset groups;
for (auto g: dict["category_group"])
for (auto g: dict["categoryGroup"])
groups.insert(g["id"].as<string>());
m_impl->m_category_validators.push_back(validate_category{category, keys, groups});
mImpl->mCategoryValidators.push_back(ValidateCategory{category, keys, groups});
}
else
{
// if the type code is missing, this must be a pointer, just skip it
string type_code = dict.first_item("_item_type.code");
string typeCode = dict.firstItem("_item_type.code");
const validate_type* tv = nullptr;
if (not (type_code.empty() or type_code == "?"))
tv = m_validator.get_validator_for_type(type_code);
const ValidateType* tv = nullptr;
if (not (typeCode.empty() or typeCode == "?"))
tv = mValidator.getValidatorForType(typeCode);
iset ess;
for (auto e: dict["item_enumeration"])
ess.insert(e["value"].as<string>());
// collect the dict from our data_block and construct validators
// collect the dict from our dataBlock and construct validators
for (auto i: dict["item"])
{
string tag_name, category, mandatory;
string tagName, category, mandatory;
cif::tie(tag_name, category, mandatory) = i.get("name", "category_id", "mandatory_code");
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(tag_name);
string catName, itemName;
std::tie(catName, itemName) = splitTagName(tagName);
if (cat_name.empty() or item_name.empty())
error("Invalid tag name in _item.name " + tag_name);
if (catName.empty() or itemName.empty())
error("Invalid tag name in _item.name " + tagName);
if (not iequals(category, cat_name) and not (category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tag_name + '\'');
if (not iequals(category, catName) and not (category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
else
category = cat_name;
category = catName;
auto& ivs = m_impl->m_item_validators[category];
auto& ivs = mImpl->mItemValidators[category];
auto vi = find(ivs.begin(), ivs.end(), validate_item{item_name});
auto vi = find(ivs.begin(), ivs.end(), ValidateItem{itemName});
if (vi == ivs.end())
ivs.push_back(validate_item{item_name, iequals(mandatory, "yes"), tv, ess});
ivs.push_back(ValidateItem{itemName, iequals(mandatory, "yes"), tv, ess});
else
{
// need to update the item_validator?
if (vi->m_mandatory != (iequals(mandatory, "yes")))
// need to update the itemValidator?
if (vi->mMandatory != (iequals(mandatory, "yes")))
{
if (VERBOSE > 2)
{
cerr << "inconsistent mandatory value for " << tag_name << " in dictionary" << endl;
cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << endl;
if (iequals(tag_name, saveFrameName))
if (iequals(tagName, saveFrameName))
cerr << "choosing " << mandatory << endl;
else
cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << endl;
cerr << "choosing " << (vi->mMandatory ? "Y" : "N") << endl;
}
if (iequals(tag_name, saveFrameName))
vi->m_mandatory = (iequals(mandatory, "yes"));
if (iequals(tagName, saveFrameName))
vi->mMandatory = (iequals(mandatory, "yes"));
}
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
if (vi->mType != nullptr and tv != nullptr and vi->mType != tv)
{
if (VERBOSE > 1)
cerr << "inconsistent type for " << tag_name << " in dictionary" << endl;
cerr << "inconsistent type for " << tagName << " in dictionary" << endl;
}
// vi->m_mandatory = (iequals(mandatory, "yes"));
if (vi->m_type == nullptr)
vi->m_type = tv;
// vi->mMandatory = (iequals(mandatory, "yes"));
if (vi->mType == nullptr)
vi->mType = tv;
vi->m_enums.insert(ess.begin(), ess.end());
vi->mEnums.insert(ess.begin(), ess.end());
// anything else yet?
// ...
......@@ -845,63 +845,63 @@ void dict_parser::parse_save_frame()
}
}
// Connects child item validators to their parents and reports untyped items.
// Step 1: for every row of pdbx_item_linked_group_list in the current
// datablock, look up the validators of the child and the parent item and
// attach the parent to the child. An unknown child or parent is reported
// through error().
// Step 2: walk all item validators of all category validators and print a
// line on cerr for each one that has no type.
// Reports an error up front when there is no current datablock.
void dict_parser::link_items()
void DictParser::linkItems()
{
if (not m_data_block)
if (not mDataBlock)
error("no datablock");
auto& dict = *m_data_block;
auto& dict = *mDataBlock;
for (auto gl: dict["pdbx_item_linked_group_list"])
{
string child, parent;
cif::tie(child, parent) = gl.get("child_name", "parent_name");
auto civ = m_validator.get_validator_for_item(child);
auto civ = mValidator.getValidatorForItem(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = m_validator.get_validator_for_item(parent);
auto piv = mValidator.getValidatorForItem(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
civ->set_parent(piv);
civ->setParent(piv);
}
// now make sure the item_type is specified for all item_validators
// now make sure the itemType is specified for all itemValidators
// (a missing type is only reported on cerr, it is not treated as an error)
for (auto& cv: m_validator.m_category_validators)
for (auto& cv: mValidator.mCategoryValidators)
{
for (auto& iv: cv.m_item_validators)
for (auto& iv: cv.mItemValidators)
{
if (iv.m_type == nullptr)
cerr << "Missing item_type for " << iv.m_tag << endl;
if (iv.mType == nullptr)
cerr << "Missing item_type for " << iv.mTag << endl;
}
}
}
void dict_parser::load_dictionary()
void DictParser::loadDictionary()
{
unique_ptr<datablock> dict;
datablock* saved_datablock = m_data_block;
unique_ptr<Datablock> dict;
Datablock* savedDatablock = mDataBlock;
try
{
while (m_lookahead != eCIFTokenEOF)
while (mLookahead != eCIFTokenEOF)
{
switch (m_lookahead)
switch (mLookahead)
{
case eCIFTokenGLOBAL:
parse_global();
parseGlobal();
break;
default:
{
dict.reset(new datablock(m_token_value)); // dummy datablock, for constructing the validator only
m_data_block = dict.get();
dict.reset(new Datablock(mTokenValue)); // dummy datablock, for constructing the validator only
mDataBlock = dict.get();
match(eCIFTokenDATA);
parse_data_block();
parseDataBlock();
break;
}
}
......@@ -914,74 +914,74 @@ void dict_parser::load_dictionary()
}
// store all validators
for (auto& ic: m_impl->m_category_validators)
m_validator.add_category_validator(move(ic));
m_impl->m_category_validators.clear();
for (auto& ic: mImpl->mCategoryValidators)
mValidator.addCategoryValidator(move(ic));
mImpl->mCategoryValidators.clear();
for (auto& iv: m_impl->m_item_validators)
for (auto& iv: mImpl->mItemValidators)
{
auto cv = m_validator.get_validator_for_category(iv.first);
auto cv = mValidator.getValidatorForCategory(iv.first);
if (cv == nullptr)
error("Undefined category '" + iv.first);
for (auto& v: iv.second)
const_cast<validate_category*>(cv)->add_item_validator(move(v));
const_cast<ValidateCategory*>(cv)->addItemValidator(move(v));
}
// check all item validators for having a type_validator
// check all item validators for having a typeValidator
if (dict)
link_items();
linkItems();
// store meta information
datablock::iterator info;
Datablock::iterator info;
bool n;
std::tie(info, n) = m_data_block->emplace("dictionary");
std::tie(info, n) = mDataBlock->emplace("dictionary");
if (n)
{
auto r = info->front();
m_validator.dict_name(r["title"].as<string>());
m_validator.dict_version(r["version"].as<string>());
mValidator.dictName(r["title"].as<string>());
mValidator.dictVersion(r["version"].as<string>());
}
m_data_block = saved_datablock;
mDataBlock = savedDatablock;
m_impl->m_item_validators.clear();
mImpl->mItemValidators.clear();
}
bool dict_parser::collect_item_types()
bool DictParser::collectItemTypes()
{
bool result = false;
if (not m_data_block)
if (not mDataBlock)
error("no datablock");
auto& dict = *m_data_block;
auto& dict = *mDataBlock;
for (auto& t: dict["item_type_list"])
{
auto ts = t.get("code", "primitive_code", "construct");
string code, primitive_code, construct;
cif::tie(code, primitive_code, construct) = ts;
string code, primitiveCode, construct;
cif::tie(code, primitiveCode, construct) = ts;
ba::replace_all(construct, "\\n", "\n");
ba::replace_all(construct, "\\t", "\t");
ba::replace_all(construct, "\\\n", "");
validate_type v = {
code, map_to_primitive_type(primitive_code), boost::regex(construct, boost::regex::egrep)
ValidateType v = {
code, mapToPrimitiveType(primitiveCode), boost::regex(construct, boost::regex::egrep)
};
// Do not replace an already defined type validator, this won't work with pdbx_v40
// as it has a name that is too strict for its own names :-)
// if (m_file_impl.m_type_validators.count(v))
// m_file_impl.m_type_validators.erase(v);
// if (mFileImpl.mTypeValidators.count(v))
// mFileImpl.mTypeValidators.erase(v);
m_validator.add_type_validator(move(v));
mValidator.addTypeValidator(move(v));
if (VERBOSE >= 5)
cerr << "Added type " << code << " (" << primitive_code << ") => " << construct << endl;
cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << endl;
result = true;
}
......
// CIF parser
#include "libcif/config.h"
#include "cif++/Config.h"
#include <tuple>
#include <iostream>
#include <boost/algorithm/string.hpp>
#include "libcif/cif-utils.h"
#include "cif++/CifUtils.h"
using namespace std;
namespace ba = boost::algorithm;
......
......@@ -5,9 +5,9 @@
// since gcc's regex is crashing....
#include <boost/regex.hpp>
#include "libcif/cif++.h"
#include "libcif/cif-parser.h"
#include "libcif/cif-validator.h"
#include "cif++/Cif++.h"
#include "cif++/CifParser.h"
#include "cif++/CifValidator.h"
using namespace std;
namespace ba = boost::algorithm;
......@@ -17,7 +17,7 @@ extern int VERBOSE;
namespace cif
{
DDL_PrimitiveType map_to_primitive_type(const string& s)
DDL_PrimitiveType mapToPrimitiveType(const string& s)
{
DDL_PrimitiveType result;
if (iequals(s, "char"))
......@@ -27,13 +27,13 @@ DDL_PrimitiveType map_to_primitive_type(const string& s)
else if (iequals(s, "numb"))
result = ptNumb;
else
throw validation_error("Not a known primitive type");
throw ValidationError("Not a known primitive type");
return result;
}
// --------------------------------------------------------------------
int validate_type::compare(const char* a, const char* b) const
int ValidateType::compare(const char* a, const char* b) const
{
int result = 0;
......@@ -45,7 +45,7 @@ int validate_type::compare(const char* a, const char* b) const
{
try
{
switch (m_primitive_type)
switch (mPrimitiveType)
{
case ptNumb:
{
......@@ -119,59 +119,59 @@ int validate_type::compare(const char* a, const char* b) const
// --------------------------------------------------------------------
void validate_item::set_parent(validate_item* parent)
void ValidateItem::setParent(ValidateItem* parent)
{
m_parent = parent;
mParent = parent;
if (m_type == nullptr and m_parent != nullptr)
m_type = m_parent->m_type;
if (mType == nullptr and mParent != nullptr)
mType = mParent->mType;
if (m_parent != nullptr)
if (mParent != nullptr)
{
m_parent->m_children.insert(this);
mParent->mChildren.insert(this);
if (m_category->m_keys == vector<string>{m_tag})
m_parent->m_foreign_keys.insert(this);
if (mCategory->mKeys == vector<string>{mTag})
mParent->mForeignKeys.insert(this);
}
}
void validate_item::operator()(string value) const
void ValidateItem::operator()(string value) const
{
if (VERBOSE >= 4)
cout << "validating '" << value << "' for '" << m_tag << "'" << endl;
cout << "validating '" << value << "' for '" << mTag << "'" << endl;
if (not value.empty() and value != "?" and value != ".")
{
if (m_type != nullptr and not boost::regex_match(value, m_type->m_rx))
throw validation_error("Value '" + value + "' does not match type expression for type " + m_type->m_name + " in item " + m_tag);
if (mType != nullptr and not boost::regex_match(value, mType->mRx))
throw ValidationError("Value '" + value + "' does not match type expression for type " + mType->mName + " in item " + mTag);
if (not m_enums.empty())
if (not mEnums.empty())
{
if (m_enums.count(value) == 0)
throw validation_error("Value '" + value + "' is not in the list of allowed values for item " + m_tag);
if (mEnums.count(value) == 0)
throw ValidationError("Value '" + value + "' is not in the list of allowed values for item " + mTag);
}
}
}
// --------------------------------------------------------------------
void validate_category::add_item_validator(validate_item&& v)
void ValidateCategory::addItemValidator(ValidateItem&& v)
{
if (v.m_mandatory)
m_mandatory_fields.insert(v.m_tag);
if (v.mMandatory)
mMandatoryFields.insert(v.mTag);
v.m_category = this;
v.mCategory = this;
auto r = m_item_validators.insert(move(v));
auto r = mItemValidators.insert(move(v));
if (not r.second and VERBOSE >= 4)
cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << endl;
cout << "Could not add validator for item " << v.mTag << " to category " << mName << endl;
}
const validate_item* validate_category::get_validator_for_item(string tag) const
const ValidateItem* ValidateCategory::getValidatorForItem(string tag) const
{
const validate_item* result = nullptr;
auto i = m_item_validators.find(validate_item{tag});
if (i != m_item_validators.end())
const ValidateItem* result = nullptr;
auto i = mItemValidators.find(ValidateItem{tag});
if (i != mItemValidators.end())
result = &*i;
else if (VERBOSE > 4)
cout << "No validator for tag " << tag << endl;
......@@ -180,61 +180,61 @@ const validate_item* validate_category::get_validator_for_item(string tag) const
// --------------------------------------------------------------------
validator::validator()
Validator::Validator()
{
}
validator::~validator()
Validator::~Validator()
{
}
void validator::add_type_validator(validate_type&& v)
void Validator::addTypeValidator(ValidateType&& v)
{
auto r = m_type_validators.insert(move(v));
auto r = mTypeValidators.insert(move(v));
if (not r.second and VERBOSE > 4)
cout << "Could not add validator for type " << v.m_name << endl;
cout << "Could not add validator for type " << v.mName << endl;
}
const validate_type* validator::get_validator_for_type(string type_code) const
const ValidateType* Validator::getValidatorForType(string typeCode) const
{
const validate_type* result = nullptr;
const ValidateType* result = nullptr;
auto i = m_type_validators.find(validate_type{ type_code, ptChar, boost::regex() });
if (i != m_type_validators.end())
auto i = mTypeValidators.find(ValidateType{ typeCode, ptChar, boost::regex() });
if (i != mTypeValidators.end())
result = &*i;
else if (VERBOSE > 4)
cout << "No validator for type " << type_code << endl;
cout << "No validator for type " << typeCode << endl;
return result;
}
void validator::add_category_validator(validate_category&& v)
void Validator::addCategoryValidator(ValidateCategory&& v)
{
auto r = m_category_validators.insert(move(v));
auto r = mCategoryValidators.insert(move(v));
if (not r.second and VERBOSE > 4)
cout << "Could not add validator for category " << v.m_name << endl;
cout << "Could not add validator for category " << v.mName << endl;
}
const validate_category* validator::get_validator_for_category(string category) const
const ValidateCategory* Validator::getValidatorForCategory(string category) const
{
const validate_category* result = nullptr;
auto i = m_category_validators.find(validate_category{category});
if (i != m_category_validators.end())
const ValidateCategory* result = nullptr;
auto i = mCategoryValidators.find(ValidateCategory{category});
if (i != mCategoryValidators.end())
result = &*i;
else if (VERBOSE > 4)
cout << "No validator for category " << category << endl;
return result;
}
validate_item* validator::get_validator_for_item(string tag) const
ValidateItem* Validator::getValidatorForItem(string tag) const
{
validate_item* result = nullptr;
ValidateItem* result = nullptr;
string cat, item;
std::tie(cat, item) = split_tag_name(tag);
std::tie(cat, item) = splitTagName(tag);
auto* cv = get_validator_for_category(cat);
auto* cv = getValidatorForCategory(cat);
if (cv != nullptr)
result = const_cast<validate_item*>(cv->get_validator_for_item(item));
result = const_cast<ValidateItem*>(cv->getValidatorForItem(item));
if (result == nullptr and VERBOSE > 4)
cout << "No validator for item " << tag << endl;
......@@ -242,10 +242,10 @@ validate_item* validator::get_validator_for_item(string tag) const
return result;
}
void validator::report_error(const string& msg)
void Validator::reportError(const string& msg)
{
if (m_strict)
throw validation_error(msg);
if (mStrict)
throw ValidationError(msg);
else if (VERBOSE)
cerr << msg << endl;
}
......
#include "libpr.h"
#include <set>
#include <map>
#include <unordered_set>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/fstream.hpp>
#include <boost/algorithm/string.hpp>
#include "cif++.h"
#include "peptidedb.h"
using namespace std;
namespace fs = boost::filesystem;
namespace ba = boost::algorithm;
// Lookup table from three-letter amino acid residue codes to their
// one-letter codes. Besides the twenty standard residues it contains the
// two ambiguity codes GLX (Z) and ASX (B).
const std::map<std::string, char> kAAMap{
	// standard residues, ordered by one-letter code
	{ "ALA", 'A' }, { "CYS", 'C' }, { "ASP", 'D' }, { "GLU", 'E' },
	{ "PHE", 'F' }, { "GLY", 'G' }, { "HIS", 'H' }, { "ILE", 'I' },
	{ "LYS", 'K' }, { "LEU", 'L' }, { "MET", 'M' }, { "ASN", 'N' },
	{ "PRO", 'P' }, { "GLN", 'Q' }, { "ARG", 'R' }, { "SER", 'S' },
	{ "THR", 'T' }, { "VAL", 'V' }, { "TRP", 'W' }, { "TYR", 'Y' },

	// ambiguity codes
	{ "ASX", 'B' }, { "GLX", 'Z' }
};
// Lookup table from nucleotide residue names to one-letter base codes.
// The single-letter names map to themselves, the D-prefixed names map to
// the letter that follows the prefix.
const std::map<std::string, char> kBaseMap{
	// single-letter residue names
	{ "A", 'A' }, { "C", 'C' }, { "G", 'G' }, { "T", 'T' }, { "U", 'U' },

	// D-prefixed residue names
	{ "DA", 'A' }, { "DC", 'C' }, { "DG", 'G' }, { "DT", 'T' }
};
// --------------------------------------------------------------------
// One loaded compound list file. Instances are chained through m_next;
// a lookup that finds nothing in this instance is forwarded to m_next,
// and destroying an instance destroys everything chained behind it.
struct PeptideDBImpl
{
	// Parses data as a cif file and indexes its chem_comp category.
	// Takes ownership of next (it is deleted in the destructor).
	PeptideDBImpl(istream& data, PeptideDBImpl* next);

	// m_next is owned through a raw pointer, a copy would delete it twice.
	PeptideDBImpl(const PeptideDBImpl&) = delete;
	PeptideDBImpl& operator=(const PeptideDBImpl&) = delete;

	~PeptideDBImpl()
	{
		delete m_next;
	}

	/*unordered_*/set<string>	m_known_peptides;	// three_letter_codes of the peptide groups
	set<string>					m_known_bases;		// three_letter_codes of the DNA/RNA groups

	cif::file					m_file;
	cif::category&				m_chem_comp;		// refers into m_file, so m_file must be declared first
	PeptideDBImpl*				m_next;				// owned, may be nullptr

	// Returns the trimmed 'name' of the first chem_comp row whose
	// three_letter_code equals res_name (case insensitive). When that
	// yields nothing the question is passed on to m_next; the result is
	// empty when no chained instance knows the residue.
	string name_for(const string& res_name) const
	{
		string result;

		for (auto& chem_comp: m_chem_comp)
		{
			if (ba::iequals(chem_comp["three_letter_code"].as<string>(), res_name) == false)
				continue;

			result = chem_comp["name"].as<string>();
			ba::trim(result);
			break;
		}

		if (result.empty() and m_next)
			result = m_next->name_for(res_name);

		return result;
	}

	string formula_for(string res_name) const;

	// Translates an alternative compound id into the regular comp_id using
	// the chem_comp_synonyms category of the comp_synonym_list datablock.
	// When this file has no (non-empty) translation the question is passed
	// on to m_next; when no chained instance has one, res_name itself is
	// returned.
	//
	// The result used to be seeded with res_name, which made the
	// 'result.empty()' test fail for every real name and so m_next was
	// never consulted.
	string unalias(const string& res_name) const
	{
		string result;

		// operator[] is called on a non-const file, hence the const_cast
		auto& e = const_cast<cif::file&>(m_file)["comp_synonym_list"];

		for (auto& synonym: e["chem_comp_synonyms"])
		{
			if (ba::iequals(synonym["comp_alternative_id"].as<string>(), res_name) == false)
				continue;

			result = synonym["comp_id"].as<string>();
			ba::trim(result);
			break;
		}

		if (result.empty())
			result = m_next != nullptr ? m_next->unalias(res_name) : res_name;

		return result;
	}
};
// Parse data, bind m_chem_comp to the chem_comp category of the first
// datablock and sort the three letter codes found there into the sets of
// known peptides and known bases, based on the value of the group column.
PeptideDBImpl::PeptideDBImpl(istream& data, PeptideDBImpl* next)
	: m_file(data), m_chem_comp(m_file.first_datablock()["chem_comp"]), m_next(next)
{
	for (auto& row: m_chem_comp)
	{
		string kind, code;
		cif::tie(kind, code) = row.get("group", "three_letter_code");

		const bool isPeptide = kind == "peptide" or kind == "M-peptide" or kind == "P-peptide";
		const bool isBase = kind == "DNA" or kind == "RNA";

		// rows belonging to any other group are not recorded
		if (isPeptide)
			m_known_peptides.insert(code);
		else if (isBase)
			m_known_bases.insert(code);
	}
}
// Append the element counts in atoms to formula as space separated
// "<symbol><count>" terms. The count is omitted when it equals 1.
static void appendFormula(string& formula, const map<string,uint32>& atoms)
{
	for (const auto& a: atoms)
	{
		if (not formula.empty())
			formula += ' ';

		formula += a.first;
		if (a.second > 1)
			formula += to_string(a.second);
	}
}

// Returns a formula for residue res, built by counting the type_symbol
// values in the chem_comp_atom category of datablock "comp_<RES>".
//
// Lookup order:
//  1. the datablocks of this file,
//  2. the next chained instance, if any,
//  3. otherwise the file $CLIBD_MON/<first letter, lower case>/<RES>.cif
//
// The result is empty when the residue cannot be found or its file cannot
// be parsed. Throws runtime_error when step 3 is reached and CLIBD_MON is
// not set.
string PeptideDBImpl::formula_for(string res) const
{
	string result;

	ba::to_upper(res);

	const string blockName = "comp_" + res;

	for (auto& db: m_file)
	{
		if (db.name() != blockName)
			continue;

		auto& cat = db["chem_comp_atom"];

		map<string,uint32> atoms;
		for (auto r: cat)
			atoms[r["type_symbol"].as<string>()] += 1;

		appendFormula(result, atoms);
	}

	if (result.empty())
	{
		if (m_next != nullptr)
			result = m_next->formula_for(res);
		else
		{
			const char* clibd_mon = getenv("CLIBD_MON");
			if (clibd_mon == nullptr)
				throw runtime_error("Cannot locate peptide list, please source the CCP4 environment");

			fs::path resFile = fs::path(clibd_mon) / ba::to_lower_copy(res.substr(0, 1)) / (res + ".cif");
			if (fs::exists(resFile))
			{
				fs::ifstream file(resFile);
				if (file.is_open())
				{
					try
					{
						cif::file cf(file);

						auto& cat = cf[blockName]["chem_comp_atom"];

						map<string,uint32> atoms;
						for (auto r: cat)
							atoms[r["type_symbol"].as<string>()] += 1;

						appendFormula(result, atoms);
					}
					catch (const exception& ex)
					{
						// an unreadable file is not fatal, it simply yields no formula
						if (VERBOSE)
							cerr << ex.what() << endl;
						result.clear();
					}
				}
			}
		}
	}

	return result;
}
// --------------------------------------------------------------------
PeptideDB* PeptideDB::sInstance;
// Returns the object sInstance points to, constructing a PeptideDB first
// when sInstance is still a null pointer.
PeptideDB& PeptideDB::Instance()
{
	if (sInstance != nullptr)
		return *sInstance;

	sInstance = new PeptideDB();
	return *sInstance;
}
// Loads $CLIBD_MON/list/mon_lib_list.cif as the first dictionary and
// registers this object as the singleton instance.
// Throws runtime_error when CLIBD_MON is not set, or (from PushDictionary)
// when the list file is missing or cannot be opened.
PeptideDB::PeptideDB()
	: mImpl(nullptr)	// PushDictionary reads mImpl as the 'next' pointer, so it must be defined here
{
	const char* clibd_mon = getenv("CLIBD_MON");
	if (clibd_mon == nullptr)
		throw runtime_error("Cannot locate peptide list, please source the CCP4 environment");

	fs::path db = fs::path(clibd_mon) / "list" / "mon_lib_list.cif";

	PushDictionary(db);

	sInstance = this;
}
// Loads the file dict and makes it the first dictionary to be consulted;
// the dictionaries loaded before are chained behind it.
// Throws runtime_error when dict does not exist or cannot be opened.
void PeptideDB::PushDictionary(boost::filesystem::path dict)
{
	const bool present = fs::exists(dict);
	if (not present)
		throw runtime_error("file not found: " + dict.string());

	fs::ifstream in(dict);
	if (in.is_open() == false)
		throw runtime_error("Could not open peptide list " + dict.string());

	// the current head becomes the 'next' of the new one
	PeptideDBImpl* pushed = new PeptideDBImpl(in, mImpl);
	mImpl = pushed;
}
void PeptideDB::PopDictionary()
{
if (mImpl != nullptr)
{
auto i = mImpl;
mImpl = i->m_next;
i->m_next = nullptr;
delete i;
}
}
// Destroys all loaded dictionaries (deleting the head deletes the chain).
PeptideDB::~PeptideDB()
{
	delete mImpl;
	mImpl = nullptr;

	// Do not leave the singleton pointer dangling: a later call to
	// Instance() should create a fresh object instead of returning this one.
	if (sInstance == this)
		sInstance = nullptr;
}
bool PeptideDB::IsKnownPeptide(const string& res_name) const
{
return mImpl->m_known_peptides.count(res_name) > 0;
}
bool PeptideDB::IsKnownBase(const string& res_name) const
{
return mImpl->m_known_bases.count(res_name) > 0;
}
string PeptideDB::GetNameForResidue(const string& res_name) const
{
return mImpl->name_for(res_name);
}
string PeptideDB::GetFormulaForResidue(const string& res_name) const
{
return mImpl->formula_for(res_name);
}
string PeptideDB::Unalias(const string& res_name) const
{
return mImpl->unalias(res_name);
}
// Lib for working with structures as contained in mmCIF and PDB files
#include "libcif/point.h"
#include "cif++/Point.h"
using namespace std;
......@@ -32,16 +32,16 @@ quaternion Normalize(quaternion q)
// --------------------------------------------------------------------
float DihedralAngle(const point& p1, const point& p2, const point& p3, const point& p4)
float DihedralAngle(const Point& p1, const Point& p2, const Point& p3, const Point& p4)
{
point v12 = p1 - p2; // vector from p2 to p1
point v43 = p4 - p3; // vector from p3 to p4
Point v12 = p1 - p2; // vector from p2 to p1
Point v43 = p4 - p3; // vector from p3 to p4
point z = p2 - p3; // vector from p3 to p2
Point z = p2 - p3; // vector from p3 to p2
point p = CrossProduct(z, v12);
point x = CrossProduct(z, v43);
point y = CrossProduct(z, x);
Point p = CrossProduct(z, v12);
Point x = CrossProduct(z, v43);
Point y = CrossProduct(z, x);
double u = DotProduct(x, x);
double v = DotProduct(y, y);
......@@ -58,10 +58,10 @@ float DihedralAngle(const point& p1, const point& p2, const point& p3, const poi
return result;
}
float CosinusAngle(const point& p1, const point& p2, const point& p3, const point& p4)
float CosinusAngle(const Point& p1, const Point& p2, const Point& p3, const Point& p4)
{
point v12 = p1 - p2;
point v34 = p3 - p4;
Point v12 = p1 - p2;
Point v34 = p3 - p4;
double result = 0;
......@@ -74,7 +74,7 @@ float CosinusAngle(const point& p1, const point& p2, const point& p3, const poin
// --------------------------------------------------------------------
tuple<double,point> QuaternionToAngleAxis(quaternion q)
tuple<double,Point> QuaternionToAngleAxis(quaternion q)
{
if (q.R_component_1() > 1)
q = Normalize(q);
......@@ -88,58 +88,58 @@ tuple<double,point> QuaternionToAngleAxis(quaternion q)
if (s < 0.001)
s = 1;
point axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);
Point axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);
return make_tuple(angle, axis);
}
point CenterPoints(vector<point>& points)
Point CenterPoints(vector<Point>& Points)
{
point t;
Point t;
for (point& pt : points)
for (Point& pt : Points)
{
t.x() += pt.x();
t.y() += pt.y();
t.z() += pt.z();
t.getX() += pt.getX();
t.getY() += pt.getY();
t.getZ() += pt.getZ();
}
t.x() /= points.size();
t.y() /= points.size();
t.z() /= points.size();
t.getX() /= Points.size();
t.getY() /= Points.size();
t.getZ() /= Points.size();
for (point& pt : points)
for (Point& pt : Points)
{
pt.x() -= t.x();
pt.y() -= t.y();
pt.z() -= t.z();
pt.getX() -= t.getX();
pt.getY() -= t.getY();
pt.getZ() -= t.getZ();
}
return t;
}
point Centroid(vector<point>& points)
Point Centroid(vector<Point>& Points)
{
point result;
Point result;
for (point& pt : points)
for (Point& pt : Points)
result += pt;
result /= points.size();
result /= Points.size();
return result;
}
double RMSd(const vector<point>& a, const vector<point>& b)
double RMSd(const vector<Point>& a, const vector<Point>& b)
{
double sum = 0;
for (uint32 i = 0; i < a.size(); ++i)
{
valarray<double> d(3);
d[0] = b[i].x() - a[i].x();
d[1] = b[i].y() - a[i].y();
d[2] = b[i].z() - a[i].z();
d[0] = b[i].getX() - a[i].getX();
d[1] = b[i].getY() - a[i].getY();
d[2] = b[i].getZ() - a[i].getZ();
d *= d;
......@@ -188,19 +188,19 @@ double LargestDepressedQuarticSolution(double a, double b, double c)
return t.max();
}
//quaternion AlignPoints(const vector<point>& pa, const vector<point>& pb)
//quaternion AlignPoints(const vector<Point>& pa, const vector<Point>& pb)
//{
// // First calculate M, a 3x3 matrix containing the sums of products of the coordinates of A and B
// matrix<double> M(3, 3, 0);
//
// for (uint32 i = 0; i < pa.size(); ++i)
// {
// const point& a = pa[i];
// const point& b = pb[i];
// const Point& a = pa[i];
// const Point& b = pb[i];
//
// M(0, 0) += a.x() * b.x(); M(0, 1) += a.x() * b.y(); M(0, 2) += a.x() * b.z();
// M(1, 0) += a.y() * b.x(); M(1, 1) += a.y() * b.y(); M(1, 2) += a.y() * b.z();
// M(2, 0) += a.z() * b.x(); M(2, 1) += a.z() * b.y(); M(2, 2) += a.z() * b.z();
// M(0, 0) += a.getX() * b.getX(); M(0, 1) += a.getX() * b.getY(); M(0, 2) += a.getX() * b.getZ();
// M(1, 0) += a.getY() * b.getX(); M(1, 1) += a.getY() * b.getY(); M(1, 2) += a.getY() * b.getZ();
// M(2, 0) += a.getZ() * b.getX(); M(2, 1) += a.getZ() * b.getY(); M(2, 2) += a.getZ() * b.getZ();
// }
//
// // Now calculate N, a symmetric 4x4 matrix
......
// Lib for working with structures as contained in file and PDB files
#include "libcif/structure.h"
#include "cif++/Structure.h"
#include <boost/algorithm/string.hpp>
#include <boost/filesystem/fstream.hpp>
......@@ -8,9 +8,9 @@
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "pdb2cif.h"
#include "libcif/cif-parser.h"
#include "cif2pdb.h"
#include "cif++/PDB2Cif.h"
#include "cif++/CifParser.h"
#include "cif++/Cif2PDB.h"
using namespace std;
......@@ -24,21 +24,21 @@ namespace libcif
{
// --------------------------------------------------------------------
// file_impl
// FileImpl
struct file_impl
struct FileImpl
{
cif::file m_data;
cif::datablock* m_db = nullptr;
cif::File mData;
cif::datablock* mDb = nullptr;
void load(fs::path p);
void save(fs::path p);
};
void file_impl::load(fs::path p)
void FileImpl::load(fs::path p)
{
fs::ifstream infile(p, ios_base::in | ios_base::binary);
if (not infile.is_open())
fs::ifstream inFile(p, ios_base::in | ios_base::binary);
if (not inFile.is_open())
throw runtime_error("No such file: " + p.string());
io::filtering_stream<io::input> in;
......@@ -55,13 +55,13 @@ void file_impl::load(fs::path p)
ext = p.stem().extension().string();
}
in.push(infile);
in.push(inFile);
// OK, we've got the file, now create a protein
if (ext == ".cif")
m_data.load(in);
mData.load(in);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, m_data);
ReadPDBFile(in, mData);
else
{
try
......@@ -69,9 +69,9 @@ void file_impl::load(fs::path p)
if (VERBOSE)
cerr << "unrecognized file extension, trying cif" << endl;
m_data.load(in);
mData.load(in);
}
catch (const cif::cif_parser_error& e)
catch (const cif::cifParserError& e)
{
if (VERBOSE)
cerr << "Not cif, trying plain old PDB" << endl;
......@@ -79,34 +79,34 @@ void file_impl::load(fs::path p)
// pffft...
in.reset();
if (infile.is_open())
infile.seekg(0);
if (inFile.is_open())
inFile.seekg(0);
else
infile.open(p, ios_base::in | ios::binary);
inFile.open(p, ios_base::in | ios::binary);
if (p.extension() == ".bz2")
in.push(io::bzip2_decompressor());
else if (p.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(infile);
in.push(inFile);
ReadPDBFile(in, m_data);
ReadPDBFile(in, mData);
}
}
// Yes, we've parsed the data. Now locate the datablock.
m_db = &m_data.first_datablock();
mDb = &mData.firstDatablock();
// And validate, otherwise lots of functionality won't work
// if (m_data.get_validator() == nullptr)
m_data.load_dictionary("mmcif_pdbx");
m_data.validate();
// if (mData.getValidator() == nullptr)
mData.loadDictionary("mmcif_pdbx");
mData.validate();
}
void file_impl::save(fs::path p)
void FileImpl::save(fs::path p)
{
fs::ofstream outfile(p, ios_base::out | ios_base::binary);
fs::ofstream outFile(p, ios_base::out | ios_base::binary);
io::filtering_stream<io::output> out;
if (p.extension() == ".gz")
......@@ -120,33 +120,33 @@ void file_impl::save(fs::path p)
p = p.stem();
}
out.push(outfile);
out.push(outFile);
if (p.extension() == ".pdb")
WritePDBFile(out, m_data);
WritePDBFile(out, mData);
else
m_data.save(out);
mData.save(out);
}
// --------------------------------------------------------------------
// atom
struct atom_impl
struct atomImpl
{
atom_impl(const file& f, const string& id)
: m_file(f), m_id(id), m_refcount(1), m_compound(nullptr)
atomImpl(const File& f, const string& id)
: mFile(f), mId(id), mRefcount(1), mCompound(nullptr)
{
auto& db = *m_file.impl().m_db;
auto& db = *mFile.impl().mDb;
auto& cat = db["atom_site"];
m_row = cat[cif::key("id") == m_id];
mRow = cat[cif::key("id") == mId];
prefetch();
}
atom_impl(const file& f, const string& id, cif::row row)
: m_file(f), m_id(id), m_refcount(1), m_row(row), m_compound(nullptr)
atomImpl(const file& f, const string& id, cif::row row)
: mFile(f), mId(id), mRefcount(1), mRow(row), mCompound(nullptr)
{
prefetch();
}
......@@ -155,14 +155,14 @@ struct atom_impl
{
// Prefetch some data
string symbol;
cif::tie(symbol) = m_row.get("type_symbol");
cif::tie(symbol) = mRow.get("type_symbol");
m_type = atom_type_traits(symbol).type();
mType = atomTypeTraits(symbol).type();
float x, y, z;
cif::tie(x, y, z) = m_row.get("Cartn_x", "Cartn_y", "Cartn_z");
cif::tie(x, y, z) = mRow.get("Cartn_x", "Cartn_y", "Cartn_z");
m_location = point(x, y, z);
mLocation = point(x, y, z);
try
{
......@@ -173,80 +173,80 @@ struct atom_impl
void reference()
{
++m_refcount;
++mRefcount;
}
void release()
{
if (--m_refcount < 0)
if (--mRefcount < 0)
delete this;
}
const compound& comp()
{
if (m_compound == nullptr)
if (mCompound == nullptr)
{
string comp_id;
cif::tie(comp_id) = m_row.get("label_comp_id");
string compId;
cif::tie(compId) = mRow.get("label_comp_id");
m_compound = compound::create(comp_id);
mCompound = compound::create(compId);
}
if (m_compound == nullptr)
if (mCompound == nullptr)
throw runtime_error("no compound");
return *m_compound;
return *mCompound;
}
bool is_water() const
bool isWater() const
{
return m_compound != nullptr and m_compound->is_water();
return mCompound != nullptr and mCompound->isWater();
}
const file& m_file;
string m_id;
int m_refcount;
cif::row m_row;
const compound* m_compound;
point m_location;
atom_type m_type;
// const entity& m_entity;
// std::string m_asym_id;
// std::string m_atom_id;
// point m_loc;
// property_list m_properties;
const file& mFile;
string mId;
int mRefcount;
cif::row mRow;
const compound* mCompound;
point mLocation;
atomType mType;
// const entity& mEntity;
// std::string mAsymId;
// std::string mAtomId;
// point mLoc;
// propertyList mProperties;
};
atom::atom(const file& f, const string& id)
: m_impl(new atom_impl(f, id))
: mImpl(new atomImpl(f, id))
{
}
atom::atom(atom_impl* impl)
: m_impl(impl)
atom::atom(atomImpl* impl)
: mImpl(impl)
{
}
atom::atom(const atom& rhs)
: m_impl(rhs.m_impl)
: mImpl(rhs.mImpl)
{
m_impl->reference();
mImpl->reference();
}
atom::~atom()
{
if (m_impl)
m_impl->release();
if (mImpl)
mImpl->release();
}
atom& atom::operator=(const atom& rhs)
{
if (this != &rhs)
{
m_impl->release();
m_impl = rhs.m_impl;
m_impl->reference();
mImpl->release();
mImpl = rhs.mImpl;
mImpl->reference();
}
return *this;
......@@ -254,108 +254,108 @@ atom& atom::operator=(const atom& rhs)
string atom::id() const
{
return m_impl->m_id;
return mImpl->mId;
}
atom_type atom::type() const
atomType atom::type() const
{
return m_impl->m_type;
return mImpl->mType;
}
int atom::charge() const
{
int charge;
cif::tie(charge) = m_impl->m_row.get("pdbx_formal_charge");
cif::tie(charge) = mImpl->mRow.get("pdbx_formal_charge");
return charge;
}
string atom::label_atom_id() const
string atom::labelAtomId() const
{
string atom_id;
cif::tie(atom_id) = m_impl->m_row.get("label_atom_id");
string atomId;
cif::tie(atomId) = mImpl->mRow.get("label_atom_id");
return atom_id;
return atomId;
}
string atom::label_comp_id() const
string atom::labelCompId() const
{
string comp_id;
cif::tie(comp_id) = m_impl->m_row.get("label_comp_id");
string compId;
cif::tie(compId) = mImpl->mRow.get("label_comp_id");
return comp_id;
return compId;
}
string atom::label_asym_id() const
string atom::labelAsymId() const
{
string asym_id;
cif::tie(asym_id) = m_impl->m_row.get("label_asym_id");
string asymId;
cif::tie(asymId) = mImpl->mRow.get("label_asym_id");
return asym_id;
return asymId;
}
int atom::label_seq_id() const
int atom::labelSeqId() const
{
int seq_id;
cif::tie(seq_id) = m_impl->m_row.get("label_seq_id");
int seqId;
cif::tie(seqId) = mImpl->mRow.get("label_seq_id");
return seq_id;
return seqId;
}
string atom::auth_asym_id() const
string atom::authAsymId() const
{
string asym_id;
cif::tie(asym_id) = m_impl->m_row.get("auth_asym_id");
string asymId;
cif::tie(asymId) = mImpl->mRow.get("auth_asym_id");
return asym_id;
return asymId;
}
int atom::auth_seq_id() const
int atom::authSeqId() const
{
int seq_id;
cif::tie(seq_id) = m_impl->m_row.get("auth_seq_id");
int seqId;
cif::tie(seqId) = mImpl->mRow.get("auth_seq_id");
return seq_id;
return seqId;
}
point atom::location() const
{
return m_impl->m_location;
return mImpl->mLocation;
}
const compound& atom::comp() const
{
return m_impl->comp();
return mImpl->comp();
}
bool atom::is_water() const
bool atom::isWater() const
{
return m_impl->is_water();
return mImpl->isWater();
}
boost::any atom::property(const std::string& name) const
{
string s = m_impl->m_row[name].as<string>();
string s = mImpl->mRow[name].as<string>();
return boost::any(s);
}
bool atom::operator==(const atom& rhs) const
{
return m_impl == rhs.m_impl or
(&m_impl->m_file == &rhs.m_impl->m_file and m_impl->m_id == rhs.m_impl->m_id);
return mImpl == rhs.mImpl or
(&mImpl->mFile == &rhs.mImpl->mFile and mImpl->mId == rhs.mImpl->mId);
}
const file& atom::get_file() const
const file& atom::getFile() const
{
assert(m_impl);
return m_impl->m_file;
assert(mImpl);
return mImpl->mFile;
}
// --------------------------------------------------------------------
// residue
//atom_view residue::atoms()
//atomView residue::atoms()
//{
// assert(false);
//}
......@@ -370,28 +370,28 @@ const file& atom::get_file() const
// file
file::file()
: m_impl(new file_impl)
: mImpl(new FileImpl)
{
}
file::file(fs::path file)
: m_impl(new file_impl)
File::file(fs::path file)
: mImpl(new FileImpl)
{
load(file);
}
file::~file()
{
delete m_impl;
delete mImpl;
}
void file::load(fs::path p)
{
m_impl->load(p);
mImpl->load(p);
// // all data is now in m_file, construct atoms and others
// // all data is now in mFile, construct atoms and others
//
// auto& db = m_file.first_datablock();
// auto& db = mFile.firstDatablock();
//
// // the entities
//
......@@ -409,10 +409,10 @@ void file::load(fs::path p)
// entities.push_back({ _e["id"], type });
// }
//
// auto& atom_sites = db["atom_site"];
// for (auto& atom_site: atom_sites)
// auto& atomSites = db["atom_site"];
// for (auto& atomSite: atomSites)
// {
// atom_ptr ap(new atom(this, atom_site));
// atomPtr ap(new atom(this, atom_site));
//
// string entity_id = atom_site["entity_id"];
//
......@@ -443,115 +443,115 @@ void file::load(fs::path p)
void file::save(boost::filesystem::path file)
{
m_impl->save(file);
mImpl->save(file);
}
cif::datablock& file::data()
{
assert(m_impl);
assert(m_impl->m_db);
assert(mImpl);
assert(mImpl->mDb);
if (m_impl == nullptr or m_impl->m_db == nullptr)
if (mImpl == nullptr or mImpl->mDb == nullptr)
throw runtime_error("No data loaded");
return *m_impl->m_db;
return *mImpl->mDb;
}
// --------------------------------------------------------------------
// structure
struct structure_impl
struct structureImpl
{
structure_impl(structure& s, file& f, uint32 model_nr)
: m_file(&f), m_model_nr(model_nr)
structureImpl(structure& s, file& f, uint32 modelNr)
: mFile(&f), mModelNr(modelNr)
{
auto& db = *m_file->impl().m_db;
auto& atom_cat = db["atom_site"];
auto& db = *mFile->impl().mDb;
auto& atomCat = db["atom_site"];
for (auto& a: atom_cat)
for (auto& a: atomCat)
{
auto model_nr = a["pdbx_PDB_model_num"];
auto modelNr = a["pdbx_PDB_model_num"];
if (model_nr.empty() or model_nr.as<uint32>() == m_model_nr)
m_atoms.emplace_back(new atom_impl(f, a["id"].as<string>(), a));
if (modelNr.empty() or modelNr.as<uint32>() == mModelNr)
mAtoms.emplace_back(new atomImpl(f, a["id"].as<string>(), a));
}
}
void remove_atom(atom& a);
void removeAtom(atom& a);
file* m_file;
uint32 m_model_nr;
atom_view m_atoms;
file* mFile;
uint32 mModelNr;
atomView mAtoms;
};
void structure_impl::remove_atom(atom& a)
void structureImpl::removeAtom(atom& a)
{
cif::datablock& db = *m_file->impl().m_db;
cif::datablock& db = *mFile->impl().mDb;
auto& atom_sites = db["atom_site"];
auto& atomSites = db["atom_site"];
for (auto i = atom_sites.begin(); i != atom_sites.end(); ++i)
for (auto i = atomSites.begin(); i != atomSites.end(); ++i)
{
string id;
cif::tie(id) = i->get("id");
if (id == a.id())
{
atom_sites.erase(i);
atomSites.erase(i);
break;
}
}
m_atoms.erase(remove(m_atoms.begin(), m_atoms.end(), a), m_atoms.end());
mAtoms.erase(remove(mAtoms.begin(), mAtoms.end(), a), mAtoms.end());
}
structure::structure(file& f, uint32 model_nr)
: m_impl(new structure_impl(*this, f, model_nr))
structure::structure(file& f, uint32 modelNr)
: mImpl(new structureImpl(*this, f, modelNr))
{
}
structure::~structure()
{
delete m_impl;
delete mImpl;
}
atom_view structure::atoms() const
atomView structure::atoms() const
{
return m_impl->m_atoms;
return mImpl->mAtoms;
}
atom_view structure::waters() const
atomView structure::waters() const
{
atom_view result;
atomView result;
auto& db = *get_file().impl().m_db;
auto& db = *getFile().impl().mDb;
// Get the entity id for water
auto& entity_cat = db["entity"];
string water_entity_id;
for (auto& e: entity_cat)
auto& entityCat = db["entity"];
string waterEntityId;
for (auto& e: entityCat)
{
string id, type;
cif::tie(id, type) = e.get("id", "type");
if (ba::iequals(type, "water"))
{
water_entity_id = id;
waterEntityId = id;
break;
}
}
for (auto& a: m_impl->m_atoms)
for (auto& a: mImpl->mAtoms)
{
if (boost::any_cast<string>(a.property("label_entity_id")) == water_entity_id)
if (boost::any_cast<string>(a.property("label_entity_id")) == waterEntityId)
result.push_back(a);
}
return result;
}
atom structure::get_atom_by_id(string id) const
atom structure::getAtomById(string id) const
{
for (auto& a: m_impl->m_atoms)
for (auto& a: mImpl->mAtoms)
{
if (a.id() == id)
return a;
......@@ -560,15 +560,15 @@ atom structure::get_atom_by_id(string id) const
throw out_of_range("Could not find atom with id " + id);
}
file& structure::get_file() const
file& structure::getFile() const
{
return *m_impl->m_file;
return *mImpl->mFile;
}
//tuple<string,string> structure::MapLabelToAuth(
// const string& asym_id, int seq_id)
// const string& asymId, int seqId)
//{
// auto& db = *get_file().impl().m_db;
// auto& db = *getFile().impl().mDb;
//
// tuple<string,int,string,string> result;
// bool found = false;
......@@ -610,40 +610,40 @@ file& structure::get_file() const
//}
tuple<string,int,string,string> structure::MapLabelToPDB(
const string& asym_id, int seq_id, const string& mon_id)
const string& asymId, int seqId, const string& monId)
{
auto& db = *get_file().impl().m_db;
auto& db = *getFile().impl().mDb;
tuple<string,int,string,string> result;
for (auto r: db["pdbx_poly_seq_scheme"].find(
cif::key("asym_id") == asym_id and
cif::key("seq_id") == seq_id and
cif::key("mon_id") == mon_id))
cif::key("asym_id") == asymId and
cif::key("seq_id") == seqId and
cif::key("mon_id") == monId))
{
string pdb_strand_id, pdb_mon_id, pdb_ins_code;
int pdb_seq_num;
string pdbStrandId, pdbMonId, pdbInsCode;
int pdbSeqNum;
cif::tie(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code) =
cif::tie(pdbStrandId, pdbSeqNum, pdbMonId, pdbInsCode) =
r.get("pdb_strand_id", "pdb_seq_num", "pdb_mon_id", "pdb_ins_code");
result = make_tuple(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code);
result = make_tuple(pdbStrandId, pdbSeqNum, pdbMonId, pdbInsCode);
break;
}
for (auto r: db["pdbx_nonpoly_scheme"].find(
cif::key("asym_id") == asym_id and
cif::key("seq_id") == seq_id and
cif::key("mon_id") == mon_id))
cif::key("asym_id") == asymId and
cif::key("seq_id") == seqId and
cif::key("mon_id") == monId))
{
string pdb_strand_id, pdb_mon_id, pdb_ins_code;
int pdb_seq_num;
string pdbStrandId, pdbMonId, pdbInsCode;
int pdbSeqNum;
cif::tie(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code) =
cif::tie(pdbStrandId, pdbSeqNum, pdbMonId, pdbInsCode) =
r.get("pdb_strand_id", "pdb_seq_num", "pdb_mon_id", "pdb_ins_code");
result = make_tuple(pdb_strand_id, pdb_seq_num, pdb_mon_id, pdb_ins_code);
result = make_tuple(pdbStrandId, pdbSeqNum, pdbMonId, pdbInsCode);
break;
}
......@@ -654,9 +654,9 @@ tuple<string,int,string,string> structure::MapLabelToPDB(
// --------------------------------------------------------------------
// actions
void structure::remove_atom(atom& a)
void structure::removeAtom(atom& a)
{
m_impl->remove_atom(a);
mImpl->removeAtom(a);
}
}
// cif parsing library
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <new>
#include <regex>
#include <set>
#include <stack>
#include <tuple>
#include <unordered_map>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/fstream.hpp>
#if defined(USE_RSRC)
#include "mrsrc.h"
#endif
#include "cif++.h"
#include "cif-parser.h"
#include "cif-validator.h"
#include "cif-utils.h"
using namespace std;
namespace ba = boost::algorithm;
namespace fs = boost::filesystem;
extern int VERBOSE;
namespace cif
{
static const char* kEmptyResult = "";
// --------------------------------------------------------------------
// most internal data structures are stored as linked lists
// item values are stored in a simple struct. They should be const anyway
// One value of a row. Values of the same row form a singly linked list; each
// node records the column it belongs to and carries its text inline.
struct item_value
{
// next value of the same row, nullptr at the end of the list
item_value* m_next;
// index of the column this value belongs to
uint32 m_column_index;
// the text itself; the storage is allocated directly behind the node by
// the class specific operator new below
char m_text[0];
// copies v into m_text, the node starts out unlinked
item_value(const char* v, uint32 column_index);
// also destroys all nodes linked behind this one
~item_value();
// data_size is the number of extra bytes to reserve for the text
void* operator new(size_t size, size_t data_size);
void operator delete(void* p);
};
// --------------------------------------------------------------------
// Creates an unlinked value for column column_index and copies the text into
// the storage trailing the node (reserved by operator new).
item_value::item_value(const char* value, uint32 column_index)
	: m_next(nullptr)
	, m_column_index(column_index)
{
	strcpy(m_text, value);
}
// Destroys this value and every value linked behind it.
item_value::~item_value()
{
	// Detach each successor before deleting it, so its own destructor finds
	// nothing to clean up and the chain is released without recursion. The
	// self reference test is a defensive guard against a corrupted list.
	for (;;)
	{
		item_value* successor = m_next;
		if (successor == nullptr or successor == this)
			break;

		m_next = successor->m_next;
		successor->m_next = nullptr;
		delete successor;
	}
}
// Allocates room for the node itself plus data_size characters of text and
// the terminating zero.
// Throws std::bad_alloc when the memory cannot be obtained: an allocation
// function that is not declared noexcept must not return a null pointer,
// the constructor would otherwise run on it.
void* item_value::operator new(size_t size, size_t data_size)
{
	void* p = malloc(size + data_size + 1);
	if (p == nullptr)
		throw std::bad_alloc();
	return p;
}
// Counterpart of the class specific operator new: the storage came from
// malloc and therefore goes back through free.
void item_value::operator delete(void* p)
{
free(p);
}
// --------------------------------------------------------------------
// item_column contains info about a column or field in a category
// Description of a single column (field) of a category.
struct item_column
{
string m_name; // store lower-case, for optimization
// validator for the values in this column, nullptr when none is known
const validate_item* m_validator;
};
// item_row contains the actual values for a row in a category
// A row of a category: a linked list of values, chained to the next row of
// the same category. The member order matters, rows are created with
// aggregate initialisation ({ next, category, values }).
struct item_row
{
	// destroys the values of this row and all rows linked behind it
	~item_row();

	// remove the value stored for column column_ix, if any
	void drop(uint32 column_ix);

	// text stored for column column_ix, or the empty result
	const char* c_str(uint32 column_ix) const;

	// debugging aid, formats the row as {column:value, column:value}
	string str() const
	{
		stringstream out;
		out << '{';

		const char* separator = "";
		for (const item_value* value = m_values; value != nullptr; value = value->m_next)
		{
			out << separator
				<< m_category->get_column_name(value->m_column_index)
				<< ':'
				<< value->m_text;
			separator = ", ";
		}

		out << '}';
		return out.str();
	}

	item_row* m_next;
	category* m_category;
	item_value* m_values;
};
// Writes a row as category[value,value,...]
ostream& operator<<(ostream& os, const item_row& r)
{
	os << r.m_category->name() << '[';

	const item_value* value = r.m_values;
	while (value != nullptr)
	{
		os << value->m_text;

		// a comma only between values, not after the last one
		value = value->m_next;
		if (value != nullptr)
			os << ',';
	}

	return os << ']';
}
// --------------------------------------------------------------------
// Destroys this row, its values and every row linked behind it.
item_row::~item_row()
{
	// Release the following rows one by one, each detached first, to avoid
	// recursing through the destructors of a long list.
	for (item_row* successor = m_next;
		successor != nullptr and successor != this;
		successor = m_next)
	{
		m_next = successor->m_next;
		successor->m_next = nullptr;
		delete successor;
	}

	// the value destructor takes care of the whole chain of values
	delete m_values;
}
// Removes the first value stored for column column_ix from this row.
// Does nothing when the row has no value for that column; this includes a
// row without any values, which used to dereference a null pointer.
void item_row::drop(uint32 column_ix)
{
	// Walk the list by way of the link that points at each node, head and
	// interior nodes can then be unlinked by the same code.
	for (item_value** link = &m_values; *link != nullptr; link = &(*link)->m_next)
	{
		if ((*link)->m_column_index != column_ix)
			continue;

		item_value* victim = *link;
		*link = victim->m_next;

		// detach first, the destructor would otherwise take the tail with it
		victim->m_next = nullptr;
		delete victim;
		break;
	}

#if DEBUG
	for (auto iv = m_values; iv != nullptr; iv = iv->m_next)
		assert(iv != iv->m_next and (iv->m_next == nullptr or iv != iv->m_next->m_next));
#endif
}
// Returns the text stored for column column_ix, or kEmptyResult when this
// row holds no value for that column.
const char* item_row::c_str(uint32 column_ix) const
{
	for (const item_value* value = m_values; value != nullptr; value = value->m_next)
	{
		if (value->m_column_index == column_ix)
			return value->m_text;
	}

	return kEmptyResult;
}
// --------------------------------------------------------------------
namespace detail
{
// Specialisation for string values: stores value in the referenced item of
// the row by forwarding to row::assign. The third argument is passed as
// false here, whereas category::emplace passes true.
template<>
item_reference& item_reference::operator=(const string& value)
{
row(m_row).assign(m_name, value, false);
return *this;
}
// Returns the text of the referenced item. The result is kEmptyResult when
// there is no row, when the row has no value for the item, or when the
// stored value is a single '.'.
const char*
item_reference::c_str() const
{
	if (m_row == nullptr)
		return kEmptyResult;

	const auto column = m_row->m_category->get_column_index(m_name);

	for (auto value = m_row->m_values; value != nullptr; value = value->m_next)
	{
		if (value->m_column_index != column)
			continue;

		const bool isSingleDot = value->m_text[0] == '.' and value->m_text[1] == 0;
		if (isSingleDot)
			return kEmptyResult;

		return value->m_text;
	}

	return kEmptyResult;
}
// True when c_str() has nothing to offer. The pointers are compared on
// purpose, not the text: c_str() hands out the kEmptyResult sentinel itself
// for every 'no value' case.
bool item_reference::empty() const
{
return c_str() == kEmptyResult;
}
}
// --------------------------------------------------------------------
// datablock implementation
// Creates an empty datablock called name, without validator and without a
// following datablock.
datablock::datablock(const string& name)
: m_name(name), m_validator(nullptr), m_next(nullptr)
{
}
// Datablocks are chained through m_next, destroying one destroys all that
// follow it.
datablock::~datablock()
{
delete m_next;
}
// Returns the value of item tag (category and item name combined) taken
// from the first row of its category. The result is an empty string when
// the datablock has no category of that name.
string datablock::first_item(const string& tag) const
{
	string cat_name, item_name;
	std::tie(cat_name, item_name) = split_tag_name(tag);

	for (auto& cat: m_categories)
	{
		// category names compare case insensitive
		if (not iequals(cat.name(), cat_name))
			continue;

		return cat.get_first_item(item_name.c_str()).as<string>();
	}

	return string();
}
// Looks up the category called name (case insensitive) and appends a new,
// empty one when it does not exist yet. Returns the position of the
// category together with a flag telling whether it was newly created.
auto datablock::emplace(const string& name) -> tuple<iterator,bool>
{
	iterator pos = find_if(begin(), end(),
		[&name](const category& cat) { return iequals(cat.name(), name); });

	const bool created = (pos == end());
	if (created)
		pos = m_categories.emplace(end(), *this, name, m_validator);

	return make_tuple(pos, created);
}
// Access by name. A category that does not exist yet is created on the fly,
// use get() to avoid that.
category& datablock::operator[](const string& name)
{
	// std:: is spelled out, a plain get would find the member function
	return *std::get<0>(emplace(name));
}
// Returns the category called name (case insensitive), or nullptr when this
// datablock does not contain it. Never creates a category.
category* datablock::get(const string& name)
{
	for (auto& cat: *this)
	{
		if (iequals(cat.name(), name))
			return &cat;
	}

	return nullptr;
}
// Validates all categories in this datablock.
// Throws runtime_error when no validator has been set.
void datablock::validate()
{
if (m_validator == nullptr)
throw runtime_error("validator not specified");
for (auto& cat: *this)
cat.validate();
}
// Stores the validator and passes it on to every category already in this
// datablock. v may be nullptr.
void datablock::set_validator(validator* v)
{
m_validator = v;
for (auto& cat: *this)
cat.set_validator(v);
}
// Appends the tags of all categories to tags, in category order.
void datablock::get_tag_order(vector<string>& tags) const
{
for (auto& cat: *this)
cat.get_tag_order(tags);
}
// Writes the datablock: the data_ header, the 'entry' category (followed by
// a generated audit_conform record when a validator is set) and after that
// all remaining categories in their stored order.
void datablock::write(ostream& os)
{
os << "data_" << m_name << endl
<< "# " << endl;
// mmcif support, sort of. First write the 'entry' category
// and if it exists, _AND_ we have a validator, write out the
// audit_conform record.
for (auto& cat: m_categories)
{
if (cat.name() == "entry")
{
cat.write(os);
if (m_validator != nullptr)
{
// a temporary category, constructed without validator, it is
// written and discarded and never becomes part of m_categories
category audit_conform(*this, "audit_conform", nullptr);
audit_conform.emplace({
{ "dict_name", m_validator->dict_name() },
{ "dict_version", m_validator->dict_version() }
});
audit_conform.write(os);
}
break;
}
}
// a stored audit_conform category is skipped here, so it is never written
// by this function; only the generated record above is
for (auto& cat: m_categories)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
}
}
// Writes the datablock with categories and items in the order given by
// order, a list of tags. Categories appear in order of first mention,
// the items of each category in the order of their tags. Categories not
// mentioned in order are written afterwards in their stored order.
// Contrary to write(ostream&) this overload does not generate an
// audit_conform record.
void datablock::write(ostream& os, const vector<string>& order)
{
	os << "data_" << m_name << endl
	   << "# " << endl;

	// the categories, in order of first appearance
	vector<string> catOrder;
	for (auto& o: order)
	{
		string cat, item;
		std::tie(cat, item) = split_tag_name(o);

		if (find_if(catOrder.rbegin(), catOrder.rend(),
				[&cat](const string& s) -> bool { return iequals(cat, s); }) == catOrder.rend())
			catOrder.push_back(cat);
	}

	for (auto& c: catOrder)
	{
		auto cat = get(c);
		if (cat == nullptr)
			continue;

		vector<string> items;
		for (auto& o: order)
		{
			string cat_name, item;
			std::tie(cat_name, item) = split_tag_name(o);

			// Compare the same way catOrder was built. A tag spelling the
			// category with different case was folded into c above and
			// its item would be lost with a case sensitive compare.
			if (iequals(cat_name, c))
				items.push_back(item);
		}

		cat->write(os, items);
	}

	// for any category we missed in the catOrder
	for (auto& cat: m_categories)
	{
		if (find_if(catOrder.begin(), catOrder.end(),
				[&](const string& s) -> bool { return iequals(cat.name(), s); }) != catOrder.end())
			continue;

		cat.write(os);
	}
}
// --------------------------------------------------------------------
//
// class to compare two rows based on their keys.
// Three way comparison of rows: operator() returns a negative value, zero
// or a positive value. Note that this is not a 'less than' predicate.
class row_comparator
{
public:
// compare on the key fields of the category; requires cat to have a
// category validator, it is dereferenced without a check
row_comparator(category* cat)
: row_comparator(cat, cat->get_cat_validator()->m_keys.begin(), cat->get_cat_validator()->m_keys.end())
{
}
// compare on the items whose names are in the range [b, e)
template<typename KeyIter>
row_comparator(category* cat, KeyIter b, KeyIter e);
int operator()(const item_row* a, const item_row* b) const;
int operator()(const row& a, const row& b) const
{
return operator()(a.m_data, b.m_data);
}
private:
typedef function<int(const char*,const char*)> compare_func;
// column index and the compare function to use for that column
typedef tuple<size_t,compare_func> key_comp;
vector<key_comp> m_comp;
};
// Collects, for each item name in [b, e), the column index in cat and the
// compare function of the type validator for that item.
// Throws runtime_error when the dictionary has no item validator or no
// type validator for one of the items.
template<typename KeyIter>
row_comparator::row_comparator(category* cat, KeyIter b, KeyIter e)
{
	using namespace placeholders;

	const auto categoryValidator = cat->get_cat_validator();

	for (; b != e; ++b)
	{
		const string key = *b;
		const size_t column = cat->get_column_index(key);

		const auto itemValidator = categoryValidator->get_validator_for_item(key);
		if (itemValidator == nullptr)
			throw runtime_error("Incomplete dictionary, no item validator for key " + key);

		const auto typeValidator = itemValidator->m_type;
		if (typeValidator == nullptr)
			throw runtime_error("Incomplete dictionary, no type validator for item " + key);

		m_comp.emplace_back(column, bind(&validate_type::compare, typeValidator, _1, _2));
	}
}
// Compares a and b column by column, in the order the columns were
// registered. Returns the result of the first column that differs, zero
// when all columns compare equal.
int row_comparator::operator()(const item_row* a, const item_row* b) const
{
	assert(a);
	assert(b);

	for (const auto& comp: m_comp)
	{
		const size_t column = std::get<0>(comp);
		const compare_func& compare = std::get<1>(comp);

		const int d = compare(a->c_str(column), b->c_str(column));
		if (d != 0)
			return d;
	}

	return 0;
}
// --------------------------------------------------------------------
//
// class to keep an index on the keys of a category. This is a red/black
// tree implementation.
// Index on the key fields of a category. The entries of the tree refer to
// the rows of the category, the rows themselves are not owned by the index.
// Red links are kept leaning to the left: both insert and fixUp rotate a
// red right child to the left.
class cat_index
{
public:
cat_index(category* cat);
~cat_index();
// returns the row whose keys compare equal to those of k, or nullptr
item_row* find(item_row* k) const;
// throws runtime_error when a row with the same keys is present already
void insert(item_row* r);
void erase(item_row* r);
// batch create
void reconstruct();
// reorder the item_row's and returns new head and tail
tuple<item_row*,item_row*> reorder()
{
tuple<item_row*,item_row*> result = make_tuple(nullptr, nullptr);
if (m_root != nullptr)
{
entry* head = findMin(m_root);
entry* tail = reorder(m_root);
// the recursive reorder leaves the m_next of the last row untouched
tail->m_row->m_next = nullptr;
result = make_tuple(head->m_row, tail->m_row);
}
return result;
}
// number of entries in the tree
size_t size() const;
// checks the tree invariants using assert
void validate() const;
private:
// node of the tree, owns its children
struct entry
{
// new nodes start out red
entry(item_row* r)
: m_row(r), m_left(nullptr), m_right(nullptr), m_red(true) {}
~entry()
{
delete m_left;
delete m_right;
}
item_row* m_row;
entry* m_left;
entry* m_right;
bool m_red;
};
entry* insert(entry* h, item_row* v);
entry* erase(entry* h, item_row* k);
void validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const;
// The right child of h becomes the root of this subtree. It takes over the
// colour of h, h itself turns red.
entry* rotateLeft(entry* h)
{
entry* x = h->m_right;
h->m_right = x->m_left;
x->m_left = h;
x->m_red = h->m_red;
h->m_red = true;
return x;
}
// mirror image of rotateLeft
entry* rotateRight(entry* h)
{
entry* x = h->m_left;
h->m_left = x->m_right;
x->m_right = h;
x->m_red = h->m_red;
h->m_red = true;
return x;
}
// inverts the colour of h and of both its children, as far as they exist
void flipColour(entry* h)
{
h->m_red = not h->m_red;
if (h->m_left != nullptr)
h->m_left->m_red = not h->m_left->m_red;
if (h->m_right != nullptr)
h->m_right->m_red = not h->m_right->m_red;
}
// a missing child counts as black
bool isRed(entry* h) const
{
return h != nullptr and h->m_red;
}
// used by erase and eraseMin before descending into a left child that is
// black and has a black left child
entry* moveRedLeft(entry* h)
{
flipColour(h);
if (h->m_right != nullptr and isRed(h->m_right->m_left))
{
h->m_right = rotateRight(h->m_right);
h = rotateLeft(h);
flipColour(h);
}
return h;
}
// used by erase before descending into a right child that is black and
// has a black left child
entry* moveRedRight(entry* h)
{
flipColour(h);
if (h->m_left != nullptr and isRed(h->m_left->m_left))
{
h = rotateRight(h);
flipColour(h);
}
return h;
}
// restores the invariants of the subtree at h on the way back up
entry* fixUp(entry* h)
{
if (isRed(h->m_right))
h = rotateLeft(h);
if (isRed(h->m_left) and isRed(h->m_left->m_left))
h = rotateRight(h);
if (isRed(h->m_left) and isRed(h->m_right))
flipColour(h);
return h;
}
// left most entry of the subtree at h, h must not be nullptr
entry* findMin(entry* h)
{
while (h->m_left != nullptr)
h = h->m_left;
return h;
}
// removes the left most entry of the subtree at h, returns the new root
// of that subtree
entry* eraseMin(entry* h)
{
if (h->m_left == nullptr)
{
// NOTE(review): this also destroys whatever hangs off h->m_right;
// presumably a node without a left child has no right child either
delete h;
h = nullptr;
}
else
{
if (not isRed(h->m_left) and not isRed(h->m_left->m_left))
h = moveRedLeft(h);
h->m_left = eraseMin(h->m_left);
h = fixUp(h);
}
return h;
}
// Fix m_next fields for rows in order of this index
// returns the last entry of the subtree at e
entry* reorder(entry* e)
{
auto result = e;
if (e->m_left != nullptr)
{
auto l = reorder(e->m_left);
l->m_row->m_next = e->m_row;
}
if (e->m_right != nullptr)
{
auto mr = findMin(e->m_right);
e->m_row->m_next = mr->m_row;
result = reorder(e->m_right);
}
return result;
}
category& m_cat;
row_comparator m_comp;
entry* m_root;
};
// Creates an empty index for cat. The comparator is built from the key
// fields of the category, so cat needs to have a category validator.
cat_index::cat_index(category* cat)
: m_cat(*cat), m_comp(cat), m_root(nullptr)
{
}
// Destroys all entries (each entry deletes its children). The rows the
// entries refer to are left alone.
cat_index::~cat_index()
{
delete m_root;
}
// Searches the row whose key fields compare equal to those of k. Returns
// that row, or nullptr when the index does not contain it.
item_row* cat_index::find(item_row* k) const
{
	for (const entry* node = m_root; node != nullptr; )
	{
		const int d = m_comp(k, node->m_row);
		if (d == 0)
			return node->m_row;

		node = (d < 0) ? node->m_left : node->m_right;
	}

	return nullptr;
}
// Adds row k to the index. The root is made black again afterwards, the
// recursive insert may have left it red.
// Throws runtime_error (from the recursive insert) on a duplicate key.
void cat_index::insert(item_row* k)
{
m_root = insert(m_root, k);
m_root->m_red = false;
}
// Inserts v into the subtree at h and returns the (possibly new) root of
// that subtree. Throws runtime_error when the subtree already contains a
// row with the same keys; nothing has been modified in that case.
cat_index::entry* cat_index::insert(entry* h, item_row* v)
{
	if (h == nullptr)
		return new entry(v);

	const int d = m_comp(v, h->m_row);

	if (d == 0)
		throw runtime_error("Duplicate key violation, cat: " + m_cat.name() + " values: " + v->str());

	if (d < 0)
		h->m_left = insert(h->m_left, v);
	else
		h->m_right = insert(h->m_right, v);

	// rebalance on the way up: lean red links left, break up two reds in a
	// row and split a node with two red children
	if (isRed(h->m_right) and not isRed(h->m_left))
		h = rotateLeft(h);

	if (isRed(h->m_left) and isRed(h->m_left->m_left))
		h = rotateRight(h);

	if (isRed(h->m_left) and isRed(h->m_right))
		flipColour(h);

	return h;
}
// Removes the entry for row k from the index and makes the root black
// again. Erasing from an empty index is a no-op; the recursive erase
// dereferences the node it is given and cannot be called with nullptr.
void cat_index::erase(item_row* k)
{
	if (m_root == nullptr)
		return;

	m_root = erase(m_root, k);
	if (m_root != nullptr)
		m_root->m_red = false;
}
// Removes the entry with the keys of k from the subtree at h and returns the
// new root of that subtree. h must not be nullptr.
cat_index::entry* cat_index::erase(entry* h, item_row* k)
{
if (m_comp(k, h->m_row) < 0)
{
// k is to be found on the left, if at all
if (h->m_left != nullptr)
{
if (not isRed(h->m_left) and not isRed(h->m_left->m_left))
h = moveRedLeft(h);
h->m_left = erase(h->m_left, k);
}
}
else
{
if (isRed(h->m_left))
h = rotateRight(h);
// found, and nothing on the right: the node can simply be removed
if (m_comp(k, h->m_row) == 0 and h->m_right == nullptr)
{
delete h;
return nullptr;
}
if (h->m_right != nullptr)
{
if (not isRed(h->m_right) and not isRed(h->m_right->m_left))
h = moveRedRight(h);
if (m_comp(k, h->m_row) == 0)
{
// found an inner node: let it take over the row of its successor
// and remove the entry of the successor instead
h->m_row = findMin(h->m_right)->m_row;
h->m_right = eraseMin(h->m_right);
}
else
h->m_right = erase(h->m_right, k);
}
}
return fixUp(h);
}
// Throws away the current tree and builds a new one by inserting the rows
// of the category one at a time. Throws runtime_error (from insert) when
// two rows have the same key.
//
// A faster, bottom up construction from a sorted list of rows was sketched
// here once, but it never got finished: setting the red/black flags
// correctly for a tree built that way was left unsolved.
void cat_index::reconstruct()
{
	delete m_root;
	m_root = nullptr;

	for (auto current: m_cat)
		insert(current.m_data);
}
size_t cat_index::size() const
{
stack<entry*> s;
s.push(m_root);
size_t result = 0;
while (not s.empty())
{
entry* e = s.top();
s.pop();
if (e == nullptr)
continue;
++result;
s.push(e->m_left);
s.push(e->m_right);
}
return result;
}
// Checks the invariants of the tree with assert: the root is black and
// every path down to a missing child passes the same number of black
// entries. An empty index is trivially valid.
void cat_index::validate() const
{
	if (m_root == nullptr)
		return;

	assert(not m_root->m_red);

	uint32 minBlack = numeric_limits<uint32>::max();
	uint32 maxBlack = 0;

	validate(m_root, false, 0, minBlack, maxBlack);

	assert(minBlack == maxBlack);
}
// Checks the subtree at h, using assert.
//   isParentRed  colour of the parent of h
//   blackDepth   number of black entries on the path from the root to h,
//                h itself not included
//   minBlack     updated with the smallest number of black entries seen on
//   maxBlack     any path to a missing child, respectively the largest
//
// The recursion into the right subtree used to pass the pointer h->m_right
// for isParentRed, which always converted to true. The only effect was
// that a red right child made an assertion fail. That check is spelled
// out below and the real colour of h is passed on now.
void cat_index::validate(entry* h, bool isParentRed, uint32 blackDepth, uint32& minBlack, uint32& maxBlack) const
{
	// no two red entries in a row
	if (h->m_red)
		assert(not isParentRed);
	else
		++blackDepth;

	// red links lean to the left, a right child is never red
	assert(not isRed(h->m_right));

	// a path ends at each missing child
	auto endOfPath = [&]()
	{
		if (minBlack > blackDepth)
			minBlack = blackDepth;
		if (maxBlack < blackDepth)
			maxBlack = blackDepth;
	};

	if (h->m_left != nullptr)
	{
		assert(m_comp(h->m_left->m_row, h->m_row) < 0);
		validate(h->m_left, h->m_red, blackDepth, minBlack, maxBlack);
	}
	else
		endOfPath();

	if (h->m_right != nullptr)
	{
		assert(m_comp(h->m_right->m_row, h->m_row) > 0);
		validate(h->m_right, h->m_red, blackDepth, minBlack, maxBlack);
	}
	else
		endOfPath();
}
// --------------------------------------------------------------------
// Creates an empty set of rows belonging to category cat.
rowset::rowset(category& cat)
: m_cat(cat)
{
}
// Sorts the rows in this set on the given items, the first item being the
// most significant. The sort is stable. Returns *this.
// Throws runtime_error (from row_comparator) when the dictionary lacks a
// validator for one of the items.
rowset& rowset::orderBy(initializer_list<string> items)
{
	row_comparator c(&m_cat, items.begin(), items.end());

	// The comparator is three way and returns an int. Passed to stable_sort
	// directly, every non zero result would convert to true, 'less' as well
	// as 'greater'. That is not a strict weak ordering, so translate.
	stable_sort(begin(), end(),
		[&c](const row& a, const row& b) -> bool { return c(a, b) < 0; });

	return *this;
}
// --------------------------------------------------------------------
// Creates an empty category called name in datablock db. validator may be
// nullptr. When the validator knows the category, columns are created for
// all key and mandatory fields and an index on the keys is set up.
// Throws validation_error when name is empty.
category::category(datablock& db, const string& name, validator* validator)
	: m_db(db), m_name(name), m_validator(validator)
	, m_head(nullptr), m_tail(nullptr), m_index(nullptr)
{
	// Not set by the initialiser list above, and tested by add_column,
	// emplace and erase also when no validator was supplied.
	m_cat_validator = nullptr;

	if (m_name.empty())
		throw validation_error("invalid empty name for category");

	if (m_validator != nullptr)
	{
		m_cat_validator = m_validator->get_validator_for_category(m_name);
		if (m_cat_validator != nullptr)
		{
			// make sure all required columns are added
			for (auto& k: m_cat_validator->m_keys)
				add_column(k);

			for (auto& k: m_cat_validator->m_mandatory_fields)
				add_column(k);

			m_index = new cat_index(this);
		}
	}
}
// Destroys all rows (the destructor of the first row takes the rest of the
// list with it) and the index.
category::~category()
{
delete m_head;
delete m_index;
}
// Replaces the validator. An existing index is discarded; when the new
// validator knows this category a fresh index is built from the rows
// present. v may be nullptr.
void category::set_validator(validator* v)
{
	m_validator = v;

	// deleting a null pointer is harmless, no need to test first
	delete m_index;
	m_index = nullptr;

	m_cat_validator = (m_validator != nullptr)
		? m_validator->get_validator_for_category(m_name)
		: nullptr;

	if (m_cat_validator == nullptr)
		return;

	m_index = new cat_index(this);
	m_index->reconstruct();

#if DEBUG
	assert(m_index->size() == size());
	m_index->validate();
#endif
}
size_t category::get_column_index(const string& name) const
{
size_t result;
for (result = 0; result < m_columns.size(); ++result)
{
if (iequals(name, m_columns[result].m_name))
break;
}
return result;
}
// Returns the name of column column_ix. The lookup uses at() and thus
// throws out_of_range for an index that is not valid.
const string& category::get_column_name(size_t column_ix) const
{
return m_columns.at(column_ix).m_name;
}
// Returns the index of the column called name, the column is appended when
// it does not exist yet. With a category validator in place, a name that
// is unknown to the dictionary is reported as an error; the column is
// added regardless, without item validator.
size_t category::add_column(const string& name)
{
	const size_t index = get_column_index(name);

	if (index != m_columns.size())
		return index;		// known already

	const validate_item* item_validator = nullptr;

	if (m_cat_validator != nullptr)
	{
		item_validator = m_cat_validator->get_validator_for_item(name);
		if (item_validator == nullptr)
			m_validator->report_error("tag " + name + " not allowed in category " + m_name);
	}

	// the new column ends up at the old size, which is what index holds
	m_columns.push_back({name, item_validator});

	return index;
}
// Relinks the rows in the order of the index and updates head and tail
// accordingly. Does nothing for a category without index.
void category::reorderByIndex()
{
if (m_index != nullptr)
std::tie(m_head, m_tail) = m_index->reorder();
}
size_t category::size() const
{
size_t result = 0;
for (auto pi = m_head; pi != nullptr; pi = pi->m_next)
++result;
return result;
}
// True when there are no rows. A category whose first row has no values is
// reported as empty as well, the rows after the first are not looked at.
bool category::empty() const
{
return m_head == nullptr or m_head->m_values == nullptr;
}
void category::drop(const string& field)
{
using namespace placeholders;
auto ci = find_if(m_columns.begin(), m_columns.end(),
[field](item_column& c) -> bool { return iequals(c.m_name, field); });
if (ci != m_columns.end())
{
uint32 column_ix = ci - m_columns.begin();
for (auto pi = m_head; pi != nullptr; pi = pi->m_next)
pi->drop(column_ix);
m_columns.erase(ci);
}
}
// Returns the first row for which cond holds, a default constructed row
// when there is none.
row category::operator[](condition&& cond)
{
	for (auto candidate: *this)
	{
		if (cond(*this, candidate))
			return candidate;
	}

	return row();
}
// Returns all rows for which cond holds, in the order in which they are
// stored. Every row is tested, the index is not used.
rowset category::find(condition&& cond)
{
rowset result(*this);
for (auto r: *this)
{
if (cond(*this, r))
result.push_back(r);
}
return result;
}
// True when cond holds for at least one row. The search stops at the first
// match.
bool category::exists(condition&& cond)
{
	for (auto candidate: *this)
	{
		if (cond(*this, candidate))
			return true;
	}

	return false;
}
// Returns all rows of this category as a rowset sorted on items. The
// category itself is left in its original order.
rowset category::orderBy(std::initializer_list<string> items)
{
rowset result(*this);
result.insert(result.begin(), begin(), end());
return result.orderBy(items);
}
// Removes all rows. The columns are kept. A category that had an index
// gets a new, empty one.
void category::clear()
{
// the destructor of the first row destroys the whole list
delete m_head;
m_head = m_tail = nullptr;
if (m_index != nullptr)
{
delete m_index;
m_index = new cat_index(this);
}
}
// Appends a new row holding the items in the range [b, e).
// The result is the row together with a flag telling whether it was
// inserted. For a category with an index, a row with the same key values
// that is already present is returned instead, with the flag set to false.
// Throws runtime_error when a mandatory field is missing from [b, e); this
// is checked only with a category validator and a non empty range.
template<class Iter>
tuple<row,bool> category::emplace(Iter b, Iter e)
{
// First, make sure all mandatory fields are supplied
tuple<row,bool> result = make_tuple(row(), true);
if (m_cat_validator != nullptr and b != e)
{
for (auto& col: m_columns)
{
auto iv = m_cat_validator->get_validator_for_item(col.m_name);
if (iv == nullptr)
continue;
bool seen = false;
for (auto v = b; v != e; ++v)
{
if (iequals(v->name(), col.m_name))
{
seen = true;
break;
}
}
if (not seen and iv->m_mandatory)
throw runtime_error("missing mandatory field " + col.m_name + " for category " + m_name);
}
if (m_index != nullptr)
{
// a scratch row holding the key fields only, used to probe the index;
// the unique_ptr disposes of it at the end of this block
unique_ptr<item_row> nr(new item_row{nullptr, this, nullptr});
row r(nr.get());
auto keys = key_fields();
for (auto v = b; v != e; ++v)
{
if (keys.count(v->name()))
r.assign(v->name(), v->value(), true);
}
auto test = m_index->find(nr.get());
if (test != nullptr)
{
if (VERBOSE > 1)
cerr << "Not inserting new record in " << m_name << " (duplicate key)" << endl;
result = make_tuple(row(test), false);
}
}
}
if (get<1>(result))
{
auto nr = new item_row{nullptr, this, nullptr};
// NOTE(review): the row is linked into the list before its values are
// assigned; if assign can throw, the row stays behind in the list
// without an entry in the index -- worth confirming
if (m_head == nullptr)
{
assert(m_tail == nullptr);
m_head = m_tail = nr;
}
else
{
assert(m_tail != nullptr);
assert(m_head != nullptr);
m_tail->m_next = nr;
m_tail = nr;
}
row r(nr);
for (auto v = b; v != e; ++v)
r.assign(*v, true);
get<0>(result) = r;
// only now, the index compares on the key values just assigned
if (m_index != nullptr)
m_index->insert(nr);
}
return result;
}
// Appends a new row with the items of r, see the iterator based emplace.
tuple<row,bool> category::emplace(row r)
{
return emplace(r.begin(), r.end());
}
// Erases all rows for which cond holds.
void category::erase(condition&& cond)
{
// collect the rows first and erase in a second pass, erasing a row
// relinks the list this loop is walking over
rowset remove(*this);
for (auto r: *this)
{
if (cond(*this, r))
remove.push_back(r);
}
for (auto r: remove)
erase(r);
}
// Erases the row at position p; p can no longer be used afterwards since
// the row it refers to has been deleted.
void category::erase(iterator p)
{
erase(*p);
}
// Erases row r from this category.
// The erase cascades: for each key column whose item validator lists
// children, the rows in the child categories referring to the key value of
// r are erased first. After that r is taken out of the index and out of the
// list of rows, and is deleted.
// Throws runtime_error when the category has no rows at all.
//
// m_tail is kept up to date here. It used to be left pointing at the
// deleted row when the last row was erased, and the next emplace would
// then link its new row to freed memory.
void category::erase(row r)
{
	iset keys;
	if (m_cat_validator)
		keys = iset(m_cat_validator->m_keys.begin(), m_cat_validator->m_keys.end());

	for (auto& col: m_columns)
	{
		auto iv = col.m_validator;
		if (iv == nullptr or iv->m_children.empty())
			continue;

		// only key columns are followed into the child categories
		if (not keys.count(col.m_name))
			continue;

		const char* value = r[col.m_name].c_str();

		for (auto child: iv->m_children)
		{
			if (child->m_category == nullptr)
				continue;

			auto child_cat = m_db.get(child->m_category->m_name);
			if (child_cat == nullptr)
				continue;

			auto rows = child_cat->find(key(child->m_tag) == value);
			for (auto& cr: rows)
				child_cat->erase(cr);
		}
	}

	if (m_head == nullptr)
		throw runtime_error("erase");

	if (m_index != nullptr)
		m_index->erase(r.m_data);

	if (r == m_head)
	{
		m_head = m_head->m_next;

		// that was the only row, no tail left either
		if (m_head == nullptr)
			m_tail = nullptr;

		// detach, the row destructor would take the rest of the list along
		r.m_data->m_next = nullptr;
		delete r.m_data;
	}
	else
	{
		for (auto pi = m_head; pi != nullptr; pi = pi->m_next)
		{
			if (pi->m_next == r.m_data)
			{
				pi->m_next = r.m_data->m_next;

				// erasing the last row, its predecessor is the new tail
				if (m_tail == r.m_data)
					m_tail = pi;

				r.m_data->m_next = nullptr;
				delete r.m_data;
				break;
			}
		}
	}
}
// Appends the full tag (_category.item) of each column to tags, in column
// order.
void category::get_tag_order(vector<string>& tags) const
{
for (auto& c: m_columns)
tags.push_back("_" + m_name + "." + c.m_name);
}
// Returns a reference to item item_name in the first row. For a category
// without rows the reference holds a null row.
const detail::item_reference category::get_first_item(const char* item_name) const
{
return detail::item_reference{item_name, m_head};
}
// Iterator positioned at the first row.
category::iterator category::begin()
{
return iterator(m_head);
}
// The past-the-end iterator, represented by a null row.
category::iterator category::end()
{
return iterator(nullptr);
}
// Validates this category against the dictionary: the category has to be
// known, all columns have to be valid for it, all mandatory fields have to
// be present as column and, for each row, as value; and each value is
// passed to the validator of its item. Problems found here are passed to
// report_error of the validator.
// Throws runtime_error when no validator has been set.
void category::validate()
{
if (m_validator == nullptr)
throw runtime_error("no validator specified");
if (empty())
{
if (VERBOSE > 2)
cerr << "Skipping validation of empty category " << m_name << endl;
return;
}
if (m_cat_validator == nullptr)
{
m_validator->report_error("undefined category " + m_name);
return;
}
// a copy of the mandatory field names, the fields found as column are
// crossed off below
auto mandatory = m_cat_validator->m_mandatory_fields;
for (auto& col: m_columns)
{
auto iv = m_cat_validator->get_validator_for_item(col.m_name);
if (iv == nullptr)
m_validator->report_error("Field " + col.m_name + " is not valid in category " + m_name);
// refresh the validator stored with the column, this may be nullptr
col.m_validator = iv;
mandatory.erase(col.m_name);
}
if (not mandatory.empty())
m_validator->report_error("In category " + m_name + " the following mandatory fields are missing: " + ba::join(mandatory, ", "));
// check index?
if (m_index)
{
#if not defined(NDEBUG)
m_index->validate();
for (auto r: *this)
{
if (m_index->find(r.m_data) != r.m_data)
m_validator->report_error("Key not found in index for category " + m_name);
}
#endif
}
// validate all values
// NOTE(review): mandatory is assigned here but not read again below
mandatory = m_cat_validator->m_mandatory_fields;
for (auto ri = m_head; ri != nullptr; ri = ri->m_next)
{
for (size_t cix = 0; cix < m_columns.size(); ++cix)
{
bool seen = false;
auto iv = m_columns[cix].m_validator;
if (iv == nullptr)
{
m_validator->report_error("invalid field " + m_columns[cix].m_name + " for category " + m_name);
continue;
}
for (auto vi = ri->m_values; vi != nullptr; vi = vi->m_next)
{
if (vi->m_column_index == cix)
{
seen = true;
// the item validator checks the text of the value
(*iv)(vi->m_text);
}
}
if (seen)
continue;
// no value in this row for the column
if (iv != nullptr and iv->m_mandatory)
m_validator->report_error("missing mandatory field " + m_columns[cix].m_name + " for category " + m_name);
}
}
}
// Returns the validator of this category.
// Throws runtime_error when none has been set.
const validator& category::get_validator() const
{
if (m_validator == nullptr)
throw runtime_error("no validator defined yet");
return *m_validator;
}
// Returns the tags of all items the dictionary defines for this category.
// Throws runtime_error when no validator has been set. A category unknown
// to the dictionary is reported as an error; should report_error return,
// the result is an empty set. The missing category validator used to be
// dereferenced in that case.
iset category::fields() const
{
	if (m_validator == nullptr)
		throw runtime_error("No validator specified");

	if (m_cat_validator == nullptr)
	{
		m_validator->report_error("undefined category");
		return iset();
	}

	iset result;
	for (auto& iv: m_cat_validator->m_item_validators)
		result.insert(iv.m_tag);
	return result;
}
// Returns the names of the fields that are mandatory in this category.
// Throws runtime_error when no validator has been set. A category unknown
// to the dictionary is reported as an error; should report_error return,
// the result is an empty set. The missing category validator used to be
// dereferenced in that case.
iset category::mandatory_fields() const
{
	if (m_validator == nullptr)
		throw runtime_error("No validator specified");

	if (m_cat_validator == nullptr)
	{
		m_validator->report_error("undefined category");
		return iset();
	}

	return m_cat_validator->m_mandatory_fields;
}
// Returns the names of the key fields of this category.
// Throws runtime_error when no validator has been set. A category unknown
// to the dictionary is reported as an error; should report_error return,
// the result is an empty set. The missing category validator used to be
// dereferenced in that case.
iset category::key_fields() const
{
	if (m_validator == nullptr)
		throw runtime_error("No validator specified");

	if (m_cat_validator == nullptr)
	{
		m_validator->report_error("undefined category");
		return iset();
	}

	return iset{ m_cat_validator->m_keys.begin(), m_cat_validator->m_keys.end() };
}
// Moves on to the next row in the list. The current row is dereferenced
// without a check, so this must not be called on the end iterator.
auto category::iterator::operator++() -> iterator&
{
m_current = row(m_current.data()->m_next);
return *this;
}
namespace detail
{
// Writes a single value to os in one of three forms:
//  - a text field, between lines starting with a semicolon, for values
//    that contain a newline or are 132 characters or longer, and for all
//    values when width is zero
//  - as is, when the value is a valid unquoted string
//  - between single or double quotes, whichever can be used safely; when
//    neither can, the value is written as text field after all
// Values shorter than width are padded with spaces up to width.
//
//   offset  the number of characters already written on the current line
//   width   the width of the column, zero to force a text field
//
// Returns the new offset, which is zero after a text field.
//
// For a quoted value that does not fit in width the offset is now advanced
// by the length of the value plus three, for the two quotes and the
// trailing space. It used to be advanced by the length plus one, making
// the caller break lines too late.
size_t write_value(ostream& os, string value, size_t offset, size_t width)
{
	if (value.find('\n') != string::npos or width == 0 or value.length() >= 132)	// write as text field
	{
		// a semicolon at the start of a line would end the field, escape it
		ba::replace_all(value, "\n;", "\n\\;");

		if (offset > 0)
			os << endl;
		os << ';' << value;
		if (not ba::ends_with(value, "\n"))
			os << endl;
		os << ';' << endl;
		offset = 0;
	}
	else if (is_unquoted_string(value.c_str()))
	{
		os << value;

		if (value.length() < width)
		{
			os << string(width - value.length(), ' ');
			offset += width;
		}
		else
		{
			os << ' ';
			offset += value.length() + 1;
		}
	}
	else
	{
		bool done = false;
		for (char q: { '\'', '"'})
		{
			// The quote character can be used as long as it does not occur
			// in the value followed by a blank. Reading value[p + 1] is fine
			// for a quote in last position, that yields the terminating zero.
			auto p = value.find(q);
			while (p != string::npos and is_non_blank(value[p + 1]) and value[p + 1] != q)
				p = value.find(q, p + 1);

			if (p != string::npos)
				continue;

			os << q << value << q;

			if (value.length() + 2 < width)
			{
				os << string(width - value.length() - 2, ' ');
				offset += width;
			}
			else
			{
				os << ' ';
				offset += value.length() + 3;
			}

			done = true;
			break;
		}

		if (not done)
		{
			if (offset > 0)
				os << endl;
			os << ';' << value << endl
			   << ';' << endl;
			offset = 0;
		}
	}

	return offset;
}
}
// Writes this category to os.
//
//   os                   stream to write to
//   order                column indices, in the order the columns are written
//   includeEmptyColumns  currently not used by this implementation
//
// Nothing is written for an empty category. A category with more than one
// row is written as a loop_ table with aligned columns, a category with a
// single row as a list of tag/value pairs. Missing values are written as '?'.
// The output is terminated with a "# " line.
void category::write(ostream& os, const vector<int>& order, bool includeEmptyColumns)
{
	if (empty())
		return;

	// If the first row has a next, we need a loop_
	bool need_loop = (m_head->m_next != nullptr);

	if (need_loop)
	{
		os << "loop_" << endl;

		for (auto cix: order)
		{
			auto& col = m_columns[cix];
			os << '_' << m_name << '.' << col.m_name << ' ' << endl;
		}

		// The widths are indexed by column index (not by position in order),
		// so the table must cover every column of the category, even when
		// order mentions only some of them.
		vector<size_t> column_widths(m_columns.size(), 2);

		// first pass: find the width needed for each column
		for (auto r = m_head; r != nullptr; r = r->m_next)
		{
			for (auto v = r->m_values; v != nullptr; v = v->m_next)
			{
				// multi line values are written as text fields and do not
				// take part in the column alignment
				if (strchr(v->m_text, '\n') != nullptr)
					continue;

				size_t l = strlen(v->m_text);

				if (not is_unquoted_string(v->m_text))
					l += 2;		// room for the quotes

				if (l >= 132)
					continue;

				if (column_widths[v->m_column_index] < l + 1)
					column_widths[v->m_column_index] = l + 1;
			}
		}

		// second pass: write the rows
		for (auto r = m_head; r != nullptr; r = r->m_next)
		{
			size_t offset = 0;

			for (size_t cix: order)
			{
				size_t w = column_widths[cix];

				string s;
				for (auto iv = r->m_values; iv != nullptr; iv = iv->m_next)
				{
					if (iv->m_column_index == cix)
					{
						s = iv->m_text;
						break;
					}
				}

				if (s.empty())
					s = "?";

				size_t l = s.length();
				if (not is_unquoted_string(s.c_str()))
					l += 2;
				if (l < w)
					l = w;

				// start a new line when this value would not fit anymore
				if (offset + l >= 132 and offset > 0)
				{
					os << endl;
					offset = 0;
				}

				offset = detail::write_value(os, s, offset, w);

				if (offset >= 132)
				{
					os << endl;
					offset = 0;
				}
			}

			if (offset > 0)
				os << endl;
		}
	}
	else
	{
		// first find the indent level
		size_t l = 0;

		for (auto& col: m_columns)
		{
			string tag = '_' + m_name + '.' + col.m_name;

			if (l < tag.length())
				l = tag.length();
		}

		l += 3;

		for (size_t cix: order)
		{
			auto& col = m_columns[cix];

			// pad the tag so that all values start in column l
			os << '_' << m_name << '.' << col.m_name << string(l - col.m_name.length() - m_name.length() - 2, ' ');

			string s;
			for (auto iv = m_head->m_values; iv != nullptr; iv = iv->m_next)
			{
				if (iv->m_column_index == cix)
				{
					s = iv->m_text;
					break;
				}
			}

			if (s.empty())
				s = "?";

			size_t offset = l;
			if (s.length() + l >= kMaxLineLength)
			{
				os << endl;
				offset = 0;
			}

			// write_value returns 0 when it already ended the line itself
			if (detail::write_value(os, s, offset, 1) != 0)
				os << endl;
		}
	}

	os << "# " << endl;
}
// Writes this category to os with the columns in their natural order
// (column index 0, 1, 2, ...).
void category::write(ostream& os)
{
	const int column_count = static_cast<int>(m_columns.size());

	vector<int> natural_order;
	natural_order.reserve(m_columns.size());

	for (int ix = 0; ix < column_count; ++ix)
		natural_order.push_back(ix);

	write(os, natural_order, false);
}
// Writes this category to os, starting with the columns named in `columns`
// (in that order) followed by all remaining columns in index order.
// Columns named in `columns` are passed to add_column first.
void category::write(ostream& os, const vector<string>& columns)
{
	// make sure all requested columns are present
	for (auto& column_name: columns)
		add_column(column_name);

	vector<int> write_order;
	write_order.reserve(m_columns.size());

	// the requested columns come first ...
	for (auto& column_name: columns)
		write_order.push_back(get_column_index(column_name));

	// ... followed by every column that was not mentioned
	for (size_t ix = 0; ix < m_columns.size(); ++ix)
	{
		const bool listed = std::find(write_order.begin(), write_order.end(), ix) != write_order.end();
		if (not listed)
			write_order.push_back(ix);
	}

	write(os, write_order, true);
}
// --------------------------------------------------------------------
row::row(const row& rhs)
: m_data(rhs.m_data)
{
}
// Copy assignment: makes this row refer to the same row data as rhs,
// only the pointer is copied.
row& row::operator=(const row& rhs)
{
	auto shared_data = rhs.m_data;
	m_data = shared_data;
	return *this;
}
// Assigns `value` to the field `name` of this row.
//
//   name       name of the field; passed to add_column of the category
//   value      the new text; an empty value removes the stored item
//   emplacing  true when the row is being constructed; when false and the
//              field is a key field, the row is taken out of the category
//              index before the change and put back afterwards
//
// Throws logic_error when the row has no data. The value is passed to the
// column's validator (if any) before anything is modified.
void row::assign(const string& name, const string& value, bool emplacing)
{
	if (m_data == nullptr)
		throw logic_error("invalid row, no data");

	auto owner = m_data->m_category;
	auto column_ix = owner->add_column(name);
	auto& column = owner->m_columns[column_ix];

	// look up the text currently stored for this column, if any
	const char* previous = nullptr;
	for (auto node = m_data->m_values; node != nullptr; node = node->m_next)
	{
		assert(node != node->m_next and (node->m_next == nullptr or node != node->m_next->m_next));

		if (node->m_column_index == column_ix)
		{
			previous = node->m_text;
			break;
		}
	}

	// nothing to do when the value does not change
	if (previous != nullptr and value == previous)
		return;

	// check the value
	if (column.m_validator)
		(*column.m_validator)(value);

	// If the field is part of the key for this category, remove the row
	// from the index before updating.
	// NOTE: the original also contained commented-out code that propagated
	// the new value to child categories; that is not done here.
	bool reinsert = false;

	if (not emplacing and owner->m_index != nullptr and owner->key_fields().count(name))
	{
		reinsert = owner->m_index->find(m_data);
		if (reinsert)
			owner->m_index->erase(m_data);
	}

	// first remove the old value for this column: walk the links of the
	// list so that the head and the other nodes are handled alike
	for (auto link = &m_data->m_values; *link != nullptr; link = &(*link)->m_next)
	{
		if ((*link)->m_column_index != column_ix)
			continue;

		auto victim = *link;
		*link = victim->m_next;
		victim->m_next = nullptr;	// detach before deleting
		delete victim;
		break;
	}

#if DEBUG
	for (auto node = m_data->m_values; node != nullptr; node = node->m_next)
		assert(node != node->m_next and (node->m_next == nullptr or node != node->m_next->m_next));
#endif

	// then append the new value, unless it is empty
	if (not value.empty())
	{
		auto fresh = new(value.length()) item_value(value.c_str(), column_ix);

		auto tail = &m_data->m_values;
		while (*tail != nullptr)
			tail = &(*tail)->m_next;
		*tail = fresh;
	}

#if DEBUG
	for (auto node = m_data->m_values; node != nullptr; node = node->m_next)
		assert(node != node->m_next and (node->m_next == nullptr or node != node->m_next->m_next));
#endif

	if (reinsert)
		owner->m_index->insert(m_data);
}
// Assigns the given item to this row, using the item's name as field name
// and the item's value as text. See the name/value overload for `emplacing`.
void row::assign(const item& value, bool emplacing)
{
	const auto& field_name = value.name();
	const auto& field_text = value.value();

	assign(field_name, field_text, emplacing);
}
// Returns true when this row has no data at all or holds no values.
bool row::empty() const
{
	if (m_data == nullptr)
		return true;

	return m_data->m_values == nullptr;
}
// Returns an iterator to the first item stored in this row.
// A row without data is a valid, empty row (see empty()); for such a row the
// iterator is constructed with the same arguments end() uses.
auto row::begin() const -> const_iterator
{
	// do not dereference m_data when the row has no data
	return const_iterator(m_data, m_data != nullptr ? m_data->m_values : nullptr);
}
// Returns the past-the-end iterator for the items of this row,
// i.e. an iterator that points at no item.
auto row::end() const -> const_iterator
{
	const_iterator past_the_end(m_data, nullptr);
	return past_the_end;
}
row::const_iterator::const_iterator(item_row* data, item_value* ptr)
: m_data(data), m_ptr(ptr)
{
if (m_ptr != nullptr)
fetch();
}
// Pre-increment: advances to the next value of the row and loads it.
// Incrementing an iterator that points at no item leaves it unchanged.
row::const_iterator& row::const_iterator::operator++()
{
	if (m_ptr != nullptr)
	{
		m_ptr = m_ptr->m_next;

		// only load an item when we did not run off the end
		if (m_ptr != nullptr)
			fetch();
	}

	return *this;
}
// Loads the item the iterator currently points at into m_current: the name
// is the column name the category reports for the value's column index,
// the value is the stored text. Requires m_ptr to be non-null.
void row::const_iterator::fetch()
{
	const auto& column_name = m_data->m_category->get_column_name(m_ptr->m_column_index);

	m_current = item(column_name, m_ptr->m_text);
}
// --------------------------------------------------------------------
file::file()
: m_head(nullptr)
, m_validator(nullptr)
{
}
// Constructs a file and loads its content from `is`.
// NOTE(review): the `validate` argument is not used by this constructor;
// load() only validates when a validator is already set, which is never the
// case for a freshly constructed file.
file::file(istream& is, bool validate)
	: file()
{
	this->load(is);
}
// Move constructor: takes over the datablock list and the validator of rhs
// and leaves rhs without datablocks and without validator.
file::file(file&& rhs)
	: m_head(rhs.m_head), m_validator(rhs.m_validator)
{
	rhs.m_head = nullptr;
	rhs.m_validator = nullptr;
}
// Destructor: deletes the first datablock and then the validator.
file::~file()
{
	// the datablock list goes first, the validator after it
	delete m_head;

	delete m_validator;
}
// Appends the datablock `e` to the end of the list of datablocks, after
// giving it the validator of this file.
// Throws validation_error when a datablock with the same name (compared
// with iequals) is already part of this file.
void file::append(datablock* e)
{
	e->set_validator(m_validator);

	// Walk the links of the list; the walk ends at the link that has to
	// point to the new datablock, be it m_head or the m_next of the last one.
	auto link = &m_head;

	while (*link != nullptr)
	{
		auto existing = *link;

		if (iequals(existing->name(), e->name()))
			throw validation_error("datablock " + e->name() + " already defined in file");

		link = &existing->m_next;
	}

	*link = e;
}
// Parses the content of `is` into this file.
//
// The validator is detached while parsing. When a validator was set before
// the call, it is attached again afterwards and the file is validated.
// Exceptions thrown by the parser are passed on to the caller, after the
// validator has been put back.
void file::load(istream& is)
{
	validator* saved = m_validator;
	set_validator(nullptr);

	try
	{
		parser p(is, *this);
		p.parse_file();
	}
	catch (...)
	{
		// Do not lose the validator when parsing fails: this file owns it
		// (the destructor deletes m_validator), so dropping the pointer here
		// would leak it and leave the file without validator.
		set_validator(saved);
		throw;
	}

	if (saved != nullptr)
	{
		set_validator(saved);
		validate();
	}
}
// Writes all datablocks of this file to os, in list order.
void file::save(ostream& os)
{
	for (datablock* block = m_head; block != nullptr; block = block->m_next)
		block->write(os);
}
// Writes all datablocks of this file to os, in list order, passing `order`
// on to the write method of each datablock.
void file::write(ostream& os, const vector<string>& order)
{
	for (datablock* block = m_head; block != nullptr; block = block->m_next)
		block->write(os, order);
}
// Returns the first datablock whose name equals `name` (compared with
// iequals). Throws runtime_error when there is no such datablock.
datablock& file::operator[](const string& name)
{
	for (datablock* candidate = m_head; candidate != nullptr; candidate = candidate->m_next)
	{
		if (iequals(candidate->m_name, name))
			return *candidate;
	}

	throw runtime_error("datablock " + name + " does not exist");
}
void file::validate()
{
if (m_validator == nullptr)
{
if (VERBOSE)
cerr << "No dictionary loaded explicitly, loading default" << endl;
load_dictionary();
}
for (auto d = m_head; d != nullptr; d = d->m_next)
d->validate();
}
// Returns the validator of this file.
// Throws runtime_error when no validator has been set.
const validator& file::get_validator() const
{
	if (m_validator != nullptr)
		return *m_validator;

	throw runtime_error("no validator defined yet");
}
// Loads the default dictionary, which is the one named "mmcif_ddl".
void file::load_dictionary()
{
	const char* const default_dictionary = "mmcif_ddl";
	load_dictionary(default_dictionary);
}
void file::load_dictionary(const char* dict)
{
fs::path dict_file = string("dictionaries/") + dict + ".dic";
#if defined(USE_RSRC)
mrsrc::rsrc dict_data(dict_file.string());
if (not dict_data)
throw invalid_argument("no such dictionary");
struct membuf : public streambuf
{
membuf(char* dict, size_t length)
{
this->setg(dict, dict, dict + length);
}
} buffer(const_cast<char*>(dict_data.data()), dict_data.size());
istream is(&buffer);
#else
if (not fs::exists(dict_file))
throw runtime_error("Dictionary not found (" + dict_file.string() + ")");
fs::ifstream is(dict_file);
#endif
load_dictionary(is);
}
// Reads a dictionary from `is` into a new validator and makes that the
// validator of this file. The new validator is held by a unique_ptr until
// the dictionary has been loaded.
// NOTE(review): a validator that was set before this call is replaced by
// set_validator without being deleted here - confirm who owns it.
void file::load_dictionary(istream& is)
{
	unique_ptr<validator> fresh(new validator());

	dict_parser reader(*fresh, is);
	reader.load_dictionary();

	set_validator(fresh.release());
}
// Makes `v` the validator of this file and of all its datablocks.
// `v` may be null.
void file::set_validator(validator* v)
{
	m_validator = v;

	datablock* block = m_head;
	while (block != nullptr)
	{
		block->set_validator(m_validator);
		block = block->m_next;
	}
}
// Calls get_tag_order with `tags` on each datablock of this file,
// in list order.
void file::get_tag_order(vector<string>& tags) const
{
	auto block = m_head;
	while (block != nullptr)
	{
		block->get_tag_order(tags);
		block = block->m_next;
	}
}
// Pre-increment: moves the iterator to the next datablock in the list.
auto file::iterator::operator++() -> iterator&
{
	auto successor = m_current->m_next;
	m_current = successor;
	return *this;
}
// Returns an iterator positioned at the first datablock of this file.
auto file::begin() const -> iterator
{
	iterator first(m_head);
	return first;
}
// Returns the past-the-end iterator for the datablocks of this file.
auto file::end() const -> iterator
{
	iterator past_the_end(nullptr);
	return past_the_end;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment