Commit 31c86d9c by Maarten L. Hekkelman

stripped to remove dependency on clipper and CCP4

parent 85b08f9d
......@@ -30,7 +30,8 @@ firstTarget: all
CXX = @CXX@
CXXFLAGS = @CXXFLAGS@ @BOOST_CPPFLAGS@ @LIBBZ2_CPPFLAGS@
LDFLAGS = @LDFLAGS@ @LIBS@ @BOOST_LDFLAGS@ @LIBBZ2_LDFLAGS@
LIBS = @LIBS@
LIBS = @BOOST_IOSTREAMS_LIB@ \
@LIBS@
prefix = @prefix@
exec_prefix = @exec_prefix@
......@@ -89,24 +90,18 @@ endif
$(OBJDIR):
mkdir -p $(OBJDIR)
LIBCIF_SRC = AtomShape.cpp \
AtomType.cpp \
BondMap.cpp \
LIBCIF_SRC = AtomType.cpp \
Cif2PDB.cpp \
Cif++.cpp \
CifParser.cpp \
CifUtils.cpp \
CifValidator.cpp \
Compound.cpp \
DistanceMap.cpp \
FixDMC.cpp \
MapMaker.cpp \
PDB2Cif.cpp \
PDB2CifRemark3.cpp \
Point.cpp \
ResolutionCalculator.cpp \
Secondary.cpp \
Statistics.cpp \
Structure.cpp \
Symmetry.cpp \
TlsParser.cpp
......@@ -170,10 +165,6 @@ distclean: clean
# Test rules
BOOST_LIBS = iostreams thread filesystem timer chrono regex
CLIPPER_LIBS = core ccp4
CLIPPER_LIBS := $(CLIPPER_LIBS:%=-lclipper-%)
define TEST_template =
-include $$(OBJDIR)/$(1)-test.d
......@@ -182,7 +173,7 @@ $(1)_OBJECTS = $$(OBJDIR)/$(1)-test.o
test/$(1)-test: $(LIB_TARGET) $$($(1)_OBJECTS)
@ echo ">>> building $(1)-test"
$(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $$@ $$($(1)_OBJECTS) -L.libs -Wl,-rpath /srv/ccp4-7.1/lib -lbz2 -lz -lcif++ $(CLIPPER_LIBS) $(BOOST_LIBS:%=-lboost_%) $(LIBS) -lstdc++fs
$(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $$@ $$($(1)_OBJECTS) -L.libs -lboost_timer -lcif++ $(LIBS)
.PHONY: $(1)-test
$(1)-test: test/$(1)-test
......
......@@ -6,7 +6,7 @@ AX_CXX_COMPILE_STDCXX_17([noext])
AX_CHECK_COMPILE_FLAG([-fstandalone-debug], , , [-Werror])
AC_CONFIG_SRCDIR([src/AtomShape.cpp])
AC_CONFIG_SRCDIR([src/Cif++.cpp])
AC_CONFIG_AUX_DIR(config)
AC_CONFIG_MACRO_DIR([config/m4])
AC_CONFIG_HEADERS([include/cif++/Config.hpp])
......@@ -31,28 +31,16 @@ AC_CHECK_HEADERS([sys/ioctl.h])
AC_CHECK_HEADERS([termios.h])
AC_CHECK_HEADER_STDBOOL
AC_CHECK_TYPES([ptrdiff_t])
AC_C_INLINE
AC_FUNC_ERROR_AT_LINE
AC_FUNC_MALLOC
AC_FUNC_STRTOD
AC_PROG_MAKE_SET
AC_TYPE_INT32_T
AC_TYPE_INT64_T
AC_TYPE_INT8_T
AC_TYPE_SIZE_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
AC_ARG_VAR([CCP4], [The location where CCP4 is installed])
AS_IF([test x"$CCP4" != x""],
[
CPPFLAGS="$CPPFLAGS -I ${CCP4}/include"
CXXFLAGS="$CXXFLAGS -I ${CCP4}/include"
LDFLAGS="$LDFLAGS -L${CCP4}/lib"
])
# AC_ARG_VAR([CCP4], [The location where CCP4 is installed])
# AS_IF([test x"$CCP4" != x""],
# [
# CPPFLAGS="$CPPFLAGS -I ${CCP4}/include"
# CXXFLAGS="$CXXFLAGS -I ${CCP4}/include"
# LDFLAGS="$LDFLAGS -L${CCP4}/lib"
# ])
AC_ARG_VAR([USE_RSRC], [Use resources to store internal data, requires mrc])
AC_ARG_VAR([MRC], [Specify a location for the mrc executable])
......@@ -103,48 +91,48 @@ AX_BOOST_IOSTREAMS
dnl AX_BOOST_FILESYSTEM
AX_BOOST_THREAD
AC_ARG_WITH([clipper],
AS_HELP_STRING([--with-clipper=@<:@location@:>@],
[Use the clipper library as specified.
@<:@location=$CCP4@:>@]),
[
CPPFLAGS="$CPPFLAGS -I ${withval}/include"
CXXFLAGS="$CXXFLAGS -I ${withval}/include"
LDFLAGS="$LDFLAGS -L${withval}/lib"
])
AC_ARG_WITH([newuoa],
AS_HELP_STRING([--with-newuoa=@<:@location@:>@],
[Use the newuoa library as specified.]),
[
CPPFLAGS="$CPPFLAGS -I ${withval}/include"
CXXFLAGS="$CXXFLAGS -I ${withval}/include"
LDFLAGS="$LDFLAGS -L${withval}/lib"
])
AC_CHECK_HEADER(
[clipper/clipper.h],
[],
[AC_MSG_ERROR([
Can't find the main clipper include file clipper/clipper.h. Please
specify either the CCP4 location of the location of the installed
clipper using --with-clipper=<path>
])])
# AC_ARG_WITH([clipper],
# AS_HELP_STRING([--with-clipper=@<:@location@:>@],
# [Use the clipper library as specified.
# @<:@location=$CCP4@:>@]),
# [
# CPPFLAGS="$CPPFLAGS -I ${withval}/include"
# CXXFLAGS="$CXXFLAGS -I ${withval}/include"
# LDFLAGS="$LDFLAGS -L${withval}/lib"
# ])
# AC_ARG_WITH([newuoa],
# AS_HELP_STRING([--with-newuoa=@<:@location@:>@],
# [Use the newuoa library as specified.]),
# [
# CPPFLAGS="$CPPFLAGS -I ${withval}/include"
# CXXFLAGS="$CXXFLAGS -I ${withval}/include"
# LDFLAGS="$LDFLAGS -L${withval}/lib"
# ])
# AC_CHECK_HEADER(
# [clipper/clipper.h],
# [],
# [AC_MSG_ERROR([
# Can't find the main clipper include file clipper/clipper.h. Please
# specify either the CCP4 location of the location of the installed
# clipper using --with-clipper=<path>
# ])])
# AC_MSG_CHECKING([clipper version])
# AC_COMPILE_IFELSE(
# [read_test(clipper-test.cpp)],
# [],
# [AC_MSG_ERROR([The version of clipper is not up to date])])
AC_MSG_CHECKING([clipper version])
AC_COMPILE_IFELSE(
[read_test(clipper-test.cpp)],
[],
[AC_MSG_ERROR([The version of clipper is not up to date])])
AC_CHECK_HEADER(
[newuoa.h],
[],
[AC_MSG_ERROR([
Can't find the newuoa include file newuoa.h. Please install this
library and specify its location with --with-newuoa=<path>.
You can find newuoa at https://github.com/elsid/newuoa-cpp.git
])])
# AC_CHECK_HEADER(
# [newuoa.h],
# [],
# [AC_MSG_ERROR([
# Can't find the newuoa include file newuoa.h. Please install this
# library and specify its location with --with-newuoa=<path>.
# You can find newuoa at https://github.com/elsid/newuoa-cpp.git
# ])])
AX_CHECK_LIBRARY([LIBZ], [zlib.h], [z], [],
[AC_MSG_ERROR([libz not found - compressed files not supported])])
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// AtomShape, analogue to the similarly named code in clipper
#pragma once
#include "cif++/Structure.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Class used in calculating radii
class AtomShape
{
public:
AtomShape(const Atom& atom, float resHigh, float resLow,
bool electronScattering);
AtomShape(const Atom& atom, float resHigh, float resLow,
bool electronScattering, float bFactor);
~AtomShape();
AtomShape(const AtomShape&) = delete;
AtomShape& operator=(const AtomShape&) = delete;
float radius() const;
float calculatedDensity(float r) const;
float calculatedDensity(Point p) const;
private:
struct AtomShapeImpl* mImpl;
};
}
......@@ -28,9 +28,11 @@
#pragma once
#include "cif++/Config.hpp"
#include <cstdint>
#include <string>
#include <stdexcept>
#include <boost/math/quaternion.hpp>
#include "cif++/Config.hpp"
namespace mmcif
{
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <unordered_map>
#include "cif++/Structure.hpp"
namespace mmcif
{
class BondMap
{
public:
BondMap(const Structure& p);
BondMap(const BondMap&) = delete;
BondMap& operator=(const BondMap&) = delete;
bool operator()(const Atom& a, const Atom& b) const
{
return isBonded(index.at(a.id()), index.at(b.id()));
}
bool is1_4(const Atom& a, const Atom& b) const
{
uint32_t ixa = index.at(a.id());
uint32_t ixb = index.at(b.id());
return bond_1_4.count(key(ixa, ixb));
}
// links coming from the struct_conn records:
std::vector<std::string> linked(const Atom& a) const;
private:
bool isBonded(uint32_t ai, uint32_t bi) const
{
return bond.count(key(ai, bi)) != 0;
}
uint64_t key(uint32_t a, uint32_t b) const
{
if (a > b)
std::swap(a, b);
return static_cast<uint64_t>(a) | (static_cast<uint64_t>(b) << 32);
}
std::tuple<uint32_t,uint32_t> dekey(uint64_t k) const
{
return std::make_tuple(
static_cast<uint32_t>(k >> 32),
static_cast<uint32_t>(k)
);
}
uint32_t dim;
std::unordered_map<std::string,uint32_t> index;
std::set<uint64_t> bond, bond_1_4;
std::map<std::string,std::set<std::string>> link;
};
}
......@@ -29,7 +29,6 @@
#include "cif++/Config.hpp"
#include <string>
#include <cstdint>
#include <regex>
#include <iostream>
......@@ -107,9 +106,6 @@ namespace cif
// flag for verbose output
extern int VERBOSE;
using std::string;
using std::vector;
// mmCIF mapping
// A CIF data file in this case contains entries (data blocks) which can contain
// one or more Category objects. Each Category object contains arrays of Items.
......@@ -144,10 +140,10 @@ class Item
Item() {}
template<typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
Item(const string& name, const T& value)
Item(const std::string& name, const T& value)
: mName(name), mValue(std::to_string(value)) {}
Item(const string& name, const std::string& value)
Item(const std::string& name, const std::string& value)
: mName(name), mValue(value) {}
Item(const Item& rhs) : mName(rhs.mName), mValue(rhs.mValue) {}
......@@ -175,10 +171,10 @@ class Item
return *this;
}
const string& name() const { return mName; }
const string& value() const { return mValue; }
const std::string& name() const { return mName; }
const std::string& value() const { return mValue; }
void value(const string& v) { mValue = v; }
void value(const std::string& v) { mValue = v; }
// empty means either null or unknown
bool empty() const;
......@@ -193,8 +189,8 @@ class Item
const char* c_str() const { return mValue.c_str(); }
private:
string mName;
string mValue;
std::string mName;
std::string mValue;
};
// --------------------------------------------------------------------
......@@ -209,16 +205,16 @@ class Datablock
using iterator = CategoryList::iterator;
using const_iterator = CategoryList::const_iterator;
Datablock(const string& name);
Datablock(const std::string& name);
~Datablock();
Datablock(const Datablock&) = delete;
Datablock& operator=(const Datablock&) = delete;
string getName() const { return mName; }
void setName(const string& n) { mName = n; }
std::string getName() const { return mName; }
void setName(const std::string& n) { mName = n; }
string firstItem(const string& tag) const;
std::string firstItem(const std::string& tag) const;
iterator begin() { return mCategories.begin(); }
iterator end() { return mCategories.end(); }
......@@ -226,7 +222,7 @@ class Datablock
const_iterator begin() const { return mCategories.begin(); }
const_iterator end() const { return mCategories.end(); }
Category& operator[](const string& name);
Category& operator[](const std::string& name);
std::tuple<iterator,bool> emplace(const std::string& name);
......@@ -236,10 +232,10 @@ class Datablock
void setValidator(Validator* v);
// this one only looks up a Category, returns nullptr if it does not exist
Category* get(const string& name);
Category* get(const std::string& name);
void getTagOrder(vector<string>& tags) const;
void write(std::ostream& os, const vector<string>& order);
void getTagOrder(std::vector<std::string>& tags) const;
void write(std::ostream& os, const std::vector<std::string>& order);
void write(std::ostream& os);
// convenience function, add a line to the software category
......@@ -249,7 +245,7 @@ class Datablock
private:
std::list<Category> mCategories;
string mName;
std::string mName;
Validator* mValidator;
Datablock* mNext;
};
......@@ -296,7 +292,7 @@ namespace detail
return item_value_as<T>::convert(*this);
}
template<typename T>
template<typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
int compare(const T& value) const
{
int result = 0;
......@@ -317,6 +313,62 @@ namespace detail
return result;
}
template<typename T, std::enable_if_t<std::is_same_v<T, char>, int> = 0>
int compare(const T& value) const
{
return icompare(std::string{value}, c_str());
}
template<typename T, std::enable_if_t<std::is_integral_v<T> and std::is_signed_v<T> and not std::is_same_v<T, char>, int> = 0>
int compare(const T& value) const
{
int result = 0;
try
{
auto v = std::stol(c_str());
if (v < value)
result = -1;
else if (v > value)
result = 1;
}
catch (...)
{
if (VERBOSE)
std::cerr << "conversion error in compare for '" << c_str() << '\'' << std::endl;
result = 1;
}
return result;
}
template<typename T, std::enable_if_t<std::is_integral_v<T> and std::is_unsigned_v<T>, int> = 0>
int compare(const T& value) const
{
int result = 0;
try
{
auto v = std::stoul(c_str());
if (v < value)
result = -1;
else if (v > value)
result = 1;
}
catch (...)
{
if (VERBOSE)
std::cerr << "conversion error in compare for '" << c_str() << '\'' << std::endl;
result = 1;
}
return result;
}
template<typename T, std::enable_if_t<std::is_same_v<std::remove_cv_t<T>, std::string>, int> = 0>
int compare(const T& value) const
{
return icompare(c_str(), value.c_str());
}
// empty means either null or unknown
bool empty() const;
......@@ -334,8 +386,8 @@ namespace detail
// or, if specified, the value from _item_default.value in the dictionary
const char* c_str(const char* defaultValue) const;
bool operator!=(const string& s) const { return s != c_str(); }
bool operator==(const string& s) const { return s == c_str(); }
bool operator!=(const std::string& s) const { return s != c_str(); }
bool operator==(const std::string& s) const { return s == c_str(); }
friend std::ostream& operator<<(std::ostream& os, ItemReference& item);
......@@ -420,20 +472,6 @@ namespace detail
}
};
template<>
inline
int ItemReference::compare<string>(const string& value) const
{
return icompare(c_str(), value.c_str());
}
template<>
inline
int ItemReference::compare(const char* const& value) const
{
return cif::icompare(c_str(), value);
}
inline std::ostream& operator<<(std::ostream& os, const ItemReference& rhs)
{
os << rhs.c_str();
......@@ -592,13 +630,13 @@ class Row
return detail::ItemReference(itemTag, column, *this);
}
const detail::ItemReference operator[](const string& itemTag) const
const detail::ItemReference operator[](const std::string& itemTag) const
{
size_t column = ColumnForItemTag(itemTag.c_str());
return detail::ItemReference(itemTag.c_str(), column, *this);
}
detail::ItemReference operator[](const string& itemTag)
detail::ItemReference operator[](const std::string& itemTag)
{
size_t column = ColumnForItemTag(itemTag.c_str());
return detail::ItemReference(itemTag.c_str(), column, *this);
......@@ -623,6 +661,11 @@ class Row
return mData == rhs.mData;
}
bool operator!=(const Row& rhs) const
{
return mData != rhs.mData;
}
ItemRow* data() const { return mData; }
void swap(Row& rhs)
......@@ -634,8 +677,8 @@ class Row
private:
void assign(const string& name, const string& value, bool updateLinked);
void assign(size_t column, const string& value, bool updateLinked);
void assign(const std::string& name, const std::string& value, bool updateLinked);
void assign(size_t column, const std::string& value, bool updateLinked);
void assign(const Item& i, bool updateLinked);
static void swap(size_t column, ItemRow* a, ItemRow* b);
......@@ -747,7 +790,7 @@ namespace detail
struct KeyIsEmptyConditionImpl : public ConditionImpl
{
KeyIsEmptyConditionImpl(const string& ItemTag)
KeyIsEmptyConditionImpl(const std::string& ItemTag)
: mItemTag(ItemTag) {}
virtual void prepare(const Category& c);
......@@ -762,7 +805,7 @@ struct KeyIsEmptyConditionImpl : public ConditionImpl
return mItemTag + " == <empty>";
}
string mItemTag;
std::string mItemTag;
size_t mItemIx;
};
......@@ -771,7 +814,7 @@ struct KeyIsConditionImpl : public ConditionImpl
{
typedef T valueType;
KeyIsConditionImpl(const string& ItemTag, const valueType& value)
KeyIsConditionImpl(const std::string& ItemTag, const valueType& value)
: mItemTag(ItemTag), mValue(value) {}
virtual void prepare(const Category& c);
......@@ -786,12 +829,13 @@ struct KeyIsConditionImpl : public ConditionImpl
return mItemTag + " == " + std::to_string(mValue);
}
string mItemTag;
std::string mItemTag;
size_t mItemIx;
valueType mValue;
};
template<>
inline
std::string KeyIsConditionImpl<std::string>::str() const
{
return mItemTag + " == " + mValue;
......@@ -802,7 +846,7 @@ struct KeyIsNotConditionImpl : public ConditionImpl
{
typedef T valueType;
KeyIsNotConditionImpl(const string& ItemTag, const valueType& value)
KeyIsNotConditionImpl(const std::string& ItemTag, const valueType& value)
: mItemTag(ItemTag), mValue(value) {}
virtual void prepare(const Category& c);
......@@ -817,12 +861,13 @@ struct KeyIsNotConditionImpl : public ConditionImpl
return mItemTag + " != " + std::to_string(mValue);
}
string mItemTag;
std::string mItemTag;
size_t mItemIx;
valueType mValue;
};
template<>
inline
std::string KeyIsNotConditionImpl<std::string>::str() const
{
return mItemTag + " != " + mValue;
......@@ -831,7 +876,7 @@ std::string KeyIsNotConditionImpl<std::string>::str() const
template<typename COMP>
struct KeyCompareConditionImpl : public ConditionImpl
{
KeyCompareConditionImpl(const string& ItemTag, COMP&& comp)
KeyCompareConditionImpl(const std::string& ItemTag, COMP&& comp)
: mItemTag(ItemTag), mComp(std::move(comp)) {}
virtual void prepare(const Category& c);
......@@ -846,21 +891,21 @@ struct KeyCompareConditionImpl : public ConditionImpl
return mItemTag + " compare";
}
string mItemTag;
std::string mItemTag;
size_t mItemIx;
COMP mComp;
};
struct KeyMatchesConditionImpl : public ConditionImpl
{
KeyMatchesConditionImpl(const string& ItemTag, const std::regex& rx)
KeyMatchesConditionImpl(const std::string& ItemTag, const std::regex& rx)
: mItemTag(ItemTag), mRx(rx) {}
virtual void prepare(const Category& c);
virtual bool test(const Category& c, const Row& r) const
{
return std::regex_match(r[mItemIx].as<string>(), mRx);
return std::regex_match(r[mItemIx].as<std::string>(), mRx);
}
virtual std::string str() const
......@@ -868,7 +913,7 @@ struct KeyMatchesConditionImpl : public ConditionImpl
return mItemTag + " ~= " + "<rx>";
}
string mItemTag;
std::string mItemTag;
size_t mItemIx;
std::regex mRx;
};
......@@ -1053,13 +1098,13 @@ struct Empty {};
struct Key
{
Key(const string& itemTag) : mItemTag(itemTag) {}
Key(const std::string& itemTag) : mItemTag(itemTag) {}
Key(const char* itemTag) : mItemTag(itemTag) {}
Key(const Key&) = delete;
Key& operator=(const Key&) = delete;
string mItemTag;
std::string mItemTag;
};
template<typename T>
......@@ -1070,7 +1115,7 @@ Condition operator==(const Key& key, const T& v)
inline Condition operator==(const Key& key, const char* v)
{
string value(v ? v : "");
std::string value(v ? v : "");
return Condition(new detail::KeyIsConditionImpl<std::string>(key.mItemTag, value));
}
......@@ -1095,7 +1140,7 @@ Condition operator!=(const Key& key, const T& v)
inline Condition operator!=(const Key& key, const char* v)
{
string value(v ? v : "");
std::string value(v ? v : "");
return Condition(new detail::KeyIsNotConditionImpl<std::string>(key.mItemTag, value));
}
......@@ -1214,7 +1259,7 @@ class iterator_impl
}
bool operator==(const iterator_impl& rhs) const { return mCurrent == rhs.mCurrent; }
bool operator!=(const iterator_impl& rhs) const { return not (mCurrent == rhs.mCurrent); }
bool operator!=(const iterator_impl& rhs) const { return mCurrent != rhs.mCurrent; }
private:
Row mCurrent;
......@@ -1270,7 +1315,7 @@ class conditional_iterator_proxy
}
bool operator==(const conditional_iterator_impl& rhs) const { return mBegin == rhs.mBegin; }
bool operator!=(const conditional_iterator_impl& rhs) const { return not (mBegin == rhs.mBegin); }
bool operator!=(const conditional_iterator_impl& rhs) const { return mBegin != rhs.mBegin; }
private:
Category* mCat;
......@@ -1313,7 +1358,7 @@ class conditional_iterator_proxy
class RowSet
{
typedef vector<Row> base_type;
typedef std::vector<Row> base_type;
public:
......@@ -1329,10 +1374,10 @@ class RowSet
RowSet& operator=(const RowSet& rhs);
RowSet& operator=(RowSet&& rhs);
RowSet& orderBy(const string& Item)
RowSet& orderBy(const std::string& Item)
{ return orderBy({ Item }); }
RowSet& orderBy(std::initializer_list<string> Items);
RowSet& orderBy(std::initializer_list<std::string> Items);
class iterator
{
......@@ -1449,7 +1494,7 @@ class RowSet
private:
Category* mCat;
vector<ItemRow*> mItems;
std::vector<ItemRow*> mItems;
// Condition* mCond;
};
......@@ -1464,12 +1509,12 @@ class Category
friend class Row;
friend class detail::ItemReference;
Category(Datablock& db, const string& name, Validator* Validator);
Category(Datablock& db, const std::string& name, Validator* Validator);
Category(const Category&) = delete;
Category& operator=(const Category&) = delete;
~Category();
const string name() const { return mName; }
const std::string name() const { return mName; }
using iterator = iterator_impl<Row>;
using const_iterator = iterator_impl<const Row>;
......@@ -1505,10 +1550,10 @@ class Category
bool exists(Condition&& cond) const;
RowSet orderBy(const string& Item)
RowSet orderBy(const std::string& Item)
{ return orderBy({ Item }); }
RowSet orderBy(std::initializer_list<string> Items);
RowSet orderBy(std::initializer_list<std::string> Items);
std::tuple<Row,bool> emplace(Item value) { return emplace({ value }); }
......@@ -1567,15 +1612,15 @@ class Category
std::set<size_t> keyFieldsByIndex() const;
void drop(const string& field);
void drop(const std::string& field);
void getTagOrder(vector<string>& tags) const;
void getTagOrder(std::vector<std::string>& tags) const;
// return index for known column, or the next available column index
size_t getColumnIndex(const string& name) const;
bool hasColumn(const string& name) const;
const string& getColumnName(size_t columnIndex) const;
vector<string> getColumnNames() const;
size_t getColumnIndex(const std::string& name) const;
bool hasColumn(const std::string& name) const;
const std::string& getColumnName(size_t columnIndex) const;
std::vector<std::string> getColumnNames() const;
void reorderByIndex();
void sort(std::function<int(const Row&, const Row&)> comparator);
......@@ -1583,16 +1628,16 @@ class Category
private:
void write(std::ostream& os);
void write(std::ostream& os, const vector<string>& order);
void write(std::ostream& os, const vector<int>& order, bool includeEmptyColumns);
void write(std::ostream& os, const std::vector<std::string>& order);
void write(std::ostream& os, const std::vector<int>& order, bool includeEmptyColumns);
size_t addColumn(const string& name);
size_t addColumn(const std::string& name);
Datablock& mDb;
string mName;
std::string mName;
Validator* mValidator;
const ValidateCategory* mCatValidator = nullptr;
vector<ItemColumn> mColumns;
std::vector<ItemColumn> mColumns;
ItemRow* mHead;
ItemRow* mTail;
class CatIndex* mIndex;
......@@ -1621,8 +1666,8 @@ class File
void load(std::istream& is);
void save(std::ostream& os);
void save(std::ostream& os, const vector<string>& order) { write(os, order); }
void write(std::ostream& os, const vector<string>& order);
void save(std::ostream& os, const std::vector<std::string>& order) { write(os, order); }
void write(std::ostream& os, const std::vector<std::string>& order);
void loadDictionary(); // load the default dictionary, that is mmcifDdl in this case
void loadDictionary(const char* dict); // load one of the compiled in dictionaries
......@@ -1634,8 +1679,8 @@ class File
Datablock& firstDatablock() { return *mHead; }
void append(Datablock* e);
Datablock* get(const string& name) const;
Datablock& operator[](const string& name);
Datablock* get(const std::string& name) const;
Datablock& operator[](const std::string& name);
struct iterator : public std::iterator<std::forward_iterator_tag, Datablock>
{
......@@ -1662,7 +1707,7 @@ class File
iterator end() const;
const Validator& getValidator() const;
void getTagOrder(vector<string>& tags) const;
void getTagOrder(std::vector<std::string>& tags) const;
private:
......@@ -1706,7 +1751,7 @@ inline bool anyMatchesConditionImpl::test(const Category& c, const Row& r) const
{
try
{
if (std::regex_match(r[f].as<string>(), mRx))
if (std::regex_match(r[f].as<std::string>(), mRx))
{
result = true;
break;
......@@ -1809,7 +1854,7 @@ typename conditional_iterator_proxy<RowType>::iterator conditional_iterator_prox
template<typename RowType>
bool conditional_iterator_proxy<RowType>::empty() const
{
return mCBegin != mCEnd;
return mCBegin == mCEnd;
}
template<typename RowType>
......
......@@ -33,7 +33,7 @@ void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile);
std::string GetPDBHEADERLine(cif::File& cifFile, int truncate_at = 127);
std::string GetPDBCOMPNDLine(cif::File& cifFile, int truncate_at = 127);
std::string GetPDBSOURCELine(cif::File& cifFile, int truncate_at = 127);
std::string GetPDBAUTHORLine(cif::File& cifFile, int truncate_at = 127);
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
......@@ -24,6 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/Cif++.hpp"
#include <stack>
......@@ -155,7 +157,7 @@ class SacParser
virtual void produceDatablock(const std::string& name) = 0;
virtual void produceCategory(const std::string& name) = 0;
virtual void produceRow() = 0;
virtual void produceItem(const std::string& category, const std::string& item, const string& value) = 0;
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0;
protected:
......
......@@ -31,6 +31,8 @@
#include <vector>
#include <set>
#include <cassert>
#include <memory>
#include <list>
#include <unistd.h>
......
......@@ -24,6 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/Cif++.hpp"
//// the std regex of gcc is crashing....
......@@ -113,11 +115,11 @@ struct ValidateItem
struct ValidateCategory
{
std::string mName;
std::vector<string> mKeys;
cif::iset mGroups;
cif::iset mMandatoryFields;
std::set<ValidateItem> mItemValidators;
std::string mName;
std::vector<std::string> mKeys;
cif::iset mGroups;
cif::iset mMandatoryFields;
std::set<ValidateItem> mItemValidators;
bool operator<(const ValidateCategory& rhs) const
{
......
/* include/cif++/Config.hpp. Generated from Config.hpp.in by configure. */
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#if defined(_MSC_VER)
// These are Microsoft Visual C++ special settings
// the iso646 file contains the C++ keywords that are
// otherwise not recognized.
#include <ciso646>
#define snprintf _snprintf
// Disable some warnings
#pragma warning (disable : 4996)
#pragma warning (disable : 4355)
#endif
/* define if the Boost library is available */
#define HAVE_BOOST /**/
/* define if the Boost::IOStreams library is available */
#define HAVE_BOOST_IOSTREAMS /**/
/* define if the Boost::Regex library is available */
#define HAVE_BOOST_REGEX /**/
/* define if the Boost::Thread library is available */
#define HAVE_BOOST_THREAD /**/
/* define if the compiler supports basic C++17 syntax */
#define HAVE_CXX17 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Define to 1 if you have the `floor' function. */
#define HAVE_FLOOR 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if LIBBZ2 is found */
#define HAVE_LIBBZ2 1
/* Define to 1 if LIBZ is found */
#define HAVE_LIBZ 1
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
/* #undef HAVE_MALLOC */
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `pow' function. */
#define HAVE_POW 1
/* Define to 1 if the system has the type `ptrdiff_t'. */
#define HAVE_PTRDIFF_T 1
/* Define to 1 if you have the `rint' function. */
#define HAVE_RINT 1
/* Define to 1 if you have the `sqrt' function. */
#define HAVE_SQRT 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strchr' function. */
#define HAVE_STRCHR 1
/* Define to 1 if you have the `strerror' function. */
#define HAVE_STRERROR 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#define HAVE_SYS_IOCTL_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the <termios.h> header file. */
#define HAVE_TERMIOS_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Define to 1 if the system has the type `_Bool'. */
/* #undef HAVE__BOOL */
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
......@@ -129,58 +129,3 @@
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT32_T
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT64_T
/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT8_T
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef int32_t
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef int64_t
/* Define to the type of a signed integer type of width exactly 8 bits if such
a type exists and the standard includes do not define it. */
#undef int8_t
/* Define to rpl_malloc if the replacement function should be used. */
#undef malloc
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
/* Define to the type of an unsigned integer type of width exactly 16 bits if
such a type exists and the standard includes do not define it. */
#undef uint16_t
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef uint32_t
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef uint64_t
/* Define to the type of an unsigned integer type of width exactly 8 bits if
such a type exists and the standard includes do not define it. */
#undef uint8_t
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#if defined(_MSC_VER)
// These are Microsoft Visual C++ special settings
// the iso646 file contains the C++ keywords that are
// otherwise not recognized.
#include <ciso646>
#define snprintf _snprintf
// Disable some warnings
#pragma warning (disable : 4996)
#pragma warning (disable : 4355)
#endif
/* define if the Boost library is available */
#undef HAVE_BOOST
/* define if the Boost::IOStreams library is available */
#undef HAVE_BOOST_IOSTREAMS
/* define if the Boost::Regex library is available */
#undef HAVE_BOOST_REGEX
/* define if the Boost::Thread library is available */
#undef HAVE_BOOST_THREAD
/* define if the compiler supports basic C++17 syntax */
#undef HAVE_CXX17
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `floor' function. */
#undef HAVE_FLOOR
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if LIBBZ2 is found */
#undef HAVE_LIBBZ2
/* Define to 1 if LIBZ is found */
#undef HAVE_LIBZ
/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
to 0 otherwise. */
#undef HAVE_MALLOC
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `pow' function. */
#undef HAVE_POW
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
/* Define to 1 if you have the `rint' function. */
#undef HAVE_RINT
/* Define to 1 if you have the `sqrt' function. */
#undef HAVE_SQRT
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#undef LT_OBJDIR
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT32_T
/* Define for Solaris 2.5.1 so the uint64_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT64_T
/* Define for Solaris 2.5.1 so the uint8_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
#undef _UINT8_T
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
#undef inline
#endif
/* Define to the type of a signed integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef int32_t
/* Define to the type of a signed integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef int64_t
/* Define to the type of a signed integer type of width exactly 8 bits if such
a type exists and the standard includes do not define it. */
#undef int8_t
/* Define to rpl_malloc if the replacement function should be used. */
#undef malloc
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
/* Define to the type of an unsigned integer type of width exactly 16 bits if
such a type exists and the standard includes do not define it. */
#undef uint16_t
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
#undef uint32_t
/* Define to the type of an unsigned integer type of width exactly 64 bits if
such a type exists and the standard includes do not define it. */
#undef uint64_t
/* Define to the type of an unsigned integer type of width exactly 8 bits if
such a type exists and the standard includes do not define it. */
#undef uint8_t
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <unordered_map>
#include <clipper/clipper.h>
#include "cif++/Structure.hpp"
namespace mmcif
{
class DistanceMap
{
public:
DistanceMap(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell,
float maxDistance);
// simplified version for subsets of atoms (used in refining e.g.)
// DistanceMap(const Structure& p, const std::vector<Atom>& atoms);
DistanceMap(const DistanceMap&) = delete;
DistanceMap& operator=(const DistanceMap&) = delete;
float operator()(const Atom& a, const Atom& b) const;
std::vector<Atom> near(const Atom& a, float maxDistance = 3.5f) const;
std::vector<Atom> near(const Point& p, float maxDistance = 3.5f) const;
static clipper::Coord_orth
CalculateOffsetForCell(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell);
static std::vector<clipper::RTop_orth>
AlternativeSites(const clipper::Spacegroup& spacegroup, const clipper::Cell& cell);
private:
typedef std::map<std::tuple<size_t,size_t>,std::tuple<float,int32_t>> DistMap;
void AddDistancesForAtoms(const Residue& a, const Residue& b, DistMap& dm, int32_t rtix);
const Structure& structure;
size_t dim;
std::unordered_map<std::string,size_t> index;
std::map<size_t,std::string> rIndex;
float mMaxDistance, mMaxDistanceSQ;
std::vector<std::tuple<float,int32_t>> mA;
std::vector<size_t> mIA, mJA;
Point mD; // needed to move atoms to center
std::vector<clipper::RTop_orth> mRtOrth;
};
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <clipper/clipper.h>
#include "cif++/Structure.hpp"
namespace mmcif
{
template<typename FTYPE>
class Map
{
public:
typedef FTYPE ftype;
typedef typename clipper::Xmap<ftype> Xmap;
Map();
~Map();
void calculateStats();
double rmsDensity() const { return mRMSDensity; }
double meanDensity() const { return mMeanDensity; }
operator Xmap& () { return mMap; }
operator const Xmap& () const { return mMap; }
Xmap& get() { return mMap; }
const Xmap& get() const { return mMap; }
// These routines work with CCP4 map files
void read(const std::string& f);
void write(const std::string& f);
void write_masked(std::ostream& os, clipper::Grid_range range);
void write_masked(const std::string& f,
clipper::Grid_range range);
clipper::Spacegroup spacegroup() const { return mMap.spacegroup(); }
clipper::Cell cell() const { return mMap.cell(); }
private:
Xmap mMap;
double mMinDensity, mMaxDensity;
double mRMSDensity, mMeanDensity;
};
using clipper::HKL_info;
using clipper::HKL_data;
using clipper::data32::F_phi;
using clipper::data32::F_sigF;
using clipper::data32::Phi_fom;
using clipper::data32::Flag;
using clipper::Spacegroup;
using clipper::Cell;
using clipper::Grid_sampling;
// --------------------------------------------------------------------
bool IsMTZFile(const std::string& p);
// --------------------------------------------------------------------
template<typename FTYPE>
class MapMaker
{
public:
typedef Map<FTYPE> MapType;
typedef typename MapType::Xmap Xmap;
enum AnisoScalingFlag {
as_None, as_Observed, as_Calculated
};
MapMaker();
~MapMaker();
MapMaker(const MapMaker&) = delete;
MapMaker& operator=(const MapMaker&) = delete;
void loadMTZ(const std::string& mtzFile,
float samplingRate,
std::initializer_list<std::string> fbLabels = { "FWT", "PHWT" },
std::initializer_list<std::string> fdLabels = { "DELFWT", "PHDELWT" },
std::initializer_list<std::string> foLabels = { "FP", "SIGFP" },
std::initializer_list<std::string> fcLabels = { "FC_ALL", "PHIC_ALL" },
std::initializer_list<std::string> faLabels = { "FAN", "PHAN" });
void loadMaps(
const std::string& fbMapFile,
const std::string& fdMapFile,
float reshi, float reslo);
// following works on both mtz files and structure factor files in CIF format
void calculate(const std::string& hklin,
const Structure& structure,
bool noBulk, AnisoScalingFlag anisoScaling,
float samplingRate, bool electronScattering = false,
std::initializer_list<std::string> foLabels = { "FP", "SIGFP" },
std::initializer_list<std::string> freeLabels = { "FREE" });
void recalc(const Structure& structure,
bool noBulk, AnisoScalingFlag anisoScaling,
float samplingRate, bool electronScattering = false);
void printStats();
void writeMTZ(const std::string& file,
const std::string& project, const std::string& crystal);
MapType& fb() { return mFb; }
MapType& fd() { return mFd; }
MapType& fa() { return mFa; }
const MapType& fb() const { return mFb; }
const MapType& fd() const { return mFd; }
const MapType& fa() const { return mFa; }
double resLow() const { return mResLow; }
double resHigh() const { return mResHigh; }
const Spacegroup& spacegroup() const { return mHKLInfo.spacegroup(); }
const Cell& cell() const { return mHKLInfo.cell(); }
const Grid_sampling& gridSampling() const { return mGrid; }
private:
void loadFoFreeFromReflectionsFile(const std::string& hklin);
void loadFoFreeFromMTZFile(const std::string& hklin,
std::initializer_list<std::string> foLabels,
std::initializer_list<std::string> freeLabels);
void fixMTZ();
MapType mFb, mFd, mFa;
Grid_sampling mGrid;
float mSamplingRate;
double mResLow, mResHigh;
int mNumRefln = 1000, mNumParam = 20;
// Cached raw data
HKL_info mHKLInfo;
HKL_data<F_sigF> mFoData;
HKL_data<Flag> mFreeData;
HKL_data<F_phi> mFcData, mFbData, mFdData, mFaData;
HKL_data<Phi_fom> mPhiFomData;
};
}
......@@ -32,8 +32,6 @@
#include <boost/math/quaternion.hpp>
#include "clipper/core/coords.h"
namespace mmcif
{
......@@ -60,7 +58,7 @@ struct PointF
PointF() : mX(0), mY(0), mZ(0) {}
PointF(FType x, FType y, FType z) : mX(x), mY(y), mZ(z) {}
PointF(const clipper::Coord_orth& pt): mX(pt[0]), mY(pt[1]), mZ(pt[2]) {}
// PointF(const clipper::Coord_orth& pt): mX(pt[0]), mY(pt[1]), mZ(pt[2]) {}
template<typename PF>
PointF(const PointF<PF>& pt)
......@@ -68,13 +66,13 @@ struct PointF
, mY(static_cast<F>(pt.mY))
, mZ(static_cast<F>(pt.mZ)) {}
PointF& operator=(const clipper::Coord_orth& rhs)
{
mX = rhs[0];
mY = rhs[1];
mZ = rhs[2];
return *this;
}
// PointF& operator=(const clipper::Coord_orth& rhs)
// {
// mX = rhs[0];
// mY = rhs[1];
// mZ = rhs[2];
// return *this;
// }
template<typename PF>
PointF& operator=(const PointF<PF>& rhs)
......@@ -171,10 +169,10 @@ struct PointF
mZ = p.R_component_4();
}
operator clipper::Coord_orth() const
{
return clipper::Coord_orth(mX, mY, mZ);
}
// operator clipper::Coord_orth() const
// {
// return clipper::Coord_orth(mX, mY, mZ);
// }
operator std::tuple<const FType&, const FType&, const FType&>() const
{
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/ResolutionCalculator.hpp"
#include <clipper/clipper.h>
#include <cmath>
namespace mmcif
{
// --------------------------------------------------------------------
class ResolutionCalculator
{
public:
ResolutionCalculator(double a, double b, double c,
double alpha, double beta, double gamma);
ResolutionCalculator(const clipper::Cell& cell);
double operator()(int h, int k, int l) const
{
double tmpres = h * h * mCoefs[0] + h * k * mCoefs[1] +
h * l * mCoefs[2] + k * k * mCoefs[3] +
k * l * mCoefs[4] + l * l * mCoefs[5];
return 1.0 / std::sqrt(tmpres);
}
private:
double mCoefs[6];
};
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/MapMaker.hpp"
#include "cif++/DistanceMap.hpp"
#include "cif++/BondMap.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
struct AtomData;
class BoundingBox;
struct ResidueStatistics
{
std::string asymID;
int seqID;
std::string compID;
std::string authSeqID;
double RSR, SRSR, RSCCS, EDIAm, OPIA;
int ngrid;
};
std::ostream& operator<<(std::ostream& os, const ResidueStatistics& st);
// --------------------------------------------------------------------
class StatsCollector
{
public:
StatsCollector(const StatsCollector&) = delete;
StatsCollector& operator=(const StatsCollector&) = delete;
StatsCollector(const mmcif::MapMaker<float>& mm,
mmcif::Structure& structure, bool electronScattering);
virtual std::vector<ResidueStatistics> collect() const;
virtual std::vector<ResidueStatistics> collect(const std::string& asymID,
int resFirst, int resLast, bool authNameSpace = true) const;
virtual ResidueStatistics collect(std::initializer_list<const mmcif::Residue*> residues) const;
virtual ResidueStatistics collect(std::initializer_list<mmcif::Atom> atoms) const;
virtual ResidueStatistics collect(const std::vector<mmcif::Atom>& atoms) const;
protected:
// asym-seqid-compid
std::vector<ResidueStatistics> collect(
const std::vector<std::tuple<std::string,int,std::string,std::string>>& residues,
BoundingBox& bbox, bool addWaters) const;
void initialize();
virtual void calculate(std::vector<AtomData>& atomData) const;
struct cmpGPt
{
bool operator()(const clipper::Coord_grid& a, const clipper::Coord_grid& b) const
{
int d = a.u() - b.u();
if (d == 0)
d = a.v() - b.v();
if (d == 0)
d = a.w() - b.w();
return d < 0;
}
};
typedef std::map<clipper::Coord_grid,double,cmpGPt> GridPtDataMap;
mmcif::Structure& mStructure;
const mmcif::MapMaker<float>& mMapMaker;
clipper::Spacegroup mSpacegroup;
clipper::Cell mCell;
clipper::Grid_sampling mGrid;
float mResHigh, mResLow;
bool mElectronScattering;
std::map<std::string,std::pair<double,double>> mRmsScaled;
void collectSums(std::vector<AtomData>& atomData, GridPtDataMap& gridPointDensity) const;
void sumDensity(std::vector<AtomData>& atomData,
GridPtDataMap& gridPointDensity, std::map<std::string,std::vector<double>>& zScoresPerAsym) const;
// Other variables we cache
double mMeanDensityFb, mRMSDensityFb, mRMSDensityFd;
double mSZ; // average electron density in cell
double mVF; // degrees of freedom
double mVC; // cell volume?
};
// --------------------------------------------------------------------
class EDIAStatsCollector : public StatsCollector
{
public:
EDIAStatsCollector(mmcif::MapMaker<float>& mm,
mmcif::Structure& structure, bool electronScattering,
const mmcif::BondMap& bondMap);
protected:
virtual void calculate(std::vector<AtomData>& atomData) const;
mmcif::DistanceMap mDistanceMap;
const mmcif::BondMap& mBondMap;
std::map<mmcif::AtomType,float> mRadii;
};
}
......@@ -28,12 +28,6 @@
#include <numeric>
#include <boost/math/quaternion.hpp>
#include <boost/any.hpp>
#include <clipper/core/coords.h>
#include "cif++/AtomType.hpp"
#include "cif++/Point.hpp"
#include "cif++/Compound.hpp"
......@@ -87,10 +81,10 @@ class Atom
Point location() const;
void location(Point p);
Atom symmetryCopy(const Point& d, const clipper::RTop_orth& rt);
bool isSymmetryCopy() const;
std::string symmetry() const;
const clipper::RTop_orth& symop() const;
// Atom symmetryCopy(const Point& d, const clipper::RTop_orth& rt);
// bool isSymmetryCopy() const;
// std::string symmetry() const;
// const clipper::RTop_orth& symop() const;
const Compound& comp() const;
bool isWater() const;
......@@ -126,8 +120,8 @@ class Atom
bool operator==(const Atom& rhs) const;
// get clipper format Atom
clipper::Atom toClipper() const;
// // get clipper format Atom
// clipper::Atom toClipper() const;
// Radius calculation based on integrating the density until perc of electrons is found
void calculateRadius(float resHigh, float resLow, float perc);
......
......@@ -26,137 +26,9 @@
#pragma once
#include "cif++/Structure.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Functions to use when working with symmetry stuff
clipper::Coord_orth CalculateOffsetForCell(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell);
std::vector<clipper::RTop_orth> AlternativeSites(const clipper::Spacegroup& spacegroup, const clipper::Cell& cell);
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code
// std::string SpacegroupToHall(std::string spacegroup);
// --------------------------------------------------------------------
// To iterate over all symmetry copies of an atom
class SymmetryAtomIteratorFactory
{
public:
// SymmetryAtomIteratorFactory(const Structure& p);
// SymmetryAtomIteratorFactory(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell)
// : mSpacegroupNr(spacegroup.spacegroup_number())
// , mSpacegroup(spacegroup)
// , mD(CalculateOffsetForCell(p, spacegroup, cell))
// , mRtOrth(AlternativeSites(spacegroup, cell))
// , mCell(cell) {}
SymmetryAtomIteratorFactory(const Structure& p, int spacegroupNr, const clipper::Cell& cell);
SymmetryAtomIteratorFactory(const SymmetryAtomIteratorFactory&) = delete;
SymmetryAtomIteratorFactory& operator=(const SymmetryAtomIteratorFactory&) = delete;
class SymmetryAtomIterator : public std::iterator<std::forward_iterator_tag, const Atom>
{
public:
typedef std::iterator<std::forward_iterator_tag, const Atom> baseType;
typedef typename baseType::pointer pointer;
typedef typename baseType::reference reference;
SymmetryAtomIterator(const SymmetryAtomIteratorFactory& factory, const Atom& atom)
: m_f(&factory), m_i(0), m_a(atom), m_c(atom) {}
SymmetryAtomIterator(const SymmetryAtomIteratorFactory& factory, const Atom& atom, int)
: SymmetryAtomIterator(factory, atom)
{
m_i = m_f->mRtOrth.size();
}
SymmetryAtomIterator(const SymmetryAtomIterator& iter)
: m_f(iter.m_f), m_i(iter.m_i), m_a(iter.m_a), m_c(iter.m_c) {}
SymmetryAtomIterator& operator=(const SymmetryAtomIterator& iter)
{
if (this != &iter)
{
m_f = iter.m_f;
m_i = iter.m_i;
m_a = iter.m_a;
m_c = iter.m_c;
}
return *this;
}
reference operator*() { return m_c; }
pointer operator->() { return &m_c; }
SymmetryAtomIterator operator++()
{
if (++m_i < m_f->mRtOrth.size())
m_c = m_a.symmetryCopy(m_f->mD, m_f->mRtOrth[m_i]);
return *this;
}
SymmetryAtomIterator operator++(int)
{
SymmetryAtomIterator result(*this);
this->operator++();
return result;
}
bool operator==(const SymmetryAtomIterator& iter) const
{
return m_f == iter.m_f and m_i == iter.m_i;
}
bool operator!=(const SymmetryAtomIterator& iter) const
{
return m_f != iter.m_f or m_i != iter.m_i;
}
private:
const SymmetryAtomIteratorFactory* m_f;
size_t m_i;
Atom m_a, m_c;
};
class SymmetryAtomIteratorRange
{
public:
SymmetryAtomIteratorRange(const SymmetryAtomIteratorFactory& f, const Atom& a)
: m_f(f), m_a(a) {}
SymmetryAtomIterator begin()
{
return SymmetryAtomIterator(m_f, m_a);
}
SymmetryAtomIterator end()
{
return SymmetryAtomIterator(m_f, m_a, 1);
}
private:
const SymmetryAtomIteratorFactory& m_f;
Atom m_a;
};
SymmetryAtomIteratorRange operator()(const Atom& a) const
{
return SymmetryAtomIteratorRange(*this, a);
}
std::string symop_mmcif(const Atom& a) const;
private:
int mSpacegroupNr;
clipper::Spacegroup mSpacegroup;
Point mD; // needed to move atoms to center
std::vector<clipper::RTop_orth> mRtOrth;
clipper::Cell mCell;
};
}
......@@ -24,6 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <vector>
#include <string>
#include <tuple>
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <thread>
#include <mutex>
#include "cif++/Structure.hpp"
#include "cif++/AtomShape.hpp"
#include "cif++/Point.hpp"
#include <newuoa.h>
using namespace std;
namespace mmcif
{
// --------------------------------------------------------------------
template <class F>
NewuoaClosure make_closure(F &function) {
struct Wrap {
static double call(void *data, long n, const double *values) {
return reinterpret_cast<F *>(data)->operator()(n, values);
}
};
return NewuoaClosure {&function, &Wrap::call};
}
// --------------------------------------------------------------------
// sine integration function based on the description in
// GalSim: The modular galaxy image simulation toolkit
// B.T.P. Rowe et. al.
// DOI: 10.1016/j.ascom.2015.02.002
double sineIntegration(double x)
{
struct P { double n, d; };
double result = 0;
if (x <= 4)
{
const P kP[] =
{
{ 1, 1, },
{ -4.54393409816329991e-2, 1.01162145739225565e-2 },
{ 1.15457225751016682e-3, 4.99175116169755106e-5 },
{ -1.41018536821330254e-5, 1.55654986308745614e-7 },
{ 9.43280809438713025e-8, 3.28067571055789734e-10 },
{ -3.53201978997168357e-10, 4.5049097575386581e-13 },
{ 7.08240282274875911e-13, 3.21107051193712168e-16 },
{ -6.05338212010422477e-16, 0 }
};
double xs = x * x;
double xi = 1;
double sn = 0, sd = 0;
for (auto p: kP)
{
sn += xi * p.n;
sd += xi * p.d;
xi *= xs;
}
result = x * (sn / sd);
}
else
{
const P
kF[] =
{
{ 1, 1, },
{ 7.44437068161936700618e2, 7.46437068161927678031e2, },
{ 1.96396372895146869801e5, 1.97865247031583951450e5, },
{ 2.37750310125431834034e7, 2.41535670165126845144e7, },
{ 1.43073403821274636888e9, 1.47478952192985464958e9, },
{ 4.33736238870432522765e10, 4.58595115847765779830e10, },
{ 6.40533830574022022911e11, 7.08501308149515401563e11, },
{ 4.20968180571076940208e12, 5.06084464593475076774e12, },
{ 1.00795182980368574617e13, 1.43468549171581016479e13, },
{ 4.94816688199951963482e12, 1.11535493509914254097e13, },
{ -4.94701168645415959931e11, 0 }
},
kG[] = {
{ 1, 1, },
{ 8.1359520115168615e2, 8.19595201151451564e2, },
{ 2.35239181626478200e5, 2.40036752835578777e5, },
{ 3.12557570795778731e7, 3.26026661647090822e7, },
{ 2.06297595146763354e9, 2.23355543278099360e9, },
{ 6.83052205423625007e10, 7.87465017341829930e10, },
{ 1.09049528450362786e12, 1.39866710696414565e12, },
{ 7.57664583257834349e12, 1.17164723371736605e13, },
{ 1.81004487464664575e13, 4.01839087307656620e13, },
{ 6.43291613143049485e12, 3.99653257887490811e13, },
{ -1.36517137670871689e12, 0 }
};
double xs = pow(x, -2);
double xi = 1;
double sn = 0, sd = 0;
for (auto p: kF)
{
sn += xi * p.n;
sd += xi * p.d;
xi *= xs;
}
double fx = (sn / sd) / x;
sn = 0;
sd = 0;
xi = 1;
for (auto p : kG)
{
sn += xi * p.n;
sd += xi * p.d;
xi *= xs;
}
double gx = (sn / sd) / (x * x);
result = mmcif::kPI / 2 - fx * cos(x) - gx * sin(x);
}
return result;
}
// --------------------------------------------------------------------
// Internal class to cache some common data
class DensityIntegration
{
public:
DensityIntegration(float resolutionLow, float resolutionHigh);
static DensityIntegration& instance(float resolutionLow, float resolutionHigh);
double integrateRadius(float perc, float occupancy, double yi, const vector<double>& fst) const;
double integrateDensity(double r, int ks, const vector<double>& fst) const;
float a() const { return mA; }
float b() const { return mB; }
const vector<double>& st() const { return mST; }
const vector<double>& sts() const { return mSTS; }
const vector<double>& wa() const { return mWA; }
private:
float mA, mB;
int mM;
// Gauss-Legendre quadrature weights and abscissae
vector<double> mWA, mST, mSTS;
static list<DensityIntegration> sInstances;
};
list<DensityIntegration> DensityIntegration::sInstances;
DensityIntegration& DensityIntegration::instance(float resolutionLow, float resolutionHigh)
{
static mutex m;
lock_guard<mutex> lock(m);
float a = 0.5f / resolutionLow, b = 0.5f / resolutionHigh;
auto i = find_if(sInstances.begin(), sInstances.end(), [=](const DensityIntegration& di)
{ return di.mA == a and di.mB == b; });
if (i == sInstances.end())
{
sInstances.emplace_back(resolutionLow, resolutionHigh);
i = prev(sInstances.end());
}
return *i;
}
DensityIntegration::DensityIntegration(float resolutionLow, float resolutionHigh)
{
mA = 0.5f / resolutionLow;
mB = 0.5f / resolutionHigh;
mM = static_cast<int>(12.0 * sqrt(1.2 * mB) + 1);
if (mM < 3)
mM = 3;
int N = 2 * mM;
// int J = N;
double xr = mB - mA;
double xh = .5 * xr;
double xm = 0.5 * (mB + mA);
mWA = vector<double>(N, 0);
mST = vector<double>(N, 0);
mSTS = vector<double>(N, 0);
for (int i = 1, j = N; i <= mM; ++i, --j)
{
double z, zo, dp;
z = cos(mmcif::kPI * (i - 0.25) / (N + 0.5));
do
{
double p1 = 1;
double p2 = 0;
for (int k = 1; k <= N; ++k)
{
double p3 = p2;
p2 = p1;
p1 = ((2 * k - 1) * z * p2 - (k - 1) * p3) / k;
}
dp = N * (z * p1 - p2) / (z * z - 1);
zo = z;
z = z - p1 / dp;
}
while (abs(z - zo) > 3e-14);
mWA[i - 1] = xr / ((1 - z * z) * dp * dp);
mWA[j - 1] = mWA[i - 1];
mST[i - 1] = xm - xh * z;
mST[j - 1] = xm + xh * z;
}
transform(mST.begin(), mST.end(), mSTS.begin(), [](double s) { return s * s; });
}
// --------------------------------------------------------------------
// Calculate Radius integral over r of calculated density
// code inspired by radint.f in edstats
double DensityIntegration::integrateDensity(double r, int ks, const vector<double>& fst) const
{
double y = 0;
double rt = r;
if (rt < 0)
rt = 0;
if (rt > 1e-10)
{
double t = 4 * mmcif::kPI * rt;
y = 0;
for (size_t i = 0; i < mST.size(); ++i)
y += fst[i] * sineIntegration(t * mST[i]);
if (r < 0)
y = y - ks * r;
}
return ks * y;
}
double DensityIntegration::integrateRadius(float perc, float occupancy, double yi, const vector<double>& fst) const
{
double yt = perc * 0.25 * mmcif::kPI * occupancy * yi;
double initialValue = 0.25;
// code from newuoa-example
const long variables_count = 1;
const long number_of_interpolation_conditions = (variables_count + 1)*(variables_count + 2)/2;
double variables_values[] = { initialValue };
const double initial_trust_region_radius = 1e-3;
const double final_trust_region_radius = 1e3;
const long max_function_calls_count = 100;
const size_t working_space_size = NEWUOA_WORKING_SPACE_SIZE(variables_count,
number_of_interpolation_conditions);
double working_space[working_space_size];
auto function = [&] (long n, const double *x)
{
assert(n == 1);
return this->integrateDensity(x[0], -1, fst);
};
auto closure = make_closure(function);
double result = newuoa_closure(
&closure,
variables_count,
number_of_interpolation_conditions,
variables_values,
initial_trust_region_radius,
final_trust_region_radius,
max_function_calls_count,
working_space);
//
const double kRE = 5e-5;
double y1 = 0;
double y2 = -result;
double x1 = 0;
double x2 = variables_values[0];
if (y2 > yt)
{
for (int it = 0; it < 100; ++it)
{
double x = 0.5 * (x1 + x2);
double y = integrateDensity(x, 1, fst);
if (abs(y - yt) < kRE * abs(yt))
{
result = x;
break;
}
if ((y1 < yt and y < yt) or (y1 > yt and y > yt))
{
x1 = x;
y1 = y;
}
else
{
x2 = x;
y2 = y;
}
}
}
else
result = x2;
return result;
}
// --------------------------------------------------------------------
struct AtomShapeImpl
{
AtomShapeImpl(Point location, AtomType symbol, int charge, float uIso, float occupancy, float resHigh, float resLow, bool electronScattering)
: mSymbol(symbol), mCharge(charge), mUIso(uIso), mOccupancy(occupancy)
, mResHigh(resHigh), mResLow(resLow)
, mElectronScattering(electronScattering)
, mLocation(location)
, mIntegrator(DensityIntegration::instance(resLow, resHigh))
{
auto st = mIntegrator.st();
auto sts = mIntegrator.sts();
auto wa = mIntegrator.wa();
mYi = 0;
mFst = vector<double>(st.size(), 0);
auto& D =
mElectronScattering ? AtomTypeTraits(symbol).elsf() : AtomTypeTraits(symbol).wksf(charge);
auto bIso = clipper::Util::u2b(uIso);
float as = mIntegrator.a() * mIntegrator.a();
float bs = mIntegrator.b() * mIntegrator.b();
for (int i = 0; i < 6; ++i)
{
double bi = D.b[i] + bIso;
mYi += D.a[i] * (exp(-bi * as) - exp(-bi * bs)) / bi;
}
for (size_t i = 0; i < st.size(); ++i)
{
double t = 0;
for (int j = 0; j < 6; ++j)
{
double bj = D.b[j] + bIso;
t += D.a[j] * exp(-bj * sts[i]);
}
mFst[i] = occupancy * wa[i] * t * st[i];
}
for (size_t i = 0; i < 6; ++i)
{
mBW[i] = -4 * kPI * kPI / (D.b[i] + bIso);
mAW[i] = D.a[i] * pow(-mBW[i] / kPI, 1.5);
}
}
virtual ~AtomShapeImpl() {}
AtomType mSymbol;
int mCharge;
float mUIso, mOccupancy;
float mResHigh, mResLow;
bool mElectronScattering;
Point mLocation;
const DensityIntegration& mIntegrator;
double mYi;
vector<double> mFst;
float mAW[6], mBW[6];
float integratedRadius(float perc) const
{
float result = mIntegrator.integrateRadius(perc, mOccupancy, mYi, mFst);
assert(not isnan(result));
return result;
}
virtual float calculatedDensity(float r) const
{
float rsq = r * r;
return mOccupancy *
(
mAW[0] * exp(mBW[0] * rsq) + mAW[1] * exp(mBW[1] * rsq) +
mAW[2] * exp(mBW[2] * rsq) + mAW[3] * exp(mBW[3] * rsq) +
mAW[4] * exp(mBW[4] * rsq) + mAW[5] * exp(mBW[5] * rsq)
);
}
virtual float calculatedDensity(Point p) const
{
return calculatedDensity(Distance(mLocation, p));
}
};
struct AtomShapeAnisoImpl : public AtomShapeImpl
{
AtomShapeAnisoImpl(Point location, AtomType symbol, int charge, clipper::U_aniso_orth& anisou, float occupancy, float resHigh, float resLow, bool electronScattering)
: AtomShapeImpl(location, symbol, charge, anisou.u_iso(), occupancy, resHigh, resLow, electronScattering)
, mAnisoU(anisou)
{
auto& D =
mElectronScattering ? AtomTypeTraits(symbol).elsf() : AtomTypeTraits(symbol).wksf(charge);
const float fourpi2 = 4 * kPI * kPI;
const float pi3 = kPI * kPI * kPI;
for (int i = 0; i < 6; ++i)
{
mAnisoInv[i] = clipper::Mat33sym<>(
-2 * mAnisoU.mat00() - D.b[i] / fourpi2,
-2 * mAnisoU.mat11() - D.b[i] / fourpi2,
-2 * mAnisoU.mat22() - D.b[i] / fourpi2,
-2 * mAnisoU.mat01(),
-2 * mAnisoU.mat02(),
-2 * mAnisoU.mat12()).inverse();
double det = - mAnisoInv[i].det();
mAW[i] = D.a[i] * sqrt(det / pi3);
mBW[i] = -pow(det, 1.0/3);
}
}
virtual float calculatedDensity(Point p) const
{
const clipper::Coord_orth dxyz(p - mLocation);
return mOccupancy *
(
mAW[0] * exp(mAnisoInv[0].quad_form(dxyz)) +
mAW[1] * exp(mAnisoInv[1].quad_form(dxyz)) +
mAW[2] * exp(mAnisoInv[2].quad_form(dxyz)) +
mAW[3] * exp(mAnisoInv[3].quad_form(dxyz)) +
mAW[4] * exp(mAnisoInv[4].quad_form(dxyz)) +
mAW[5] * exp(mAnisoInv[5].quad_form(dxyz))
);
}
clipper::U_aniso_orth mAnisoU;
array<clipper::Mat33sym<>,6> mAnisoInv;
};
// --------------------------------------------------------------------
AtomShape::AtomShape(const Atom& atom, float resHigh, float resLow, bool electronScattering)
: mImpl(nullptr)
{
float anisou[6];
if (atom.getAnisoU(anisou))
{
clipper::U_aniso_orth u(anisou[0], anisou[3], anisou[5], anisou[1], anisou[2], anisou[4]);
while (u.det() < 1.0e-20)
u = u + clipper::U_aniso_orth(0.01, 0.01, 0.01, 0, 0, 0);
mImpl = new AtomShapeAnisoImpl(atom.location(), atom.type(), atom.charge(), u, atom.occupancy(), resHigh, resLow, electronScattering);
}
else
mImpl = new AtomShapeImpl(atom.location(), atom.type(), atom.charge(), atom.uIso(), atom.occupancy(), resHigh, resLow, electronScattering);
}
AtomShape::AtomShape(const Atom& atom, float resHigh, float resLow, bool electronScattering, float bFactor)
: mImpl(new AtomShapeImpl(atom.location(), atom.type(), atom.charge(), clipper::Util::b2u(bFactor),
1.0, resHigh, resLow, electronScattering))
{
}
AtomShape::~AtomShape()
{
delete mImpl;
}
float AtomShape::radius() const
{
return mImpl->integratedRadius(0.95);
}
float AtomShape::calculatedDensity(float r) const
{
return mImpl->calculatedDensity(r);
}
float AtomShape::calculatedDensity(Point p) const
{
return mImpl->calculatedDensity(p);
}
}
......@@ -24,6 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cmath>
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
......@@ -35,7 +37,7 @@ namespace mmcif
namespace data
{
const float kNA = nan("1");
const float kNA = std::nan("1");
const AtomTypeInfo kKnownAtoms[] =
{
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/Config.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/BondMap.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp"
using namespace std;
namespace mmcif
{
// --------------------------------------------------------------------
BondMap::BondMap(const Structure& p)
{
auto atoms = p.atoms();
dim = atoms.size();
// bond = vector<bool>(dim * (dim - 1), false);
for (auto& atom: atoms)
{
size_t ix = index.size();
index[atom.id()] = ix;
};
auto bindAtoms = [this](const string& a, const string& b)
{
uint32_t ixa = index[a];
uint32_t ixb = index[b];
bond.insert(key(ixa, ixb));
};
auto linkAtoms = [this,&bindAtoms](const string& a, const string& b)
{
bindAtoms(a, b);
link[a].insert(b);
link[b].insert(a);
};
cif::Datablock& db = p.getFile().data();
// collect all compounds first
set<string> compounds;
for (auto c: db["chem_comp"])
compounds.insert(c["id"].as<string>());
// make sure we also have all residues in the polyseq
for (auto m: db["entity_poly_seq"])
{
string c = m["mon_id"].as<string>();
if (compounds.count(c))
continue;
if (cif::VERBOSE > 1)
cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << endl;
compounds.insert(c);
}
cif::Progress progress(compounds.size(), "Creating bond map");
// some helper indices to speed things up a bit
map<tuple<string,int,string>,string> atomMapByAsymSeqAndAtom;
for (auto& a: p.atoms())
{
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
atomMapByAsymSeqAndAtom[key] = a.id();
}
// first link all residues in a polyseq
string lastAsymID;
int lastSeqID = 0;
for (auto r: db["pdbx_poly_seq_scheme"])
{
string asymID;
int seqID;
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
if (asymID != lastAsymID) // first in a new sequece
{
lastAsymID = asymID;
lastSeqID = seqID;
continue;
}
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C")];
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N")];
// auto c = db["atom_site"].find(cif::Key("label_asym_id") == asymID and cif::Key("label_seq_id") == lastSeqID and cif::Key("label_atom_id") == "C");
// if (c.size() != 1 and VERBOSE > 1)
// cerr << "Unexpected number (" << c.size() << ") of atoms with atom ID C in asym_id " << asymID << " with seq id " << lastSeqID << endl;
//
// auto n = db["atom_site"].find(cif::Key("label_asym_id") == asymID and cif::Key("label_seq_id") == seqID and cif::Key("label_atom_id") == "N");
// if (n.size() != 1 and VERBOSE > 1)
// cerr << "Unexpected number (" << n.size() << ") of atoms with atom ID N in asym_id " << asymID << " with seq id " << seqID << endl;
//
// if (not (c.empty() or n.empty()))
// bindAtoms(c.front()["id"].as<string>(), n.front()["id"].as<string>());
if (not (c.empty() or n.empty()))
bindAtoms(c, n);
lastSeqID = seqID;
}
for (auto l: db["struct_conn"])
{
string asym1, asym2, atomId1, atomId2;
int seqId1 = 0, seqId2 = 0;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
"ptnr1_label_seq_id", "ptnr2_label_seq_id");
string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1)];
string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2)];
// auto a =
// l["ptnr1_label_seq_id"].empty() ?
// db["atom_site"].find(cif::Key("label_asym_id") == asym1 and cif::Key("label_atom_id") == atomId1) :
// db["atom_site"].find(cif::Key("label_asym_id") == asym1 and cif::Key("label_seq_id") == seqId1 and cif::Key("label_atom_id") == atomId1);
//
// if (a.size() != 1 and VERBOSE > 1)
// cerr << "Unexpected number (" << a.size() << ") of atoms for link with asym_id " << asym1 << " seq_id " << seqId1 << " atom_id " << atomId1 << endl;
// auto b =
// l["ptnr2_label_seq_id"].empty() ?
// db["atom_site"].find(cif::Key("label_asym_id") == asym2 and cif::Key("label_atom_id") == atomId2) :
// db["atom_site"].find(cif::Key("label_asym_id") == asym2 and cif::Key("label_seq_id") == seqId2 and cif::Key("label_atom_id") == atomId2);
//
// if (b.size() != 1 and VERBOSE > 1)
// cerr << "Unexpected number (" << b.size() << ") of atoms for link with asym_id " << asym2 << " seq_id " << seqId2 << " atom_id " << atomId2 << endl;
// if (not (a.empty() or b.empty()))
// linkAtoms(a.front()["id"].as<string>(), b.front()["id"].as<string>());
if (not (a.empty() or b.empty()))
linkAtoms(a, b);
}
// then link all atoms in the compounds
for (auto c: compounds)
{
auto* compound = mmcif::Compound::create(c);
if (not compound)
{
if (cif::VERBOSE)
cerr << "Missing compound information for " << c << endl;
continue;
}
if (compound->isWater())
{
if (cif::VERBOSE)
cerr << "skipping water in bond map calculation" << endl;
continue;
}
// loop over poly_seq_scheme
for (auto r: db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == c))
{
string asymID;
int seqID;
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto& a) { return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (compound->atomsBonded(rAtoms[i].labelAtomID(), rAtoms[j].labelAtomID()))
bindAtoms(rAtoms[i].id(), rAtoms[j].id());
}
}
}
// loop over pdbx_nonpoly_scheme
for (auto r: db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == c))
{
string asymID;
cif::tie(asymID) = r.get("asym_id");
vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto& a) { return a.labelAsymID() == asymID; });
// for (auto a: db["atom_site"].find(cif::Key("label_asym_id") == asymID))
// rAtoms.push_back(p.getAtomByID(a["id"].as<string>()));
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (compound->atomsBonded(rAtoms[i].labelAtomID(), rAtoms[j].labelAtomID()))
{
uint32_t ixa = index[rAtoms[i].id()];
uint32_t ixb = index[rAtoms[j].id()];
bond.insert(key(ixa, ixb));
}
}
}
}
}
// start by creating an index for single bonds
//cout << "Maken van b1_2 voor " << bond.size() << " bindingen" << endl;
multimap<uint32_t,uint32_t> b1_2;
for (auto& bk: bond)
{
uint32_t a, b;
tie(a, b) = dekey(bk);
b1_2.insert({ a, b });
b1_2.insert({ b, a });
}
//cout << "Afmeting b1_2: " << b1_2.size() << endl;
multimap<uint32_t,uint32_t> b1_3;
for (uint32_t i = 0; i < dim; ++i)
{
auto a = b1_2.equal_range(i);
vector<uint32_t> s;
for (auto j = a.first; j != a.second; ++j)
s.push_back(j->second);
for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
{
for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
{
uint32_t x = s[si1];
uint32_t y = s[si2];
if (isBonded(x, y))
continue;
b1_3.insert({ x, y });
b1_3.insert({ y, x });
}
}
}
//cout << "Afmeting b1_3: " << b1_3.size() << endl;
for (uint32_t i = 0; i < dim; ++i)
{
auto a1 = b1_2.equal_range(i);
auto a2 = b1_3.equal_range(i);
for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
{
for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
{
uint32_t b1 = ai1->second;
uint32_t b2 = ai2->second;
if (isBonded(b1, b2))
continue;
bond_1_4.insert(key(b1, b2));
}
}
}
//cout << "Afmeting b1_4: " << bond_1_4.size() << endl;
}
vector<string> BondMap::linked(const Atom& a) const
{
auto i = link.find(a.id());
vector<string> result;
if (i != link.end())
result = vector<string>(i->second.begin(), i->second.end());
return result;
}
}
......@@ -34,13 +34,7 @@
#include <numeric>
#include <fstream>
#if __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
#elif __has_include(<boost/filesystem.hpp>)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
#endif
#include <boost/algorithm/string.hpp>
#include <boost/iostreams/filtering_stream.hpp>
......@@ -56,6 +50,7 @@ namespace fs = boost::filesystem;
using namespace std;
namespace ba = boost::algorithm;
namespace io = boost::iostreams;
namespace fs = std::filesystem;
namespace cif
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/Config.hpp"
#include <atomic>
#include <mutex>
#include "cif++/DistanceMap.hpp"
#include "cif++/CifUtils.hpp"
using namespace std;
//#define DEBUG_VOOR_BART
namespace mmcif
{
// --------------------------------------------------------------------
inline ostream& operator<<(ostream& os, const Atom& a)
{
os << a.labelAsymID() << ':' << a.labelSeqID() << '/' << a.labelAtomID();
return os;
}
// --------------------------------------------------------------------
clipper::Coord_orth DistanceMap::CalculateOffsetForCell(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell)
{
auto& atoms = p.atoms();
size_t dim = atoms.size();
vector<clipper::Coord_orth> locations;
locations.reserve(dim);
// bounding box
Point pMin(numeric_limits<float>::max(), numeric_limits<float>::max(), numeric_limits<float>::max()),
pMax(numeric_limits<float>::min(), numeric_limits<float>::min(), numeric_limits<float>::min());
for (auto& atom: atoms)
{
auto p = atom.location();
locations.push_back(p);
if (pMin.mX > p.mX)
pMin.mX = p.mX;
if (pMin.mY > p.mY)
pMin.mY = p.mY;
if (pMin.mZ > p.mZ)
pMin.mZ = p.mZ;
if (pMax.mX < p.mX)
pMax.mX = p.mX;
if (pMax.mY < p.mY)
pMax.mY = p.mY;
if (pMax.mZ < p.mZ)
pMax.mZ = p.mZ;
};
// correct locations so that the median of x, y and z are inside the cell
vector<float> c(dim);
auto median = [&]()
{
return dim % 1 == 0
? c[dim / 2]
: (c[dim / 2 - 1] + c[dim / 2]) / 2;
};
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[0]; });
sort(c.begin(), c.end());
float mx = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[1]; });
sort(c.begin(), c.end());
float my = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[2]; });
sort(c.begin(), c.end());
float mz = median();
if (cif::VERBOSE > 1)
cerr << "median position of atoms: " << Point(mx, my, mz) << endl;
auto calculateD = [&](float m, float c)
{
float d = 0;
if (c != 0)
{
while (m + d < -(c / 2))
d += c;
while (m + d > (c / 2))
d -= c;
}
return d;
};
Point D;
if (cell.a() == 0 or cell.b() == 0 or cell.c() == 0)
throw runtime_error("Invalid cell, contains a dimension that is zero");
D.mX = calculateD(mx, cell.a());
D.mY = calculateD(my, cell.b());
D.mZ = calculateD(mz, cell.c());
if (D.mX != 0 or D.mY != 0 or D.mZ != 0)
{
if (cif::VERBOSE)
cerr << "moving coorinates by " << D.mX << ", " << D.mY << " and " << D.mZ << endl;
}
return D;
}
// --------------------------------------------------------------------
vector<clipper::RTop_orth> DistanceMap::AlternativeSites(const clipper::Spacegroup& spacegroup,
const clipper::Cell& cell)
{
vector<clipper::RTop_orth> result;
// to make the operation at index 0 equal to identity
result.push_back(clipper::RTop_orth::identity());
for (int i = 0; i < spacegroup.num_symops(); ++i)
{
const auto& symop = spacegroup.symop(i);
for (int u: { -1, 0, 1})
for (int v: { -1, 0, 1})
for (int w: { -1, 0, 1})
{
if (i == 0 and u == 0 and v == 0 and w == 0)
continue;
auto rtop = clipper::RTop_frac(
symop.rot(), symop.trn() + clipper::Vec3<>(u, v, w)
).rtop_orth(cell);
result.push_back(move(rtop));
}
}
return result;
}
// --------------------------------------------------------------------
DistanceMap::DistanceMap(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell,
float maxDistance)
: structure(p), dim(0), mMaxDistance(maxDistance), mMaxDistanceSQ(maxDistance * maxDistance)
{
auto& atoms = p.atoms();
dim = atoms.size();
vector<clipper::Coord_orth> locations(dim);
// bounding box
Point pMin(numeric_limits<float>::max(), numeric_limits<float>::max(), numeric_limits<float>::max()),
pMax(numeric_limits<float>::min(), numeric_limits<float>::min(), numeric_limits<float>::min());
for (auto& atom: atoms)
{
size_t ix = index.size();
index[atom.id()] = ix;
rIndex[ix] = atom.id();
locations[ix] = atom.location();
auto p = atom.location();
if (pMin.mX > p.mX)
pMin.mX = p.mX;
if (pMin.mY > p.mY)
pMin.mY = p.mY;
if (pMin.mZ > p.mZ)
pMin.mZ = p.mZ;
if (pMax.mX < p.mX)
pMax.mX = p.mX;
if (pMax.mY < p.mY)
pMax.mY = p.mY;
if (pMax.mZ < p.mZ)
pMax.mZ = p.mZ;
};
// correct locations so that the median of x, y and z are inside the cell
vector<float> c(locations.size());
auto median = [&]()
{
return dim % 1 == 0
? c[dim / 2]
: (c[dim / 2 - 1] + c[dim / 2]) / 2;
};
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[0]; });
sort(c.begin(), c.end());
float mx = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[1]; });
sort(c.begin(), c.end());
float my = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[2]; });
sort(c.begin(), c.end());
float mz = median();
if (cif::VERBOSE > 1)
cerr << "median position of atoms: " << Point(mx, my, mz) << endl;
auto calculateD = [&](float m, float c)
{
float d = 0;
while (m + d < -(c / 2))
d += c;
while (m + d > (c / 2))
d -= c;
return d;
};
mD.mX = calculateD(mx, cell.a());
mD.mY = calculateD(my, cell.b());
mD.mZ = calculateD(mz, cell.c());
clipper::Coord_orth D = mD;
if (mD.mX != 0 or mD.mY != 0 or mD.mZ != 0)
{
if (cif::VERBOSE)
cerr << "moving coorinates by " << mD.mX << ", " << mD.mY << " and " << mD.mZ << endl;
for_each(locations.begin(), locations.end(), [&](auto& p) { p += mD; });
}
pMin -= mMaxDistance; // extend bounding box
pMax += mMaxDistance;
mRtOrth = AlternativeSites(spacegroup, cell);
DistMap dist;
vector<const Residue*> residues;
residues.reserve(p.residues().size());
for (auto& r: p.residues())
{
residues.emplace_back(&r);
// Add distances for atoms in this residue
AddDistancesForAtoms(r, r, dist, 0);
}
cif::Progress progress(residues.size() * residues.size(), "Creating distance map");
for (size_t i = 0; i + 1 < residues.size(); ++i)
{
auto& ri = *residues[i];
Point centerI;
float radiusI;
tie(centerI, radiusI) = ri.centerAndRadius();
for (size_t j = i + 1; j < residues.size(); ++j)
{
progress.consumed(1);
auto& rj = *residues[j];
// first case, no symmetry operations
Point centerJ;
float radiusJ;
tie(centerJ, radiusJ) = rj.centerAndRadius();
auto d = Distance(centerI, centerJ) - radiusI - radiusJ;
if (d < mMaxDistance)
{
AddDistancesForAtoms(ri, rj, dist, 0);
continue;
}
// now try all symmetry operations to see if we can move rj close to ri
clipper::Coord_orth cI = centerI;
clipper::Coord_orth cJ = centerJ;
auto minR2 = d;
int32_t kbest = 0;
for (int32_t k = 1; k < static_cast<int32_t>(mRtOrth.size()); ++k)
{
auto& rt = mRtOrth[k];
auto pJ = (cJ + D).transform(rt) - D;
double r2 = sqrt((cI - pJ).lengthsq()) - radiusI - radiusJ;
if (minR2 > r2)
{
minR2 = r2;
kbest = k;
}
}
if (minR2 < mMaxDistance)
AddDistancesForAtoms(ri, rj, dist, kbest);
}
}
// Store as a sparse CSR compressed matrix
size_t nnz = dist.size();
mA.reserve(nnz);
mIA.reserve(dim + 1);
mJA.reserve(nnz);
size_t lastR = 0;
mIA.push_back(0);
for (auto& di: dist)
{
size_t c, r;
tie(r, c) = di.first;
if (r != lastR) // new row
{
for (size_t ri = lastR; ri < r; ++ri)
mIA.push_back(mA.size());
lastR = r;
}
mA.push_back(di.second);
mJA.push_back(c);
}
for (size_t ri = lastR; ri < dim; ++ri)
mIA.push_back(mA.size());
}
// --------------------------------------------------------------------
void DistanceMap::AddDistancesForAtoms(const Residue& a, const Residue& b, DistMap& dm, int32_t rtix)
{
for (auto& aa: a.atoms())
{
clipper::Coord_orth pa = aa.location();
size_t ixa = index[aa.id()];
for (auto& bb: b.atoms())
{
if (aa.id() == bb.id())
continue;
clipper::Coord_orth pb = bb.location();
if (rtix)
pb = pb.transform(mRtOrth[rtix]);
auto d = (pa - pb).lengthsq();
if (d > mMaxDistanceSQ)
continue;
d = sqrt(d);
size_t ixb = index[bb.id()];
dm[make_tuple(ixa, ixb)] = make_tuple(d, rtix);
dm[make_tuple(ixb, ixa)] = make_tuple(d, -rtix);
}
}
}
float DistanceMap::operator()(const Atom& a, const Atom& b) const
{
size_t ixa, ixb;
try
{
ixa = index.at(a.id());
}
catch (const out_of_range& ex)
{
throw runtime_error("atom " + a.id() + " not found in distance map");
}
try
{
ixb = index.at(b.id());
}
catch (const out_of_range& ex)
{
throw runtime_error("atom " + b.id() + " not found in distance map");
}
// if (ixb < ixa)
// std::swap(ixa, ixb);
size_t L = mIA[ixa];
size_t R = mIA[ixa + 1] - 1;
while (L <= R)
{
size_t i = (L + R) / 2;
if (mJA[i] == ixb)
return get<0>(mA[i]);
if (mJA[i] < ixb)
L = i + 1;
else
R = i - 1;
}
return 100.f;
}
vector<Atom> DistanceMap::near(const Atom& a, float maxDistance) const
{
assert(maxDistance <= mMaxDistance);
if (maxDistance > mMaxDistance)
throw runtime_error("Invalid max distance in DistanceMap::near");
size_t ixa;
try
{
ixa = index.at(a.id());
}
catch (const out_of_range& ex)
{
throw runtime_error("atom " + a.id() + " not found in distance map");
}
vector<Atom> result;
for (size_t i = mIA[ixa]; i < mIA[ixa + 1]; ++i)
{
float d;
int32_t rti;
tie(d, rti) = mA[i];
if (d > maxDistance)
continue;
size_t ixb = mJA[i];
Atom b = structure.getAtomByID(rIndex.at(ixb));
if (rti > 0)
result.emplace_back(b.symmetryCopy(mD, mRtOrth.at(rti)));
else if (rti < 0)
result.emplace_back(b.symmetryCopy(mD, mRtOrth.at(-rti).inverse()));
else
result.emplace_back(b);
}
return result;
}
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/Config.hpp"
#include <iomanip>
#include <fstream>
#if __has_include(<filesystem>)
#include <filesystem>
namespace fs = std::filesystem;
#elif __has_include(<boost/filesystem.hpp>)
#include <boost/filesystem.hpp>
namespace fs = boost::filesystem;
#endif
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/copy.hpp>
#include <clipper/clipper-contrib.h>
#include <clipper/clipper-ccp4.h>
#include "cif++/Cif++.hpp"
#include "cif++/MapMaker.hpp"
#include "cif++/ResolutionCalculator.hpp"
using namespace std;
namespace io = boost::iostreams;
namespace ba = boost::algorithm;
namespace mmcif
{
// --------------------------------------------------------------------
// a private ccp4 map file implementation
// 1 NC # of Columns (fastest changing in map)
// 2 NR # of Rows
// 3 NS # of Sections (slowest changing in map)
// 4 MODE Data type
// 0 = envelope stored as signed bytes (from
// -128 lowest to 127 highest)
// 1 = Image stored as Integer*2
// 2 = Image stored as Reals
// 3 = Transform stored as Complex Integer*2
// 4 = Transform stored as Complex Reals
// 5 == 0
// Note: Mode 2 is the normal mode used in
// the CCP4 programs. Other modes than 2 and 0
// may NOT WORK
// 5 NCSTART Number of first COLUMN in map
// 6 NRSTART Number of first ROW in map
// 7 NSSTART Number of first SECTION in map
// 8 NX Number of intervals along X
// 9 NY Number of intervals along Y
// 10 NZ Number of intervals along Z
// 11 X length Cell Dimensions (Angstroms)
// 12 Y length "
// 13 Z length "
// 14 Alpha Cell Angles (Degrees)
// 15 Beta "
// 16 Gamma "
// 17 MAPC Which axis corresponds to Cols. (1,2,3 for X,Y,Z)
// 18 MAPR Which axis corresponds to Rows (1,2,3 for X,Y,Z)
// 19 MAPS Which axis corresponds to Sects. (1,2,3 for X,Y,Z)
// 20 AMIN Minimum density value
// 21 AMAX Maximum density value
// 22 AMEAN Mean density value (Average)
// 23 ISPG Space group number
// 24 NSYMBT Number of bytes used for storing symmetry operators
// 25 LSKFLG Flag for skew transformation, =0 none, =1 if foll
// 26-34 SKWMAT Skew matrix S (in order S11, S12, S13, S21 etc) if
// LSKFLG .ne. 0.
// 35-37 SKWTRN Skew translation t if LSKFLG .ne. 0.
// Skew transformation is from standard orthogonal
// coordinate frame (as used for atoms) to orthogonal
// map frame, as
// Xo(map) = S * (Xo(atoms) - t)
// 38 future use (some of these are used by the MSUBSX routines
// . " in MAPBRICK, MAPCONT and FRODO)
// . " (all set to zero by default)
// . "
// 52 "
// 53 MAP Character string 'MAP ' to identify file type
// 54 MACHST Machine stamp indicating the machine type
// which wrote file
// 55 ARMS Rms deviation of map from mean density
// 56 NLABL Number of labels being used
// 57-256 LABEL(20,10) 10 80 character text labels (ie. A4 format)
enum CCP4MapFileMode : uint32_t
{
AS_REALS = 2 // do not support anything else for now...
};
struct CCP4MapFileHeader
{
uint32_t NC, NR, NS;
CCP4MapFileMode MODE;
int32_t NCSTART, NRSTART, NSSTART;
uint32_t NX, NY, NZ;
float cellLengths[3];
float cellAngles[3];
uint32_t MAPC, MAPR, MAPS;
float AMIN, AMAX, AMEAN;
uint32_t ISPG;
uint32_t NSYMBT;
uint32_t LSKFLG;
float SKWMAT[9];
float SKWTRN[3];
uint32_t UNUSED[15];
char MAP[4] = { 'M', 'A', 'P', ' ' };
uint32_t MACHST = 0x00004144;
float ARMS;
uint32_t NLABL = 1;
char LABEL[200 * 4];
};
template<typename FTYPE>
tuple<float,float,float,float> CalculateMapStatistics(const clipper::Xmap<FTYPE>& xmap, clipper::Grid_range r)
{
float
amin = numeric_limits<float>::max(),
amax = numeric_limits<float>::min();
long double asum = 0, asum2 = 0;
size_t n = 0;
clipper::Xmap_base::Map_reference_coord c(xmap);
for (int g0 = r.min()[0]; g0 <= r.max()[0]; ++g0)
for (int g1 = r.min()[1]; g1 <= r.max()[1]; ++g1)
for (int g2 = r.min()[2]; g2 <= r.max()[2]; ++g2)
{
c.set_coord({ g0, g1, g2});
float v = xmap[c];
asum += v;
asum2 += v * v;
if (amin > v)
amin = v;
if (amax < v)
amax = v;
++n;
}
float mean = static_cast<float>(asum / n);
float rmsd = static_cast<float>(sqrt((asum2 / n) - (mean * mean)));
return make_tuple(amin, amax, mean, rmsd);
}
template<typename FTYPE>
void writeCCP4MapFile(ostream& os, clipper::Xmap<FTYPE>& xmap, clipper::Grid_range range)
{
static_assert(sizeof(CCP4MapFileHeader) == 256 * 4, "Map header is of incorrect size");
static_assert(__BYTE_ORDER == __LITTLE_ENDIAN, "Code for big endian systems is not implemented yet");
auto& spacegroup = xmap.spacegroup();
int spaceGroupNumber = spacegroup.descr().spacegroup_number();
int orderFMS[3] = { 3, 1, 2 };
switch (spaceGroupNumber)
{
case 1: case 2: case 3: case 4:
case 10: case 16: case 17: case 18:
case 20: case 21: case 23:
orderFMS[0] = 2;
orderFMS[2] = 3;
break;
}
int orderXYZ[3];
for (size_t i = 0; i < 3; ++i)
orderXYZ[orderFMS[i] - 1] = i;
int grid[3], gridFMSMin[3], gridFMSMax[3], dim[3];
for (size_t i = 0; i < 3; ++i)
{
grid[i] = xmap.grid_sampling()[i];
gridFMSMin[orderXYZ[i]] = range.min()[i];
gridFMSMax[orderXYZ[i]] = range.max()[i];
}
for (size_t i = 0; i < 3; ++i)
dim[i] = gridFMSMax[i] - gridFMSMin[i] + 1;
auto cellDescription = xmap.cell().descr();
CCP4MapFileHeader h = {};
int r = snprintf(h.LABEL, sizeof(h.LABEL), "%s", "Map created with map-maker from the PDB-REDO suite of applications");
for (size_t i = r; i < sizeof(h.LABEL); ++i)
h.LABEL[i] = ' ';
h.NC = dim[0];
h.NR = dim[1];
h.NS = dim[2];
h.MODE = AS_REALS;
h.NCSTART = gridFMSMin[0];
h.NRSTART = gridFMSMin[1];
h.NSSTART = gridFMSMin[2];
h.NX = grid[0];
h.NY = grid[1];
h.NZ = grid[2];
h.cellLengths[0] = cellDescription.a();
h.cellLengths[1] = cellDescription.b();
h.cellLengths[2] = cellDescription.c();
h.cellAngles[0] = cellDescription.alpha_deg();
h.cellAngles[1] = cellDescription.beta_deg();
h.cellAngles[2] = cellDescription.gamma_deg();
h.MAPC = orderFMS[0];
h.MAPR = orderFMS[1];
h.MAPS = orderFMS[2];
h.ISPG = spaceGroupNumber;
h.NSYMBT = spacegroup.num_symops() * 80;
tie(h.AMIN, h.AMAX, h.AMEAN, h.ARMS) = CalculateMapStatistics(xmap, range);
os.write(reinterpret_cast<char*>(&h), sizeof(h));
const string kSpaces(80, ' ');
for (int si = 0; si < spacegroup.num_symops(); ++si)
{
string symop = spacegroup.symop(si).format();
os.write(symop.c_str(), symop.length());
os.write(kSpaces.c_str(), 80 - symop.length());
}
clipper::Xmap_base::Map_reference_coord c(xmap);
const uint32_t kSectionLength = dim[0] * dim[1];
vector<float> section(kSectionLength);
int g[3];
for (g[2] = gridFMSMin[2]; g[2] <= gridFMSMax[2]; ++g[2])
{
auto si = section.begin();
for (g[1] = gridFMSMin[1]; g[1] <= gridFMSMax[1]; ++g[1])
for (g[0] = gridFMSMin[0]; g[0] <= gridFMSMax[0]; ++g[0])
{
c.set_coord({ g[orderXYZ[0]], g[orderXYZ[1]], g[orderXYZ[2]] });
*si++ = xmap[c];
}
assert(si == section.end());
os.write(reinterpret_cast<char*>(section.data()), kSectionLength * sizeof(float));
}
}
// --------------------------------------------------------------------
bool IsMTZFile(const std::string& p)
{
bool result = false;
std::ifstream f(p);
if (f.is_open())
{
char sig[5] = {};
f.read(sig, 4);
result = sig == string("MTZ ");
}
return result;
}
// --------------------------------------------------------------------
template<typename FTYPE>
Map<FTYPE>::Map()
{
}
template<typename FTYPE>
Map<FTYPE>::~Map()
{
}
template<typename FTYPE>
void Map<FTYPE>::calculateStats()
{
double sum = 0, sum2 = 0;
int count = 0;
mMinDensity = numeric_limits<double>::max();
mMaxDensity = numeric_limits<double>::min();
for (auto ix = mMap.first(); not ix.last(); ix.next())
{
auto v = mMap[ix];
if (isnan(v))
throw runtime_error("map contains NaN values");
if (mMinDensity > v)
mMinDensity = v;
if (mMaxDensity < v)
mMaxDensity = v;
++count;
sum += v;
sum2 += v * v;
}
mMeanDensity = sum / count;
mRMSDensity = sqrt((sum2 / count) - (mMeanDensity * mMeanDensity));
}
template<typename FTYPE>
void Map<FTYPE>::read(const std::string& f)
{
fs::path mapFile(f);
fs::path dataFile = mapFile;
if (cif::VERBOSE)
cout << "Reading map from " << mapFile << endl;
if (mapFile.extension() == ".gz" or mapFile.extension() == ".bz2")
{
// file is compressed
fs::path p = mapFile.parent_path();
string s = mapFile.filename().string();
io::filtering_stream<io::input> in;
std::ifstream fi(mapFile);
if (mapFile.extension() == ".gz")
in.push(io::gzip_decompressor());
else
in.push(io::bzip2_decompressor());
in.push(fi);
char tmpFileName[] = "/tmp/map-tmp-XXXXXX";
if (mkstemp(tmpFileName) < 0)
throw runtime_error(string("Could not create temp file for map: ") + strerror(errno));
dataFile = fs::path(tmpFileName);
std::ofstream out(dataFile);
io::copy(in, out);
}
if (not fs::exists(dataFile))
throw runtime_error("Could not open map file " + mapFile.string());
using namespace clipper;
CCP4MAPfile mapin;
mapin.open_read(dataFile.string());
mapin.import_xmap(mMap);
mapin.close_read();
if (dataFile != mapFile)
fs::remove(dataFile);
calculateStats();
}
template<typename FTYPE>
void Map<FTYPE>::write(const std::string& f)
{
write_masked(f, mMap.grid_asu());
}
template<typename FTYPE>
void Map<FTYPE>::write_masked(ostream& os, clipper::Grid_range r)
{
writeCCP4MapFile(os, mMap, r);
}
template<typename FTYPE>
void Map<FTYPE>::write_masked(const std::string& f, clipper::Grid_range r)
{
std::ofstream file(f, ios_base::binary);
if (not file.is_open())
throw runtime_error("Could not open map file for writing: " + f);
write_masked(file, r);
}
template class Map<float>;
template class Map<double>;
// --------------------------------------------------------------------
template<typename FTYPE>
MapMaker<FTYPE>::MapMaker()
{
}
template<typename FTYPE>
MapMaker<FTYPE>::~MapMaker()
{
}
template<typename FTYPE>
void MapMaker<FTYPE>::loadMTZ(const std::string& f, float samplingRate,
initializer_list<string> fbLabels, initializer_list<string> fdLabels,
initializer_list<string> foLabels, initializer_list<string> fcLabels,
initializer_list<string> faLabels)
{
fs::path hklin(f);
if (cif::VERBOSE)
cerr << "Reading map from " << hklin << endl
<< " with labels: FB: " << ba::join(fbLabels, ",") << endl
<< " with labels: FD: " << ba::join(fdLabels, ",") << endl
<< " with labels: FA: " << ba::join(faLabels, ",") << endl
<< " with labels: FO: " << ba::join(foLabels, ",") << endl
<< " with labels: FC: " << ba::join(fcLabels, ",") << endl;
fs::path dataFile = hklin;
if (hklin.extension() == ".gz" or hklin.extension() == ".bz2")
{
// file is compressed
fs::path p = hklin.parent_path();
string s = hklin.filename().string();
io::filtering_stream<io::input> in;
std::ifstream fi(hklin);
if (hklin.extension() == ".gz")
in.push(io::gzip_decompressor());
else
in.push(io::bzip2_decompressor());
in.push(fi);
char tmpFileName[] = "/tmp/mtz-tmp-XXXXXX";
if (mkstemp(tmpFileName) < 0)
throw runtime_error(string("Could not create temp file for mtz: ") + strerror(errno));
dataFile = fs::path(tmpFileName);
std::ofstream out(dataFile);
io::copy(in, out);
}
if (not fs::exists(dataFile))
throw runtime_error("Could not open mtz file " + hklin.string());
const string kBasePath("/%1%/%2%/[%3%]");
using clipper::CCP4MTZfile;
CCP4MTZfile mtzin;
mtzin.open_read(dataFile.string());
mtzin.import_hkl_info(mHKLInfo);
bool hasFAN = false;
const regex rx(R"(^/[^/]+/[^/]+/(.+) \S$)");
for (auto& label: mtzin.column_labels())
{
smatch m;
if (regex_match(label, m, rx) and m[1] == "FAN")
{
hasFAN = true;
break;
}
}
mtzin.import_hkl_data(mFbData,
(boost::format(kBasePath) % "*" % "*" % ba::join(fbLabels, ",")).str());
mtzin.import_hkl_data(mFdData,
(boost::format(kBasePath) % "*" % "*" % ba::join(fdLabels, ",")).str());
if (hasFAN)
mtzin.import_hkl_data(mFaData,
(boost::format(kBasePath) % "*" % "*" % ba::join(faLabels, ",")).str());
mtzin.import_hkl_data(mFoData,
(boost::format(kBasePath) % "*" % "*" % ba::join(foLabels, ",")).str());
mtzin.import_hkl_data(mFcData,
(boost::format(kBasePath) % "*" % "*" % ba::join(fcLabels, ",")).str());
mtzin.import_hkl_data(mFreeData,
(boost::format(kBasePath) % "*" % "*" % "FREE").str());
mtzin.import_hkl_data(mPhiFomData,
(boost::format(kBasePath) % "*" % "*" % "PHWT,FOM").str());
mtzin.close_read();
if (dataFile != hklin)
fs::remove(dataFile);
Cell cell = mHKLInfo.cell();
Spacegroup spacegroup = mHKLInfo.spacegroup();
ResolutionCalculator rc(cell);
mResHigh = 99;
mResLow = 0;
for (auto hi = mFoData.first_data(); not hi.last(); hi = mFoData.next_data(hi))
{
float res = rc(hi.hkl().h(), hi.hkl().k(), hi.hkl().l());
if (mResHigh > res)
mResHigh = res;
if (mResLow < res)
mResLow = res;
}
// fixMTZ();
mGrid.init(spacegroup, cell,
mHKLInfo.resolution(), samplingRate); // define grid
clipper::Xmap<FTYPE>& fbMap = mFb;
clipper::Xmap<FTYPE>& fdMap = mFd;
clipper::Xmap<FTYPE>& faMap = mFa;
fbMap.init(spacegroup, cell, mGrid); // define map
fbMap.fft_from(mFbData); // generate map
fdMap.init(spacegroup, cell, mGrid); // define map
fdMap.fft_from(mFdData); // generate map
if (not mFaData.is_null())
{
faMap.init(spacegroup, cell, mGrid);
faMap.fft_from(mFaData);
}
if (cif::VERBOSE)
{
cerr << "Read Xmaps with sampling rate: " << samplingRate << endl
<< " stored resolution: " << mHKLInfo.resolution().limit() << endl
<< " calculated reshi = " << mResHigh << " reslo = " << mResLow << endl
<< " spacegroup: " << spacegroup.symbol_hm() << endl
<< " cell: " << cell.format() << endl
<< " grid: " << mGrid.format() << endl;
printStats();
}
mFb.calculateStats();
mFd.calculateStats();
}
// --------------------------------------------------------------------
template<typename FTYPE>
void MapMaker<FTYPE>::loadMaps(
const std::string& fbMapFile, const std::string& fdMapFile, float reshi, float reslo)
{
mResHigh = reshi;
mResLow = reslo;
mFb.read(fbMapFile);
mFd.read(fdMapFile);
if (not mFb.cell().equals(mFd.cell()))
throw runtime_error("Fb and Fd map do not contain the same cell");
}
// --------------------------------------------------------------------
ostream& operator<<(ostream& os, const clipper::HKL& hkl)
{
os << "h: " << hkl.h() << ", "
<< "k: " << hkl.k() << ", "
<< "l: " << hkl.l();
return os;
};
// --------------------------------------------------------------------
template<typename FTYPE>
void MapMaker<FTYPE>::calculate(const std::string& hklin,
const Structure& structure, bool noBulk, AnisoScalingFlag anisoScaling,
float samplingRate, bool electronScattering,
initializer_list<std::string> foLabels, initializer_list<std::string> freeLabels)
{
if (IsMTZFile(hklin))
loadFoFreeFromMTZFile(hklin, foLabels, freeLabels);
else
loadFoFreeFromReflectionsFile(hklin);
recalc(structure, noBulk, anisoScaling, samplingRate, electronScattering);
}
// --------------------------------------------------------------------
template<typename FTYPE>
void MapMaker<FTYPE>::loadFoFreeFromReflectionsFile(const std::string& hklin)
{
using clipper::HKL;
cif::File reflnsFile(hklin);
auto& reflns = reflnsFile.firstDatablock();
// m_xname = reflns["exptl_crystal"].front()["id"].as<string>();
// m_pname = reflns["entry"].front()["id"].as<string>();
float a, b, c, alpha, beta, gamma;
cif::tie(a, b, c, alpha, beta, gamma) = reflns["cell"].front().get(
"length_a", "length_b", "length_c", "angle_alpha", "angle_beta", "angle_gamma");
using clipper::Cell_descr;
Cell cell = Cell(Cell_descr{a, b, c, alpha, beta, gamma});
// if (not cell2.equals(m_cell))
// throw runtime_error("Reflections file and coordinates file do not agree upon the cell parameters");
// --------------------------------------------------------------------
// Read reflections file to calculate resolution low and high
ResolutionCalculator rc(a, b, c, alpha, beta, gamma);
double hires = 99;
for (auto r: reflns["refln"])
{
int h, k, l;
cif::tie(h, k, l) = r.get("index_h", "index_k", "index_l");
double res = rc(h, k, l);
if (hires > res)
hires = res;
}
string spacegroupDescr = reflns["symmetry"].front()["space_group_name_H-M"].as<string>();
auto spacegroup = Spacegroup(clipper::Spgr_descr{spacegroupDescr});
mHKLInfo = HKL_info(spacegroup, cell, clipper::Resolution{hires}, true);
// m_crystal = MTZcrystal(m_xname, m_pname, m_cell);
mFoData.init(mHKLInfo, mHKLInfo.cell());
mFreeData.init(mHKLInfo, mHKLInfo.cell());
for (auto ih = mFreeData.first(); not ih.last(); ih.next())
mFreeData[ih].set_null();
// --------------------------------------------------------------------
enum FreeRConvention { frXPLO, frCCP4 } freeRConvention = frXPLO;
int freeRefl = 1, workRefl = 0;
if (false /*m_statusXPLO*/)
{
freeRConvention = frCCP4;
freeRefl = 0;
workRefl = 1;
}
bool first = false;
for (auto r: reflns["refln"])
{
int h, k, l;
char flag;
float F, sigF;
cif::tie(h, k, l, flag, F, sigF) = r.get("index_h", "index_k", "index_l", "status", "F_meas_au", "F_meas_sigma_au");
int ix = mHKLInfo.index_of(HKL{h, k, l});
if (ix < 0)
{
if (cif::VERBOSE)
cerr << "Ignoring hkl(" << h << ", " << k << ", " << l << ")" << endl;
continue;
}
if (first and (flag == freeRefl or flag == workRefl))
{
cerr << "Non-standard _refln.status column detected" << endl
<< "Assuming " << (freeRConvention == frXPLO ? "XPLOR" : "CCP4") << " convention for free R flag" << endl;
first = false;
}
mFoData[ix] = F_sigF(F, sigF);
switch (flag)
{
case 'o':
case 'h':
case 'l':
mFreeData[ix] = Flag(1);
break;
case 'f':
mFreeData[ix] = Flag(0);
break;
case '0':
case '1':
mFreeData[ix] = Flag(workRefl == flag ? 1 : 0);
break;
default:
if (cif::VERBOSE > 1)
cerr << "Unexpected value in status: '" << flag << "' for hkl(" << h << ", " << k << ", " << l << ")" << endl;
break;
}
}
}
// --------------------------------------------------------------------
template<typename FTYPE>
void MapMaker<FTYPE>::loadFoFreeFromMTZFile(const std::string& hklin,
initializer_list<std::string> foLabels, initializer_list<std::string> freeLabels)
{
if (cif::VERBOSE)
cerr << "Recalculating maps from " << hklin << endl;
const string kBasePath("/%1%/%2%/[%3%]");
using clipper::CCP4MTZfile;
CCP4MTZfile mtzin;
mtzin.open_read(hklin);
mtzin.import_hkl_info(mHKLInfo);
mtzin.import_hkl_data(mFoData,
(boost::format(kBasePath) % "*" % "*" % ba::join(foLabels, ",")).str());
mtzin.import_hkl_data(mFreeData,
(boost::format(kBasePath) % "*" % "*" % ba::join(freeLabels, ",")).str());
mtzin.close_read();
}
// --------------------------------------------------------------------
template<typename FTYPE>
void MapMaker<FTYPE>::recalc(const Structure& structure,
bool noBulk, AnisoScalingFlag anisoScaling,
float samplingRate, bool electronScattering)
{
Cell cell = mHKLInfo.cell();
Spacegroup spacegroup = mHKLInfo.spacegroup();
// The calculation work
vector<clipper::Atom> atoms;
for (auto a: structure.atoms())
atoms.push_back(a.toClipper());
mFcData.init(mHKLInfo, cell);
if (not electronScattering)
{
auto& exptl = structure.getFile().data()["exptl"];
electronScattering = not exptl.empty() and exptl.front()["method"] == "ELECTRON CRYSTALLOGRAPHY";
}
clipper::ScatteringFactors::selectScattteringFactorsType(
electronScattering ? clipper::SF_ELECTRON : clipper::SF_WAASMAIER_KIRFEL);
if (noBulk)
{
clipper::SFcalc_aniso_fft<float> sfc;
sfc(mFcData, atoms);
}
else
{
clipper::SFcalc_obs_bulk<float> sfcb;
sfcb(mFcData, mFoData, atoms);
if (cif::VERBOSE)
cerr << "Bulk correction volume: " << sfcb.bulk_frac() << endl
<< "Bulk correction factor: " << sfcb.bulk_scale() << endl;
}
if (anisoScaling != as_None)
{
clipper::SFscale_aniso<float>::TYPE F = clipper::SFscale_aniso<float>::F;
clipper::SFscale_aniso<float> sfscl;
if (anisoScaling == as_Observed)
sfscl(mFoData, mFcData); // scale Fobs
else
sfscl(mFcData, mFoData); // scale Fcal
if (cif::VERBOSE)
cerr << "Anisotropic scaling:" << endl
<< sfscl.u_aniso_orth(F).format() << endl;
}
// now do sigmaa calc
mFbData.init(mHKLInfo, cell);
mFdData.init(mHKLInfo, cell);
mPhiFomData.init(mHKLInfo, cell);
HKL_data<Flag> flag(mHKLInfo, cell);
const int freeflag = 0;
for (auto ih = mFreeData.first(); not ih.last(); ih.next())
{
if (not mFoData[ih].missing() and (mFreeData[ih].missing() or mFreeData[ih].flag() == freeflag))
flag[ih].flag() = clipper::SFweight_spline<float>::BOTH;
else
flag[ih].flag() = clipper::SFweight_spline<float>::NONE;
}
// do sigmaa calc
clipper::SFweight_spline<float> sfw(mNumRefln, mNumParam);
sfw(mFbData, mFdData, mPhiFomData, mFoData, mFcData, flag);
// mFbData now contains 2mFo - DFc
// mFdData now contains mFo - DFc
fixMTZ();
ResolutionCalculator rc(cell);
mResHigh = 99; mResLow = 0;
for (auto hi = mFoData.first_data(); not hi.last(); hi = mFoData.next_data(hi))
{
float res = rc(hi.hkl().h(), hi.hkl().k(), hi.hkl().l());
if (mResHigh > res)
mResHigh = res;
if (mResLow < res)
mResLow = res;
}
if (cif::VERBOSE > 1)
cerr << "calculated reshi = " << mResHigh << " reslo = " << mResLow << endl;
samplingRate /= 2;
mGrid.init(spacegroup, cell,
mHKLInfo.resolution(), samplingRate); // define grid
clipper::Xmap<FTYPE>& fbMap = mFb;
clipper::Xmap<FTYPE>& fdMap = mFd;
fbMap.init(spacegroup, cell, mGrid); // define map
fbMap.fft_from(mFbData); // generate map
fdMap.init(spacegroup, cell, mGrid); // define map
fdMap.fft_from(mFdData); // generate map
if (cif::VERBOSE)
{
cerr << "Read Xmaps with sampling rate: " << samplingRate << endl
<< " resolution: " << mResHigh
<< endl;
printStats();
}
mFb.calculateStats();
mFd.calculateStats();
}
template<typename FTYPE>
void MapMaker<FTYPE>::fixMTZ()
{
Spacegroup spacegroup = mHKLInfo.spacegroup();
enum {
A1, // A1: FC = 2mFo - FM
A2, // A2: FC >= 2mFo - FM
A3, // A3: FD = FM - mFo
A4, // A4: FD = 2(FM - mFo)
C5, // C5: FC = 2mFo - FM
C6, // C6: FM = mFo
C7, // C7: FD = mFo - FC
C8, // C8: FD = 2(mFo - FC)
C9, // C9: FD <= mFo - FC
T10, // 10: FM = FC (unobserved only)
T11, // 11: FD = 0 (unobserved only)
TestCount
};
vector<bool> tests(TestCount, true);
// first run the tests to see if we need to fix anything
if (cif::VERBOSE)
cerr << "Testing MTZ file" << endl;
for (auto ih = mFbData.first(); not ih.last(); ih.next())
{
clipper::HKL_class cls(spacegroup, ih.hkl());
auto W = mPhiFomData[ih].fom();
auto FM = mFbData[ih].f();
auto PM = mFbData[ih].phi() * 180 / kPI;
auto FD = mFdData[ih].f();
auto PD = mFdData[ih].phi() * 180 / kPI;
auto FO = mFoData[ih].f();
auto FC = mFcData[ih].f();
auto PC = mFcData[ih].phi() * 180 / kPI;
auto WFO = W * FO;
if (abs(fmod(abs(PM - PC) + 180, 360) - 180) > 90)
FM = -FM;
if (abs(fmod(abs(PD - PC) + 180, 360) - 180) > 90)
FD = -FD;
if (mFoData[ih].missing() or W == 0)
{
if (tests[T10] and abs(FM - FC) > 0.05)
{
tests[T10] = false;
if (cif::VERBOSE) cerr << "Test 10 failed at " << ih.hkl() << endl;
}
if (tests[T11] and abs(FD) > 0.05)
{
tests[T11] = false;
if (cif::VERBOSE) cerr << "Test 11 failed at " << ih.hkl() << endl;
}
}
else if (cls.centric())
{
if (tests[C5] and abs(FC + FM - 2 * WFO) > 0.05)
{
tests[C5] = false;
if (cif::VERBOSE) cerr << "Test C5 failed at " << ih.hkl() << endl;
}
if (tests[C6] and abs(FM - WFO) > 0.05)
{
tests[C6] = false;
if (cif::VERBOSE) cerr << "Test C6 failed at " << ih.hkl() << endl;
}
if (tests[C7] and abs(FC + FD - WFO) > 0.05)
{
tests[C7] = false;
if (cif::VERBOSE) cerr << "Test C7 failed at " << ih.hkl() << endl;
}
if (tests[C8] and abs(FC + 0.5 * FD - WFO) > 0.05)
{
tests[C8] = false;
if (cif::VERBOSE) cerr << "Test C8 failed at " << ih.hkl() << endl;
}
if (tests[C9] and (1.01 * FC + Gd - WFO) < -0.05)
{
tests[C9] = false;
if (cif::VERBOSE) cerr << "Test C9 failed at " << ih.hkl() << endl;
}
}
else
{
if (tests[A1] and abs(FC + FM - 2 * WFO) > 0.05)
{
tests[A1] = false;
if (cif::VERBOSE) cerr << "Test A1 failed at " << ih.hkl() << endl;
}
if (tests[A2] and 1.01 * FC + FM - 2 * WFO < -0.05)
{
tests[A2] = false;
if (cif::VERBOSE) cerr << "Test A2 failed at " << ih.hkl() << endl;
}
if (tests[A3] and abs(FM - FD - WFO) > 0.05)
{
tests[A3] = false;
if (cif::VERBOSE) cerr << "Test A3 failed at " << ih.hkl() << endl;
}
if (tests[A4] and abs(FM - 0.5 * FD - WFO) > 0.05)
{
tests[A4] = false;
if (cif::VERBOSE) cerr << "Test A4 failed at " << ih.hkl() << endl;
}
}
}
using clipper::HKL_class;
using clipper::data32::F_phi;
const F_phi fzero(0, 0);
// mtzfix...
for (auto ih = mFbData.first(); not ih.last(); ih.next())
{
if (mFbData[ih].missing() or mFdData[ih].missing())
continue;
auto PM = mFbData[ih].phi() * 180 / kPI;
auto PD = mFdData[ih].phi() * 180 / kPI;
auto PC = mFcData[ih].phi() * 180 / kPI;
if (abs(fmod(abs(PM - PC) + 180, 360) - 180) > 90)
{
mFbData[ih].f() = -mFbData[ih].f();
mFbData[ih].phi() = mFcData[ih].phi();
}
if (abs(fmod(abs(PD - PC) + 180, 360) - 180) > 90)
{
mFdData[ih].f() = -mFdData[ih].f();
mFdData[ih].phi() = mFcData[ih].phi();
}
auto mFo = mFbData[ih] - mFdData[ih];
HKL_class cls(spacegroup, ih.hkl());
if (not mFoData[ih].missing() and mPhiFomData[ih].fom() > 0)
{
if (cls.centric())
{
if (not tests[C6])
mFbData[ih] = mFo;
if (not tests[C7] and tests[C8])
mFdData[ih].f() = mFdData[ih].f() / 2;
}
else
{
if (tests[A3] and not tests[A4])
mFdData[ih] = mFdData[ih] + mFdData[ih];
}
}
else
{
if (not tests[T10])
{
if ((not cls.centric() and tests[A1]) or
(cls.centric() and (tests[C5] or tests[C7] or tests[C8])))
{
mFbData[ih] = mFcData[ih];
}
}
if (not tests[T11])
mFdData[ih] = fzero;
}
}
}
template<typename FTYPE>
void MapMaker<FTYPE>::printStats()
{
// calc R and R-free
vector<double> params(mNumParam, 1.0);
clipper::BasisFn_spline basisfn(mFoData, mNumParam, 1.0);
clipper::TargetFn_scaleF1F2<clipper::data32::F_phi,clipper::data32::F_sigF> targetfn(mFcData, mFoData);
clipper::ResolutionFn rfn(mHKLInfo, basisfn, targetfn, params);
double r1w = 0, f1w = 0, r1f = 0, f1f = 0;
const int freeflag = 0;
for (auto ih = mFoData.first_data(); not ih.last(); ih = mFoData.next_data(ih))
{
if (mFcData[ih].missing())
continue;
// throw runtime_error("missing Fc");
double Fo = mFoData[ih].f();
double Fc = sqrt(rfn.f(ih)) * mFcData[ih].f();
if (mFreeData[ih].flag() == freeflag)
{
r1f += fabs(Fo - Fc);
f1f += Fo;
}
else
{
r1w += fabs(Fo - Fc);
f1w += Fo;
}
}
if (f1f < 0.1)
f1f = 0.1;
r1f /= f1f;
if (f1w < 0.1)
f1w = 0.1;
r1w /= f1w;
cerr << "R-factor : " << r1w << endl
<< "Free R-factor : " << r1f << endl;
}
template<typename FTYPE>
void MapMaker<FTYPE>::writeMTZ(const std::string& file, const string& pname, const string& cname)
{
if (mHKLInfo.is_null())
throw runtime_error("HKL info not initialized");
clipper::CCP4MTZfile mtz;
clipper::MTZdataset dataset(pname, 0);
clipper::MTZcrystal crystal(cname, pname, mHKLInfo.cell());
const string col = "/" + pname + "/" + cname + "/";
mtz.open_write(file);
mtz.export_hkl_info(mHKLInfo);
mtz.export_crystal(crystal, col);
mtz.export_dataset(dataset, col);
if (not mFreeData.is_null()) mtz.export_hkl_data(mFreeData, col + "[FREE]");
if (not mFoData.is_null()) mtz.export_hkl_data(mFoData, col + "[FP,SIGFP]");
if (not mFcData.is_null()) mtz.export_hkl_data(mFcData, col + "[FC_ALL,PHIC_ALL]");
if (not mFbData.is_null()) mtz.export_hkl_data(mFbData, col + "[FWT,PHWT]");
if (not mFdData.is_null()) mtz.export_hkl_data(mFdData, col + "[DELFWT,PHDELWT]");
if (not mPhiFomData.is_null()) mtz.export_hkl_data(mPhiFomData, col + "[PHI,FOM]");
mtz.close_write();
}
template class MapMaker<float>;
template class MapMaker<double>;
}
......@@ -35,13 +35,12 @@
#include <boost/format.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <clipper/core/spacegroup.h>
#include "cif++/PDB2Cif.hpp"
#include "cif++/AtomType.hpp"
#include "cif++/Compound.hpp"
#include "cif++/PDB2CifRemark3.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/Symmetry.hpp"
using namespace std;
namespace ba = boost::algorithm;
......@@ -4928,8 +4927,7 @@ void PDBFileParser::ParseCrystallographic()
try
{
spaceGroup = vS(56, 66);
clipper::Spacegroup sg(clipper::Spgr_descr{spaceGroup});
intTablesNr = to_string(sg.spacegroup_number());
intTablesNr = std::to_string(mmcif::GetSpacegroupNumber(spaceGroup));
}
catch (...)
{
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/ResolutionCalculator.hpp"
#include "cif++/Point.hpp"
using namespace std;
namespace mmcif
{
ResolutionCalculator::ResolutionCalculator(const clipper::Cell& cell)
: ResolutionCalculator(cell.a(), cell.b(), cell.c(),
180 * cell.alpha() / kPI,
180 * cell.beta() / kPI,
180 * cell.gamma() / kPI)
{
}
ResolutionCalculator::ResolutionCalculator(double a, double b, double c,
double alpha, double beta, double gamma)
{
double deg2rad = atan(1.0) / 45.0;
double ca = cos(deg2rad * alpha);
double sa = sin(deg2rad * alpha);
double cb = cos(deg2rad * beta);
double sb = sin(deg2rad * beta);
double cg = cos(deg2rad * gamma);
double sg = sin(deg2rad * gamma);
double cast = (cb * cg - ca) / (sb * sg);
double cbst = (cg * ca - cb) / (sg * sa);
double cgst = (ca * cb - cg) / (sa * sb);
double sast = sqrt(1 - cast * cast);
double sbst = sqrt(1 - cbst * cbst);
double sgst = sqrt(1 - cgst * cgst);
double ast = 1 / (a * sb * sgst);
double bst = 1 / (b * sg * sast);
double cst = 1 / (c * sa * sbst);
mCoefs[0] = ast * ast;
mCoefs[1] = 2 * ast * bst * cgst;
mCoefs[2] = 2 * ast * cst * cbst;
mCoefs[3] = bst * bst;
mCoefs[4] = 2 * bst * cst * cast;
mCoefs[5] = cst * cst;
}
}
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/Config.hpp"
#include <fstream>
#include <numeric>
#include "cif++/Structure.hpp"
#include "cif++/AtomShape.hpp"
#include "cif++/Statistics.hpp"
using namespace std;
namespace mmcif
{
using clipper::Coord_grid;
using clipper::Coord_orth;
using clipper::Xmap;
// --------------------------------------------------------------------
ostream& operator<<(ostream& os, const ResidueStatistics& st)
{
if (st.compID == "HOH")
os << st.asymID << '_' << st.authSeqID << '_' << st.compID << '\t';
else
os << st.asymID << '_' << st.seqID << '_' << st.compID << '\t';
os << st.RSR << '\t'
<< st.SRSR << '\t'
<< st.RSCCS << '\t'
<< st.ngrid << '\t'
<< st.EDIAm << '\t'
<< st.OPIA;
return os;
}
// --------------------------------------------------------------------
double anorm(double x)
{
return 0.5 * erfc(-x * sqrt(0.5));
}
double phinvs(double p)
{
//
// ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3.
//
// Produces the normal deviate Z corresponding to a given lower tail
// area of P; Z is accurate to about 1 part in 10**16.
// Coefficients for P close to 0.5
const double A[8] = {
3.3871328727963666080, 1.3314166789178437745e+2, 1.9715909503065514427e+3, 1.3731693765509461125e+4,
4.5921953931549871457e+4, 6.7265770927008700853e+4, 3.3430575583588128105e+4, 2.5090809287301226727e+3
}, B[8] = {
0, 4.2313330701600911252e+1, 6.8718700749205790830e+2, 5.3941960214247511077e+3,
2.1213794301586595867e+4, 3.9307895800092710610e+4, 2.8729085735721942674e+4, 5.2264952788528545610e+3
};
// Coefficients for P not close to 0, 0.5 or 1.
const double C[8] = {
1.42343711074968357734e0, 4.63033784615654529590e0, 5.76949722146069140550e0, 3.64784832476320460504e0,
1.27045825245236838258e0, 2.41780725177450611770e-1, 2.27238449892691845833e-2, 7.74545014278341407640e-4,
}, D[8] = {
0, 2.05319162663775882187e0, 1.67638483018380384940e0, 6.89767334985100004550e-1,
1.48103976427480074590e-1, 1.51986665636164571966e-2, 5.47593808499534494600e-4, 1.05075007164441684324e-9
};
// Coefficients for P near 0 or 1.
const double E[8] = {
6.65790464350110377720e0, 5.46378491116411436990e0, 1.78482653991729133580e0, 2.96560571828504891230e-1,
2.65321895265761230930e-2, 1.24266094738807843860e-3, 2.71155556874348757815e-5, 2.01033439929228813265e-7,
}, F[8] = {
0, 5.99832206555887937690e-1, 1.36929880922735805310e-1, 1.48753612908506148525e-2,
7.86869131145613259100e-4, 1.84631831751005468180e-5, 1.42151175831644588870e-7, 2.04426310338993978564e-15
};
if (p < 0 or p > 1)
throw runtime_error("P should be >=0 and <=1");
double q = p - 0.5;
double result;
if (abs(q) < 0.425)
{
double r = 0.180625e0 - q * q;
result =
q * (((((((A[7] * r + A[6]) * r + A[5]) * r + A[4]) * r + A[3]) * r + A[2]) * r + A[1]) * r + A[0])
/ (((((((B[7] * r + B[6]) * r + B[5]) * r + B[4]) * r + B[3]) * r + B[2]) * r + B[1]) * r + 1);
}
else
{
double r;
if (q < 0)
r = p;
else
r = 1 - p;
r = sqrt(-log(r));
if (r <= 5)
{
r -= 1.6;
result = (((((((C[7] * r + C[6]) * r + C[5]) * r + C[4]) * r + C[3]) * r + C[2]) * r + C[1]) * r + C[0])
/ (((((((D[7] * r + D[6]) * r + D[5]) * r + D[4]) * r + D[3]) * r + D[2]) * r + D[1]) * r + 1);
}
else
{
r -= 0.5;
result = (((((((E[7] * r + E[6]) * r + E[5]) * r + E[4]) * r + E[3]) * r + E[2]) * r + E[1]) * r + E[0])
/ (((((((F[7] * r + F[6]) * r + F[5]) * r + F[4]) * r + F[3]) * r + F[2]) * r + F[1]) * r + 1);
}
if (q < 0)
result = -result;
}
return result;
}
double errsol(double a)
{
auto c = sqrt(2.0 / kPI);
auto b = abs(a);
double result = 0;
if (b > 3 / c)
{
auto x = abs(pow(b, 1/3.0) - 2 * pow(kPI / b, 2));
if (a < 0)
x = -x;
for (;;)
{
auto xx = x * x;
auto y = c * exp(-0.5 * xx);
auto d = (b * (2 * anorm(x) - 1 - x * y) / xx - x) / (b * y - 3);
x -= d;
if (abs(d) <= 1e-4)
break;
}
result = x;
}
return result;
}
// --------------------------------------------------------------------
class PointWeightFunction
{
public:
PointWeightFunction(Point center, float atomRadius)
: m_Center(center), m_Radius(atomRadius)
{
m_P[0] = P{ -1.0f, 0, 1.0f, 1.0822f };
m_P[1] = P{ 5.1177f, 1.29366f, -0.4f, 1.4043f };
m_P[2] = P{ -0.9507f, 2, 0, 2 };
}
float operator()(Point p) const
{
float d = Distance(m_Center, p);
d /= m_Radius;
float result = 0;
for (auto& p: m_P)
{
if (d > p.x)
continue;
result = p.m * (d - p.c) * (d - p.c) + p.b;
// assert(result != 0);
if (result == 0)
result = numeric_limits<float>::epsilon();
break;
}
return result;
}
private:
struct P
{
float m, c, b, x;
};
Point m_Center;
float m_Radius;
P m_P[3];
};
// --------------------------------------------------------------------
template<typename F>
void iterateGrid(const Coord_orth& p, float r, const Xmap<float>& m, F&& func)
{
using namespace clipper;
Coord_frac fp = p.coord_frac(m.cell());
Coord_frac o = Coord_orth(r, r, r).coord_frac(m.cell());
o[0] = abs(o[0]);
o[1] = abs(o[1]);
o[2] = abs(o[2]);
Coord_frac fMin = fp - o, fMax = fp + o;
Coord_map mMin = fMin.coord_map(m.grid_sampling()), mMax = fMax.coord_map(m.grid_sampling());
Coord_grid gMin = mMin.floor(), gMax = mMax.ceil();
auto i0 = Xmap_base::Map_reference_coord(m, gMin);
for (auto iu = i0; iu.coord().u() <= gMax[0]; iu.next_u())
for (auto iv = iu; iv.coord().v() <= gMax[1]; iv.next_v())
for (auto iw = iv; iw.coord().w() <= gMax[2]; iw.next_w())
func(iw);
}
// --------------------------------------------------------------------
struct AtomGridData
{
AtomGridData(const Coord_grid& gp, double density)
: p(gp), density(density) {}
Coord_grid p;
double density;
};
struct AtomDataSums
{
size_t ngrid = 0;
double rfSums[2] = {}; // sums for R-Factor
double edSums[2] = {}; // Sums for ED1 and ED3
double ccSums[3] = {}; // Sums for CC calculation
double rgSums[2] = {};
double swSums[3] = {}; // Sums used for sample CC calculation
AtomDataSums& operator+=(const AtomDataSums& rhs)
{
ngrid += rhs.ngrid;
rfSums[0] += rhs.rfSums[0];
rfSums[1] += rhs.rfSums[1];
edSums[0] += rhs.edSums[0];
edSums[1] += rhs.edSums[1];
ccSums[0] += rhs.ccSums[0];
ccSums[1] += rhs.ccSums[1];
ccSums[2] += rhs.ccSums[2];
rgSums[0] += rhs.rgSums[0];
rgSums[1] += rhs.rgSums[1];
swSums[0] += rhs.swSums[0];
swSums[1] += rhs.swSums[1];
swSums[2] += rhs.swSums[2];
return *this;
}
double cc() const
{
double s = (ccSums[1] - (edSums[0] * edSums[0]) / ngrid) * (ccSums[2] - (edSums[1] * edSums[1]) / ngrid);
return (ccSums[0] - edSums[0] * edSums[1] / ngrid) / sqrt(s);
}
double srg() const
{
double rg = sqrt(rgSums[0] / rgSums[1]);
return sqrt(swSums[0] - rg * rg * swSums[1] + 0.5 * rg * rg * rg * rg * swSums[2]) / (rg * rgSums[1]);
}
};
struct AtomData
{
AtomData(Atom atom, float radius)
: atom(atom)
// , asymID(atom.authAsymID())
// , seqID(atom.property<string>("auth_seq_id"))
, asymID(atom.labelAsymID())
, seqID(atom.labelSeqID())
, radius(radius) {}
Atom atom;
string asymID;
int seqID;
float radius;
vector<AtomGridData> points;
double averageDensity = 0;
double edia = 0;
AtomDataSums sums;
};
// --------------------------------------------------------------------
tuple<float,float> CalculateMapStatistics(const Xmap<float>& f)
{
double sum = 0, sum2 = 0;
int count = 0;
for (auto ix = f.first(); not ix.last(); ix.next())
{
auto v = f[ix];
if (isnan(v))
throw runtime_error("map contains NaN values");
++count;
sum += v;
sum2 += v * v;
}
float meanDensity = static_cast<float>(sum / count);
float rmsDensity = static_cast<float>(sqrt((sum2 / count) - (meanDensity * meanDensity)));
return make_tuple(meanDensity, rmsDensity);
}
// --------------------------------------------------------------------
class BoundingBox
{
public:
// BoundingBox(const Structure& structure, const vector<tuple<string,int,string,string>>& residues, float margin)
// {
// mXMin = mYMin = mZMin = numeric_limits<float>::max();
// mXMax = mYMax = mZMax = numeric_limits<float>::min();
//
// for (auto& r: residues)
// {
// int seqID;
// string asymID, compID, pdbID;
// tie(asymID, seqID, compID, pdbID) = r;
//
// Residue res(structure, compID, asymID, seqID);
// for (auto& atom: res.atoms())
// {
// auto l = atom.location();
// if (mXMin > l.mX)
// mXMin = l.mX;
// if (mXMax < l.mX)
// mXMax = l.mX;
// if (mYMin > l.mY)
// mYMin = l.mY;
// if (mYMax < l.mY)
// mYMax = l.mY;
// if (mZMin > l.mZ)
// mZMin = l.mZ;
// if (mZMax < l.mZ)
// mZMax = l.mZ;
// }
// }
//
// mXMin -= margin;
// mXMax += margin;
// mYMin -= margin;
// mYMax += margin;
// mZMin -= margin;
// mZMax += margin;
// }
template<class List>
BoundingBox(const Structure& structure, List atoms, float margin)
{
mXMin = mYMin = mZMin = numeric_limits<float>::max();
mXMax = mYMax = mZMax = numeric_limits<float>::min();
for (auto& atom: atoms)
{
auto l = atom.location();
if (mXMin > l.mX)
mXMin = l.mX;
if (mXMax < l.mX)
mXMax = l.mX;
if (mYMin > l.mY)
mYMin = l.mY;
if (mYMax < l.mY)
mYMax = l.mY;
if (mZMin > l.mZ)
mZMin = l.mZ;
if (mZMax < l.mZ)
mZMax = l.mZ;
}
mXMin -= margin;
mXMax += margin;
mYMin -= margin;
mYMax += margin;
mZMin -= margin;
mZMax += margin;
}
bool contains(const Point& p) const
{
return p.mX >= mXMin and p.mX <= mXMax
and p.mY >= mYMin and p.mY <= mYMax
and p.mZ >= mZMin and p.mZ <= mZMax;
}
private:
float mXMin, mXMax, mYMin, mYMax, mZMin, mZMax;
};
// --------------------------------------------------------------------
StatsCollector::StatsCollector(const MapMaker<float>& mm, Structure& structure, bool electronScattering)
: mStructure(structure), mMapMaker(mm), mElectronScattering(electronScattering)
{
mSpacegroup = mm.spacegroup();
mCell = mm.cell();
mGrid = mm.gridSampling();
mResHigh = mm.resHigh();
mResLow = mm.resLow();
initialize();
}
void StatsCollector::initialize()
{
mMeanDensityFb = mMapMaker.fb().meanDensity();
mRMSDensityFb = mMapMaker.fb().rmsDensity();
mRMSDensityFd = mMapMaker.fd().rmsDensity();
// calculate degrees of freedom
auto omcd = mCell.matrix_orth();
mVF = 1;
mVC = 1;
for (int i = 0; i < 3; ++i)
{
mVC *= omcd(i, i);
mVF *= omcd(i, i) / mGrid[i];
}
mVF *= pow(2 / mResHigh, 3);
mSZ = 0;
// double so = 0;
// const double C = sqrt(2.0 / kPI);
for (auto& a: mStructure.atoms())
{
auto t = a.type();
if (t <= He)
continue;
float w = a.occupancy() * t;
if (w <= 0)
continue;
mSZ += w;
// float bIso = Util::u2b(a.uIso());
// if (bIso < 4)
// bIso = 4;
// float x = sqrt(bIso) / mResHigh;
// x = w * (2 * anorm(x) - 1 - C * x * exp(-0.5 * pow(x, 2))) / pow(x, 3);
//
// so += x;
}
// auto bo = mSZ;
mSZ = mSZ * mSpacegroup.num_symops() / mVC;
// mMeanBIso = pow(mResHigh * errsol(bo / so), 2);
// Calculate overall rms data
vector<AtomData> atomData;
for (auto atom: mStructure.atoms())
{
AtomShape shape(atom, mResHigh, mResLow, mElectronScattering);
float radius = shape.radius();
if (cif::VERBOSE > 2)
cerr << (atomData.size() + 1) << '\t'
<< AtomTypeTraits(atom.type()).symbol() << '\t'
<< radius << endl;
atomData.emplace_back(atom, radius);
}
GridPtDataMap gridPointDensity;
map<string,vector<double>> zScoresPerAsym;
sumDensity(atomData, gridPointDensity, zScoresPerAsym);
// Now that we have the density data, we can calculate the correction/rescale factors
for (auto zsc: zScoresPerAsym)
{
// collect array of z-scores
vector<double>& zdca0 = zsc.second;
// double qa, qb;
// tie(qa, qb) = interpolateCumulativeProbabilities(zdca0, mVF);
auto& z = zdca0;
auto vf = mVF;
sort(z.begin(), z.end());
double qa = 0, qb = 1;
size_t nd = z.size();
size_t n = static_cast<size_t>(round(vf * nd));
if (n > 100)
{
size_t i1 = static_cast<size_t>((n + 1) * anorm(-1.5)) + 1;
size_t i2 = static_cast<size_t>((n + 1) * anorm(1.5));
size_t ns = i2 - i1 + 1;
double vr = (nd - 1) / (n - 1.0);
double sw = 0, swx = 0, swxs = 0, swy = 0, swxy = 0, swys = 0;
for (auto i = i1; i <= i2; ++i)
{
double qx = phinvs(static_cast<double>(i) / (n + 1));
double x = vr * i;
size_t j = static_cast<size_t>(x);
x -= j;
// assert(j < z.size());
if (j < 1 or j >= z.size())
continue;
auto qyd = (1.0 - x) * z[j - 1] + x * z[j] - qx;
auto wx = exp(-0.5 * qx * qx);
sw += wx;
swx += wx * qx;
swxs += wx * qx * qx;
swy += wx * qyd;
swxy += wx * qx * qyd;
swys += wx * qyd * qyd;
}
double dd = 1.0 / (sw * swxs - swx * swx);
double qa = dd * (swxs * swy - swx * swxy);
double qb = dd * (sw * swxy - swx * swy);
if (cif::VERBOSE > 1)
{
swys = dd * (swys - (qa * swy + qb * swxy)) / (ns - 2);
cerr << endl
<< "Intercept & gradient before LS: " << qa << " (" << sqrt(swys * swxs) << ") " << qb << " (" << sqrt(swys * sw) << ')' << endl;
}
qb += 1.0;
if (cif::VERBOSE > 1)
{
cerr << endl
<< "Rescale SD(delta-rho) using Q-Q plot for asym " << zsc.first << ':' << endl
<< string(54, '=') << endl
<< "Input & updated SD(delta-rho): " << mRMSDensityFd << " ; " << qb * mRMSDensityFd << endl
<< endl;
}
}
mRmsScaled[zsc.first] = make_pair(qa * mRMSDensityFd, qb * mRMSDensityFd);
}
}
vector<ResidueStatistics> StatsCollector::collect() const
{
vector<tuple<string,int,string,string>> residues;
vector<Atom> atoms;
for (auto atom: mStructure.atoms())
{
if (atom.isWater())
continue;
auto k = make_tuple(atom.labelAsymID(), atom.labelSeqID(), atom.labelCompID(), atom.authSeqID());
// auto k = make_tuple(atom.authAsymID(), atom.property<string>("auth_seq_id"), atom.authCompID());
if (residues.empty() or residues.back() != k)
{
residues.emplace_back(move(k));
atoms.emplace_back(move(atom));
}
}
BoundingBox bbox(mStructure, atoms, 5.0f);
return collect(residues, bbox, true);
}
vector<ResidueStatistics> StatsCollector::collect(const string& asymID, int resFirst, int resLast, bool authNameSpace) const
{
vector<tuple<string,int,string,string>> residues;
vector<Atom> atoms;
for (auto atom: mStructure.atoms())
{
if (atom.isWater())
continue;
if (authNameSpace)
{
int authSeqID = stoi(atom.authSeqID());
if (atom.authAsymID() != asymID or authSeqID < resFirst or authSeqID > resLast)
continue;
}
else
{
if (atom.labelAsymID() != asymID or atom.labelSeqID() < resFirst or atom.labelSeqID() > resLast)
continue;
}
auto k = make_tuple(atom.labelAsymID(), atom.labelSeqID(), atom.labelCompID(), atom.authSeqID());
// auto k = make_tuple(atom.authAsymID(), atom.property<string>("auth_seq_id"), atom.authCompID());
if (residues.empty() or residues.back() != k)
{
residues.emplace_back(move(k));
atoms.emplace_back(move(atom));
}
}
BoundingBox bbox(mStructure, atoms, 5.0f);
return collect(residues, bbox, false);
}
vector<ResidueStatistics> StatsCollector::collect(const vector<tuple<string,int,string,string>>& residues,
BoundingBox& bbox, bool addWaters) const
{
vector<AtomData> atomData;
// BoundingBox bb(mStructure, residues, 5.0f);
for (auto atom: mStructure.atoms())
{
if (atom.isWater())
{
if (not addWaters)
continue;
}
else if (not bbox.contains(atom.location()))
continue;
AtomShape shape(atom, mResHigh, mResLow, mElectronScattering);
float radius = shape.radius();
if (cif::VERBOSE > 2)
cerr << (atomData.size() + 1) << '\t'
<< AtomTypeTraits(atom.type()).symbol() << '\t'
<< radius << endl;
atomData.emplace_back(atom, radius);
}
calculate(atomData);
set<string> missing;
vector<ResidueStatistics> result;
// And now collect the per residue information
for (auto r: residues)
{
int seqID;
string asymID, compID, authSeqID;
tie(asymID, seqID, compID, authSeqID) = r;
AtomDataSums sums;
double ediaSum = 0;
size_t n = 0, m = 0;
auto comp = Compound::create(compID);
if (comp == nullptr)
{
if (not missing.count(compID) and compID != "HOH")
cerr << "Missing information for compound '" << compID << '\'' << endl;
missing.insert(compID);
for (const auto& d: atomData)
{
if (d.asymID != asymID or d.seqID != seqID)
continue;
sums += d.sums;
ediaSum += pow(d.edia + 0.1, -2);
++n;
if (d.edia >= 0.8)
++m;
}
}
else
{
for (auto& compAtom: comp->atoms())
{
if (compAtom.typeSymbol == H)
continue;
++n;
auto ci = find_if(atomData.begin(), atomData.end(),
[=](auto& d) { return d.asymID == asymID and d.seqID == seqID and d.atom.labelAtomID() == compAtom.id; });
if (ci == atomData.end())
{
if (compAtom.id == "OXT")
--n;
else if (cif::VERBOSE > 1)
cerr << "Missing atom '" << compAtom.id << "' in residue " << asymID << ':' << seqID << endl;
continue;
}
sums += ci->sums;
ediaSum += pow(ci->edia + 0.1, -2);
if (ci->edia >= 0.8)
++m;
}
}
result.emplace_back(ResidueStatistics{asymID, seqID, compID,
authSeqID,
(sums.rfSums[0] / sums.rfSums[1]), // rsr
sums.srg(), // srsr
sums.cc(), // rsccs
1 / sqrt(ediaSum / n) - 0.1, // ediam
100. * m / n, // opia
static_cast<int>(round(mVF * sums.ngrid))}); // ngrid
}
if (addWaters)
{
for (const auto& d: atomData)
{
const Atom& atom = d.atom;
if (not atom.isWater())
continue;
result.emplace_back(ResidueStatistics{d.asymID, d.seqID, "HOH",
atom.authSeqID(),
(d.sums.rfSums[0] / d.sums.rfSums[1]), // rsr
d.sums.srg(), // srsr
d.sums.cc(), // rsccs
d.edia, // ediam
(d.edia > 0.8 ? 100. : 0.), // opia
static_cast<int>(round(mVF * d.sums.ngrid))}); // ngrid
}
}
return result;
}
ResidueStatistics StatsCollector::collect(initializer_list<const Residue*> residues) const
{
vector<Atom> atoms;
for (auto& r: residues)
for (auto a: r->atoms())
atoms.push_back(a);
return collect(atoms);
}
ResidueStatistics StatsCollector::collect(initializer_list<Atom> atoms) const
{
vector<Atom> v(atoms);
return collect(v);
}
ResidueStatistics StatsCollector::collect(const vector<Atom>& atoms) const
{
vector<AtomData> atomData;
BoundingBox bb(mStructure, atoms, 4.f);
for (auto atom: mStructure.atoms())
{
if (not bb.contains(atom.location()))
continue;
AtomShape shape(atom, mResHigh, mResLow, mElectronScattering);
float radius = shape.radius();
if (cif::VERBOSE > 2)
cerr << (atomData.size() + 1) << '\t'
<< AtomTypeTraits(atom.type()).symbol() << '\t'
<< radius << endl;
atomData.emplace_back(atom, radius);
}
calculate(atomData);
AtomDataSums sums;
size_t n = 0, m = 0;
double ediaSum = 0;
for (auto& atom: atoms)
{
++n;
auto ci = find_if(atomData.begin(), atomData.end(),
[=](auto& d) { return d.asymID == atom.labelAsymID() and d.seqID == atom.labelSeqID() and d.atom.labelAtomID() == atom.labelAtomID(); });
if (ci == atomData.end())
continue;
sums += ci->sums;
ediaSum += pow(ci->edia + 0.1, -2);
if (ci->edia >= 0.8)
++m;
}
ResidueStatistics result{"", 0, "", "",
(sums.rfSums[0] / sums.rfSums[1]), // rsr
sums.srg(), // srsr
sums.cc(), // rsccs
1 / sqrt(ediaSum / n) - 0.1, // ediam
100. * m / n, // opia
static_cast<int>(round(mVF * sums.ngrid)) // ngrid
};
return result;
}
void StatsCollector::sumDensity(vector<AtomData>& atomData,
GridPtDataMap& gridPointDensity, map<string,vector<double>>& zScoresPerAsym) const
{
using namespace clipper;
const Xmap<float>& Fb = mMapMaker.fb();
const Xmap<float>& Fd = mMapMaker.fd();
// First step, iterate over atoms, then over grid points covered by this atom
// collecting per gridpoint statistics
for (auto& data: atomData)
{
auto& atom = data.atom;
AtomShape shape(atom, mResHigh, mResLow, mElectronScattering);
string asymID = data.asymID;
if (atom.isWater())
asymID = "0";
auto radius = data.radius;
double sumDensity = 0;
iterateGrid(atom.location(), radius, Fb, [&](Xmap_base::Map_reference_coord& iw)
{
auto p = Point(iw.coord_orth());
double d = Distance(p, atom.location());
if (d <= radius)
{
double density = shape.calculatedDensity(p);
assert(not isnan(density));
gridPointDensity[iw.coord()] += density;
data.points.emplace_back(iw.coord(), density);
sumDensity += density;
zScoresPerAsym[data.asymID].push_back(Fd[iw] / (Fd.multiplicity(iw.coord()) * mRMSDensityFd));
}
});
data.averageDensity = sumDensity / data.points.size();
}
}
void StatsCollector::collectSums(vector<AtomData>& atomData, GridPtDataMap& gridPointDensity) const
{
using namespace clipper;
const Xmap<float>& Fb = mMapMaker.fb();
const Xmap<float>& Fd = mMapMaker.fd();
cif::Progress progress(atomData.size(), "Stats calculation");
// Iterate over the atom data to collect the sums
for (auto& d: atomData)
{
auto rmsScaledF = mRmsScaled.at(d.asymID);
for (auto gp: d.points)
{
++d.sums.ngrid;
double e = gp.density / gridPointDensity[gp.p];
double t = e * mSZ / rmsScaledF.second;
Xmap_base::Map_reference_coord ix(Fb, gp.p);
double fb = Fb[ix];
double fd = Fd[ix];
double ed1 = e * (fb - rmsScaledF.first) / rmsScaledF.second + t;
double ed2 = e * (fd - rmsScaledF.first) / rmsScaledF.second;
double ed3 = ed1 - ed2;
d.sums.rfSums[0] += abs(ed2);
d.sums.rfSums[1] += abs(ed1 + ed3);
double w = gp.density / d.averageDensity;
if (w < 0)
w = 0;
if (w > 1)
w = 1;
d.sums.rgSums[0] += w * ed2 * ed2;
d.sums.rgSums[1] += w * ed1 * ed1;
d.sums.swSums[0] += (w * ed2) * (w * ed2);
d.sums.swSums[1] += (w * ed1) * (w * ed2);
d.sums.swSums[2] += (w * ed1) * (w * ed1);
ed1 -= t;
ed3 -= t;
d.sums.ccSums[0] += ed1 * ed3;
d.sums.ccSums[1] += ed1 * ed1;
d.sums.ccSums[2] += ed3 * ed3;
d.sums.edSums[0] += ed1;
d.sums.edSums[1] += ed3;
}
progress.consumed(1);
}
}
void StatsCollector::calculate(vector<AtomData>& atomData) const
{
GridPtDataMap gridPointDensity;
map<string,vector<double>> zScoresPerAsym;
sumDensity(atomData, gridPointDensity, zScoresPerAsym);
collectSums(atomData, gridPointDensity);
}
// --------------------------------------------------------------------
EDIAStatsCollector::EDIAStatsCollector(MapMaker<float>& mm,
Structure& structure, bool electronScattering, const BondMap& bondMap)
: StatsCollector(mm, structure, electronScattering)
, mDistanceMap(structure, mm.spacegroup(), mm.cell(), 3.5f), mBondMap(bondMap)
{
// create a atom radius map, for EDIA
const double kResolutions[] =
{
0.5, 1.0, 1.5, 2.0, 2.5
};
// The following numbers were harvested with the application collect-b-factors
const double kAverageBFactors[] =
{
6.31912, // 0.5
14.4939, // 1.0
20.8827, // 1.5
27.7075, // 2.0
55.6378 // 2.5
};
const int kAverageBFactorCount = sizeof(kAverageBFactors) / sizeof(double);
int i = static_cast<int>(floor(mResHigh / 0.5)) - 1;
if (i > kAverageBFactorCount - 1)
i = kAverageBFactorCount - 1;
if (i < 0)
i = 0;
float ediaBFactor;
if (i < kAverageBFactorCount - 1)
ediaBFactor = kAverageBFactors[i] +
((kAverageBFactors[i + 1] - kAverageBFactors[i]) * (mResHigh - kResolutions[i]) / (kResolutions[i + 1] - kResolutions[i]));
else
ediaBFactor = kAverageBFactors[i];
if (cif::VERBOSE)
cerr << "Calculating radii with B Factor " << ediaBFactor << endl;
for (auto atom: mStructure.atoms())
{
if (mRadii.count(atom.type()))
continue;
AtomShape shape(atom, mResHigh, mResLow, mElectronScattering, ediaBFactor);
mRadii[atom.type()] = shape.radius();
if (cif::VERBOSE)
cerr << "Radius for atom with type " << AtomTypeTraits(atom.type()).symbol() << " is " << mRadii[atom.type()] << endl;
}
}
void EDIAStatsCollector::calculate(vector<AtomData>& atomData) const
{
StatsCollector::calculate(atomData);
const Xmap<float>& Fb = mMapMaker.fb();
// Xmap<float>& fd = mMapMaker.fd();
struct lessAtom
{
bool operator()(const Atom& a, const Atom& b) const { return a.id().compare(b.id()) < 0; }
};
typedef set<Atom, lessAtom> atomSet;
// Calculate EDIA scores
cif::Progress progress(atomData.size(), "EDIA calculation");
for (auto& data: atomData)
{
auto& atom = data.atom;
float radius = mRadii.at(atom.type());
// if (cif::VERBOSE > 2)
// cerr << (atomData.size() + 1) << '\t'
// << AtomTypeTraits(atom.type()).symbol() << '\t'
// << radius << endl;
//
PointWeightFunction w(atom.location(), radius);
vector<Atom> atomsNearBy = mDistanceMap.near(atom, 3.5f);
vector<PointWeightFunction> wn;
for (auto a: atomsNearBy)
wn.emplace_back(a.location(), mRadii.at(a.type()));
float ediaSum[2] = {};
iterateGrid(atom.location(), radius, Fb, [&](auto iw)
{
Point p = iw.coord_orth();
// EDIA calculations
float z = (Fb[iw] - mMeanDensityFb) / mRMSDensityFb;
if (z < 0)
z = 0;
if (z > 1.2)
z = 1.2;
float wp = w(p);
// And divide the ownership
atomSet S, D, I;
if (wp != 0)
{
if (wp < 0)
D.insert(atom);
else
{
S.insert(atom);
I.insert(atom);
}
}
for (size_t i = 0; i < atomsNearBy.size(); ++i)
{
float w = wn[i](p);
if (w == 0)
continue;
if (w < 0)
D.insert(atomsNearBy[i]);
else if (w > 0)
{
S.insert(atomsNearBy[i]);
if (not mBondMap(atomsNearBy[i], atom))
I.insert(atomsNearBy[i]);
}
}
float o = 0;
if (wp > 0)
{
if (I.size() == 1)
o = 1;
else
{
float sumpb = accumulate(I.begin(), I.end(), 0.f,
[p](float s, const Atom& b) -> float
{
return s + Distance(p, b.location());
});
o = 1 - Distance(atom.location(), p) / sumpb;
}
}
else if (D.count(atom) and S.empty())
{
if (D.size() == 1)
o = 1;
else
{
float sumpb = accumulate(D.begin(), D.end(), 0.f,
[p](float s, const Atom& b) -> float
{
return s + Distance(p, b.location());
});
o = 1 - Distance(atom.location(), p) / sumpb;
}
}
// if (cif::VERBOSE > 2)
// cout << Point(p) << ":\td: " << xmap[iw] << "\tz: " << z << "\to: " << o << "\tzraw: " << ((xmap[iw] - meanDensity) / rmsDensity) << "\twp: " << wp << endl;
ediaSum[0] += z * wp * o;
if (wp > 0)
ediaSum[1] += wp;
});
data.edia = ediaSum[0] / ediaSum[1];
progress.consumed(1);
}
}
}
......@@ -47,7 +47,7 @@ namespace fs = boost::filesystem;
#include "cif++/PDB2Cif.hpp"
#include "cif++/CifParser.hpp"
#include "cif++/Cif2PDB.hpp"
#include "cif++/AtomShape.hpp"
// #include "cif++/AtomShape.hpp"
using namespace std;
......@@ -190,7 +190,7 @@ struct AtomImpl
, mRefcount(1), mRow(i.mRow), mCompound(i.mCompound)
, mRadius(i.mRadius), mCachedProperties(i.mCachedProperties)
, mSymmetryCopy(i.mSymmetryCopy), mClone(true)
, mRTop(i.mRTop), mD(i.mD)
// , mRTop(i.mRTop), mD(i.mD)
{
}
......@@ -211,18 +211,18 @@ struct AtomImpl
prefetch();
}
AtomImpl(const AtomImpl& impl, const Point& d, const clipper::RTop_orth& rt)
: mFile(impl.mFile), mID(impl.mID), mType(impl.mType), mAtomID(impl.mAtomID)
, mCompID(impl.mCompID), mAsymID(impl.mAsymID), mSeqID(impl.mSeqID)
, mAltID(impl.mAltID), mLocation(impl.mLocation), mRefcount(1)
, mRow(impl.mRow), mCompound(impl.mCompound), mRadius(impl.mRadius)
, mCachedProperties(impl.mCachedProperties)
, mSymmetryCopy(true), mRTop(rt), mD(d)
{
mLocation += d;
mLocation = ((clipper::Coord_orth)mLocation).transform(rt);
mLocation -= d;
}
// AtomImpl(const AtomImpl& impl, const Point& d, const clipper::RTop_orth& rt)
// : mFile(impl.mFile), mID(impl.mID), mType(impl.mType), mAtomID(impl.mAtomID)
// , mCompID(impl.mCompID), mAsymID(impl.mAsymID), mSeqID(impl.mSeqID)
// , mAltID(impl.mAltID), mLocation(impl.mLocation), mRefcount(1)
// , mRow(impl.mRow), mCompound(impl.mCompound), mRadius(impl.mRadius)
// , mCachedProperties(impl.mCachedProperties)
// , mSymmetryCopy(true), mRTop(rt), mD(d)
// {
// mLocation += d;
// mLocation = ((clipper::Coord_orth)mLocation).transform(rt);
// mLocation -= d;
// }
void prefetch()
{
......@@ -245,52 +245,52 @@ struct AtomImpl
mCompound = Compound::create(compID);
}
clipper::Atom toClipper() const
{
clipper::Atom result;
result.set_coord_orth(mLocation);
// clipper::Atom toClipper() const
// {
// clipper::Atom result;
// result.set_coord_orth(mLocation);
if (mRow["occupancy"].empty())
result.set_occupancy(1.0);
else
result.set_occupancy(mRow["occupancy"].as<float>());
// if (mRow["occupancy"].empty())
// result.set_occupancy(1.0);
// else
// result.set_occupancy(mRow["occupancy"].as<float>());
string element = mRow["type_symbol"].as<string>();
if (not mRow["pdbx_formal_charge"].empty())
{
int charge = mRow["pdbx_formal_charge"].as<int>();
if (abs(charge) > 1)
element += to_string(charge);
if (charge < 0)
element += '-';
else
element += '+';
}
result.set_element(element);
// string element = mRow["type_symbol"].as<string>();
// if (not mRow["pdbx_formal_charge"].empty())
// {
// int charge = mRow["pdbx_formal_charge"].as<int>();
// if (abs(charge) > 1)
// element += to_string(charge);
// if (charge < 0)
// element += '-';
// else
// element += '+';
// }
// result.set_element(element);
if (not mRow["U_iso_or_equiv"].empty())
result.set_u_iso(mRow["U_iso_or_equiv"].as<float>());
else if (not mRow["B_iso_or_equiv"].empty())
result.set_u_iso(mRow["B_iso_or_equiv"].as<float>() / (8 * kPI * kPI));
else
throw runtime_error("Missing B_iso or U_iso");
// if (not mRow["U_iso_or_equiv"].empty())
// result.set_u_iso(mRow["U_iso_or_equiv"].as<float>());
// else if (not mRow["B_iso_or_equiv"].empty())
// result.set_u_iso(mRow["B_iso_or_equiv"].as<float>() / (8 * kPI * kPI));
// else
// throw runtime_error("Missing B_iso or U_iso");
auto& db = *mFile.impl().mDb;
auto& cat = db["atom_site_anisotrop"];
auto r = cat[cif::Key("id") == mID];
if (r.empty())
result.set_u_aniso_orth(clipper::U_aniso_orth(nan("0"), 0, 0, 0, 0, 0));
else
{
float u11, u12, u13, u22, u23, u33;
cif::tie(u11, u12, u13, u22, u23, u33) =
r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");
// auto& db = *mFile.impl().mDb;
// auto& cat = db["atom_site_anisotrop"];
// auto r = cat[cif::Key("id") == mID];
// if (r.empty())
// result.set_u_aniso_orth(clipper::U_aniso_orth(nan("0"), 0, 0, 0, 0, 0));
// else
// {
// float u11, u12, u13, u22, u23, u33;
// cif::tie(u11, u12, u13, u22, u23, u33) =
// r.get("U[1][1]", "U[1][2]", "U[1][3]", "U[2][2]", "U[2][3]", "U[3][3]");
result.set_u_aniso_orth(clipper::U_aniso_orth(u11, u22, u33, u12, u13, u23));
}
// result.set_u_aniso_orth(clipper::U_aniso_orth(u11, u22, u33, u12, u13, u23));
// }
return result;
}
// return result;
// }
void reference()
{
......@@ -423,9 +423,9 @@ struct AtomImpl
bool mSymmetryCopy = false;
bool mClone = false;
clipper::RTop_orth mRTop;
Point mD;
int32_t mRTix;
// clipper::RTop_orth mRTop;
// Point mD;
// int32_t mRTix;
};
//Atom::Atom(const File& f, const string& id)
......@@ -642,25 +642,25 @@ void Atom::location(Point p)
impl()->moveTo(p);
}
Atom Atom::symmetryCopy(const Point& d, const clipper::RTop_orth& rt)
{
return Atom(new AtomImpl(*impl(), d, rt));
}
// Atom Atom::symmetryCopy(const Point& d, const clipper::RTop_orth& rt)
// {
// return Atom(new AtomImpl(*impl(), d, rt));
// }
bool Atom::isSymmetryCopy() const
{
return impl()->mSymmetryCopy;
}
// bool Atom::isSymmetryCopy() const
// {
// return impl()->mSymmetryCopy;
// }
string Atom::symmetry() const
{
return clipper::Symop(impl()->mRTop).format() + "\n" + impl()->mRTop.format();
}
// string Atom::symmetry() const
// {
// return clipper::Symop(impl()->mRTop).format() + "\n" + impl()->mRTop.format();
// }
const clipper::RTop_orth& Atom::symop() const
{
return impl()->mRTop;
}
// const clipper::RTop_orth& Atom::symop() const
// {
// return impl()->mRTop;
// }
const Compound& Atom::comp() const
{
......@@ -678,20 +678,20 @@ bool Atom::operator==(const Atom& rhs) const
(&impl()->mFile == &rhs.impl()->mFile and impl()->mID == rhs.impl()->mID);
}
clipper::Atom Atom::toClipper() const
{
return impl()->toClipper();
}
// clipper::Atom Atom::toClipper() const
// {
// return impl()->toClipper();
// }
void Atom::calculateRadius(float resHigh, float resLow, float perc)
{
AtomShape shape(*this, resHigh, resLow, false);
impl()->mRadius = shape.radius();
// void Atom::calculateRadius(float resHigh, float resLow, float perc)
// {
// AtomShape shape(*this, resHigh, resLow, false);
// impl()->mRadius = shape.radius();
// verbose
if (cif::VERBOSE > 1)
cout << "Calculated radius for " << AtomTypeTraits(impl()->mType).name() << " with charge " << charge() << " is " << impl()->mRadius << endl;
}
// // verbose
// if (cif::VERBOSE > 1)
// cout << "Calculated radius for " << AtomTypeTraits(impl()->mType).name() << " with charge " << charge() << " is " << impl()->mRadius << endl;
// }
float Atom::radius() const
{
......
......@@ -38,197 +38,13 @@ namespace mmcif
{
// --------------------------------------------------------------------
clipper::Coord_orth CalculateOffsetForCell(const Structure& p, const clipper::Spacegroup& spacegroup, const clipper::Cell& cell)
{
auto& atoms = p.atoms();
size_t dim = atoms.size();
vector<clipper::Coord_orth> locations;
locations.reserve(dim);
// bounding box
Point pMin(numeric_limits<float>::max(), numeric_limits<float>::max(), numeric_limits<float>::max()),
pMax(numeric_limits<float>::min(), numeric_limits<float>::min(), numeric_limits<float>::min());
for (auto& atom: atoms)
{
auto p = atom.location();
locations.push_back(p);
if (pMin.mX > p.mX)
pMin.mX = p.mX;
if (pMin.mY > p.mY)
pMin.mY = p.mY;
if (pMin.mZ > p.mZ)
pMin.mZ = p.mZ;
if (pMax.mX < p.mX)
pMax.mX = p.mX;
if (pMax.mY < p.mY)
pMax.mY = p.mY;
if (pMax.mZ < p.mZ)
pMax.mZ = p.mZ;
};
// correct locations so that the median of x, y and z are inside the cell
vector<float> c(dim);
auto median = [&]()
{
return dim % 1 == 0
? c[dim / 2]
: (c[dim / 2 - 1] + c[dim / 2]) / 2;
};
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[0]; });
sort(c.begin(), c.end());
float mx = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[1]; });
sort(c.begin(), c.end());
float my = median();
transform(locations.begin(), locations.end(), c.begin(), [](auto& l) { return l[2]; });
sort(c.begin(), c.end());
float mz = median();
if (cif::VERBOSE > 1)
cerr << "median position of atoms: " << Point(mx, my, mz) << endl;
auto calculateD = [&](float m, float c)
{
float d = 0;
assert(c != 0);
if (c != 0)
{
while (m + d < -(c / 2))
d += c;
while (m + d > (c / 2))
d -= c;
}
return d;
};
if (cell.a() == 0 or cell.b() == 0 or cell.c() == 0)
throw runtime_error("Invalid cell, contains a dimension that is zero");
Point D;
D.mX = calculateD(mx, cell.a());
D.mY = calculateD(my, cell.b());
D.mZ = calculateD(mz, cell.c());
if (D.mX != 0 or D.mY != 0 or D.mZ != 0)
{
if (cif::VERBOSE)
cerr << "moving coorinates by " << D.mX << ", " << D.mY << " and " << D.mZ << endl;
}
return D;
}
// --------------------------------------------------------------------
vector<clipper::RTop_orth> AlternativeSites(const clipper::Spacegroup& spacegroup,
const clipper::Cell& cell)
{
vector<clipper::RTop_orth> result;
// to make the operation at index 0 equal to identity
result.push_back(clipper::RTop_orth::identity());
for (int i = 0; i < spacegroup.num_symops(); ++i)
{
const auto& symop = spacegroup.symop(i);
for (int u: { -1, 0, 1})
for (int v: { -1, 0, 1})
for (int w: { -1, 0, 1})
{
if (i == 0 and u == 0 and v == 0 and w == 0)
continue;
auto rtop = clipper::RTop_frac(
symop.rot(), symop.trn() + clipper::Vec3<>(u, v, w)
).rtop_orth(cell);
result.push_back(move(rtop));
}
}
return result;
}
// --------------------------------------------------------------------
// Unfortunately, clipper has a different numbering scheme than PDB
// for rotation numbers. So we created a table to map those.
// Perhaps a bit over the top, but hey....
#include "SymOpTable_data.cpp"
int32_t GetRotationalIndexNumber(int spacegroup, const clipper::RTop_frac& rt)
{
auto& rot = rt.rot();
auto& trn = rt.trn();
auto rte = [&rot](int i, int j) { return static_cast<int8_t>(lrint(rot(i, j))); };
SymopData k
{
{
rte(0, 0), rte(0, 1), rte(0, 2),
rte(1, 0), rte(1, 1), rte(1, 2),
rte(2, 0), rte(2, 1), rte(2, 2)
}
};
for (int i = 0; i < 3; ++i)
{
int n = lrint(trn[i] * 24);
int d = 24;
if (n == 0 or abs(n) == 24)
continue; // is 0, 0 in our table
for (int i = 5; i > 1; --i)
if (n % i == 0 and d % i == 0)
{
n /= i;
d /= i;
}
n = (n + d) % d;
switch (i)
{
case 0: k.trn_0_0 = n; k.trn_0_1 = d; break;
case 1: k.trn_1_0 = n; k.trn_1_1 = d; break;
case 2: k.trn_2_0 = n; k.trn_2_1 = d; break;
}
}
const size_t N = sizeof(kSymopNrTable) / sizeof(SymopDataBlock);
int32_t L = 0, R = static_cast<int32_t>(N - 1);
while (L <= R)
{
int32_t i = (L + R) / 2;
if (kSymopNrTable[i].spacegroupNr < spacegroup)
L = i + 1;
else
R = i - 1;
}
for (size_t i = L; i < N and kSymopNrTable[i].spacegroupNr == spacegroup; ++i)
{
if (kSymopNrTable[i].rt.iv == k.iv)
return kSymopNrTable[i].rotationalNr;
}
throw runtime_error("Symmetry operation was not found in table, cannot find rotational number");
}
// --------------------------------------------------------------------
// And unfortunately, clipper does not know all the spacegroup names mentioned in symop.lib
int GetSpacegroupNumber(std::string spacegroup)
{
......@@ -277,141 +93,4 @@ int GetSpacegroupNumber(std::string spacegroup)
return result;
}
// -----------------------------------------------------------------------
std::string SpacegroupToHall(std::string spacegroup)
{
int nr = GetSpacegroupNumber(spacegroup);
// yeah, sucks, I know, might be looping three times this way
string result;
for (auto& sp: kSpaceGroups)
{
if (sp.nr == nr)
{
result = sp.Hall;
break;
}
}
if (result.empty())
throw runtime_error("Spacegroup name " + spacegroup + " was not found in table");
return result;
}
// clipper::Spgr_descr GetCCP4SpacegroupDescr(int nr)
// {
// const size_t N = sizeof(kSymopNrTable) / sizeof(SymopDataBlock);
// int32_t L = 0, R = static_cast<int32_t>(N - 1);
// while (L <= R)
// {
// int32_t i = (L + R) / 2;
// if (kSymopNrTable[i].spacegroupNr < nr)
// L = i + 1;
// else
// R = i - 1;
// }
// if (L < 0 or L >= N)
// throw runtime_error("Invalid spacegroup number");
// clipper::Spgr_descr::Symop_codes symop_codes;
// for (size_t i = L; i < N and kSymopNrTable[i].spacegroupNr == nr; ++i)
// {
// auto& symop = kSymopNrTable[i];
// clipper::ftype m[4][4] = {
// { symop.rt.rot_0_0, symop.rt.rot_0_1, symop.rt.rot_0_2, static_cast<float>(symop.rt.trn_0_0) / symop.rt.trn_0_1 },
// { symop.rt.rot_1_0, symop.rt.rot_1_1, symop.rt.rot_1_2, static_cast<float>(symop.rt.trn_1_0) / symop.rt.trn_1_1 },
// { symop.rt.rot_2_0, symop.rt.rot_2_1, symop.rt.rot_2_2, static_cast<float>(symop.rt.trn_2_0) / symop.rt.trn_2_1 },
// { 0, 0, 0, 1 }
// };
// symop_codes.emplace_back(clipper::Symop(m));
// }
// return clipper::Spgr_descr(symop_codes);
// }
clipper::Spgr_descr GetCCP4SpacegroupDescr(int nr)
{
for (auto& sg: kSpaceGroups)
{
if (sg.nr == nr)
return clipper::Spgr_descr(sg.Hall, clipper::Spgr_descr::Hall);
}
throw runtime_error("Invalid spacegroup number: " + to_string(nr));
}
// -----------------------------------------------------------------------
SymmetryAtomIteratorFactory::SymmetryAtomIteratorFactory(const Structure& p, int spacegroupNr, const clipper::Cell& cell)
: mSpacegroupNr(spacegroupNr)
, mSpacegroup(GetCCP4SpacegroupDescr(spacegroupNr))
, mD(CalculateOffsetForCell(p, mSpacegroup, cell))
, mRtOrth(AlternativeSites(mSpacegroup, cell))
, mCell(cell)
{
}
string SymmetryAtomIteratorFactory::symop_mmcif(const Atom& a) const
{
string result;
if (not a.isSymmetryCopy())
result = "1_555";
else
{
auto rtop_o = a.symop();
for (int i = 0; i < mSpacegroup.num_symops(); ++i)
{
const auto& symop = mSpacegroup.symop(i);
for (int u: { -1, 0, 1})
for (int v: { -1, 0, 1})
for (int w: { -1, 0, 1})
{
// if (i == 0 and u == 0 and v == 0 and w == 0)
// continue;
auto rtop = clipper::RTop_frac(
symop.rot(), symop.trn() + clipper::Vec3<>(u, v, w)
).rtop_orth(mCell);
if (rtop.rot().equals(rtop_o.rot(), 0.00001) and rtop.trn().equals(rtop_o.trn(), 0.000001))
{
// gotcha
auto rtop_f = rtop.rtop_frac(mCell);
int rnr = GetRotationalIndexNumber(mSpacegroupNr, rtop_f);
uint32_t t[3] = {
static_cast<uint32_t>(5 + static_cast<int>(rint(rtop_f.trn()[0]))),
static_cast<uint32_t>(5 + static_cast<int>(rint(rtop_f.trn()[1]))),
static_cast<uint32_t>(5 + static_cast<int>(rint(rtop_f.trn()[2])))
};
if (t[0] > 9 or t[1] > 9 or t[2] > 9)
throw runtime_error("Symmetry operation has an out-of-range translation.");
result += to_string(rnr) + "_"
+ to_string(t[0])
+ to_string(t[1])
+ to_string(t[2]);
return result;
}
}
}
}
return result;
}
}
......@@ -27,7 +27,8 @@
#define BOOST_TEST_MODULE LibCifPP_Test
#include <boost/test/included/unit_test.hpp>
#include "cif++/DistanceMap.hpp"
// #include "cif++/DistanceMap.hpp"
#include "cif++/Cif++.hpp"
// --------------------------------------------------------------------
......@@ -49,7 +50,7 @@ cif::File operator""_cf(const char* text, size_t length)
BOOST_AUTO_TEST_CASE(ut1)
{
using namespace mmcif;
// using namespace mmcif;
auto f = R"(data_TEST
#
......@@ -88,16 +89,17 @@ _test.name
BOOST_AUTO_TEST_CASE(ut2)
{
using namespace mmcif;
// using namespace mmcif;
auto f = R"(data_TEST
#
loop_
_test.id
_test.name
1 aap
2 noot
3 mies
_test.value
1 aap 1.0
2 noot 1.1
3 mies 1.2
)"_cf;
auto& db = f.firstDatablock();
......@@ -113,9 +115,14 @@ _test.name
BOOST_CHECK(++n == 1);
BOOST_CHECK(r["id"].as<int>() == 1);
BOOST_CHECK(r["name"].as<std::string>() == "aap");
BOOST_CHECK(r["value"].as<float>() == 1.0);
}
auto t = test.find(cif::Key("id") == 1);
BOOST_CHECK(not t.empty());
BOOST_CHECK(t.front()["name"].as<std::string>() == "aap");
auto t2 = test.find(cif::Key("value") == 1.2);
BOOST_CHECK(not t2.empty());
BOOST_CHECK(t2.front()["name"].as<std::string>() == "mies");
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment