Commit f5016403 by Maarten L. Hekkelman

refactored mmcif::File

parent c8f66ae6
Version 3.0.5
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
Version 3.0.4 Version 3.0.4
- Fix in mmCIF parser, now correctly handles the unquoted - Fix in mmCIF parser, now correctly handles the unquoted
string ?? string ??
......
...@@ -2169,17 +2169,21 @@ class File ...@@ -2169,17 +2169,21 @@ class File
File(); File();
File(std::istream &is, bool validate = false); File(std::istream &is, bool validate = false);
File(const std::filesystem::path &path, bool validate = false); File(const std::filesystem::path &path, bool validate = false);
File(const char *data, size_t length); // good luck trying to find out what it is...
File(File &&rhs); File(File &&rhs);
File(const File &rhs) = delete; File(const File &rhs) = delete;
File &operator=(const File &rhs) = delete; File &operator=(const File &rhs) = delete;
~File(); virtual ~File();
virtual void load(const std::filesystem::path &p);
virtual void save(const std::filesystem::path &p);
void load(const std::filesystem::path &p); virtual void load(std::istream &is);
void save(const std::filesystem::path &p); virtual void save(std::ostream &os);
void load(std::istream &is); virtual void load(const char *data, std::size_t length);
void save(std::ostream &os);
/// \brief Load only the data block \a datablock from the mmCIF file /// \brief Load only the data block \a datablock from the mmCIF file
void load(std::istream &is, const std::string &datablock); void load(std::istream &is, const std::string &datablock);
...@@ -2211,6 +2215,12 @@ class File ...@@ -2211,6 +2215,12 @@ class File
void append(Datablock *e); void append(Datablock *e);
Datablock &emplace(std::string_view name)
{
append(new Datablock(name));
return back();
}
Datablock *get(std::string_view name) const; Datablock *get(std::string_view name) const;
Datablock &operator[](std::string_view name); Datablock &operator[](std::string_view name);
...@@ -2248,6 +2258,9 @@ class File ...@@ -2248,6 +2258,9 @@ class File
iterator begin() const; iterator begin() const;
iterator end() const; iterator end() const;
Datablock &front();
Datablock &back();
bool empty() const { return mHead == nullptr; } bool empty() const { return mHead == nullptr; }
const Validator &getValidator() const; const Validator &getValidator() const;
......
...@@ -221,7 +221,7 @@ struct PointF ...@@ -221,7 +221,7 @@ struct PointF
FType length() const FType length() const
{ {
return sqrt(mX * mX + mY * mY + mZ * mZ); return std::sqrt(mX * mX + mY * mY + mZ * mZ);
} }
}; };
...@@ -285,7 +285,7 @@ inline double DistanceSquared(const PointF<F> &a, const PointF<F> &b) ...@@ -285,7 +285,7 @@ inline double DistanceSquared(const PointF<F> &a, const PointF<F> &b)
template <typename F> template <typename F>
inline double Distance(const PointF<F> &a, const PointF<F> &b) inline double Distance(const PointF<F> &a, const PointF<F> &b)
{ {
return sqrt( return std::sqrt(
(a.mX - b.mX) * (a.mX - b.mX) + (a.mX - b.mX) * (a.mX - b.mX) +
(a.mY - b.mY) * (a.mY - b.mY) + (a.mY - b.mY) * (a.mY - b.mY) +
(a.mZ - b.mZ) * (a.mZ - b.mZ)); (a.mZ - b.mZ) * (a.mZ - b.mZ));
...@@ -332,10 +332,10 @@ double DihedralAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> & ...@@ -332,10 +332,10 @@ double DihedralAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> &
double result = 360; double result = 360;
if (u > 0 and v > 0) if (u > 0 and v > 0)
{ {
u = DotProduct(p, x) / sqrt(u); u = DotProduct(p, x) / std::sqrt(u);
v = DotProduct(p, y) / sqrt(v); v = DotProduct(p, y) / std::sqrt(v);
if (u != 0 or v != 0) if (u != 0 or v != 0)
result = atan2(v, u) * 180 / kPI; result = std::atan2(v, u) * 180 / kPI;
} }
return result; return result;
...@@ -351,7 +351,7 @@ double CosinusAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> &p ...@@ -351,7 +351,7 @@ double CosinusAngle(const PointF<F> &p1, const PointF<F> &p2, const PointF<F> &p
double x = DotProduct(v12, v12) * DotProduct(v34, v34); double x = DotProduct(v12, v12) * DotProduct(v34, v34);
if (x > 0) if (x > 0)
result = DotProduct(v12, v34) / sqrt(x); result = DotProduct(v12, v34) / std::sqrt(x);
return result; return result;
} }
...@@ -436,9 +436,9 @@ class SphericalDots ...@@ -436,9 +436,9 @@ class SphericalDots
double lat = std::asin((2.0 * i) / P); double lat = std::asin((2.0 * i) / P);
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio; double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
p->mX = sin(lon) * cos(lat); p->mX = std::sin(lon) * std::cos(lat);
p->mY = cos(lon) * cos(lat); p->mY = std::cos(lon) * std::cos(lat);
p->mZ = sin(lat); p->mZ = std::sin(lat);
++p; ++p;
} }
......
/*- /*-
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
* *
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met: * modification, are permitted provided that the following conditions are met:
* *
* 1. Redistributions of source code must retain the above copyright notice, this * 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer * list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice, * 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation * this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution. * and/or other materials provided with the distribution.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
...@@ -34,16 +34,16 @@ ...@@ -34,16 +34,16 @@
#include "cif++/Point.hpp" #include "cif++/Point.hpp"
/* /*
To modify a structure, you will have to use actions. To modify a structure, you will have to use actions.
The currently supported actions are: The currently supported actions are:
// - Move atom to new location // - Move atom to new location
- Remove atom - Remove atom
// - Add new atom that was formerly missing // - Add new atom that was formerly missing
// - Add alternate Residue // - Add alternate Residue
- -
*/ */
namespace mmcif namespace mmcif
...@@ -61,7 +61,6 @@ class File; ...@@ -61,7 +61,6 @@ class File;
class Atom class Atom
{ {
private: private:
struct AtomImpl : public std::enable_shared_from_this<AtomImpl> struct AtomImpl : public std::enable_shared_from_this<AtomImpl>
{ {
AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row); AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row);
...@@ -99,7 +98,7 @@ class Atom ...@@ -99,7 +98,7 @@ class Atom
int mRefcount; int mRefcount;
cif::Row mRow; cif::Row mRow;
mutable std::vector<std::tuple<std::string,cif::detail::ItemReference>> mCachedRefs; mutable std::vector<std::tuple<std::string, cif::detail::ItemReference>> mCachedRefs;
mutable const Compound *mCompound = nullptr; mutable const Compound *mCompound = nullptr;
...@@ -110,14 +109,17 @@ class Atom ...@@ -110,14 +109,17 @@ class Atom
}; };
public: public:
Atom() {} Atom() {}
Atom(std::shared_ptr<AtomImpl> impl) Atom(std::shared_ptr<AtomImpl> impl)
: mImpl(impl) {} : mImpl(impl)
{
}
Atom(const Atom &rhs) Atom(const Atom &rhs)
: mImpl(rhs.mImpl) {} : mImpl(rhs.mImpl)
{
}
Atom(cif::Datablock &db, cif::Row &row); Atom(cif::Datablock &db, cif::Row &row);
...@@ -152,10 +154,10 @@ class Atom ...@@ -152,10 +154,10 @@ class Atom
set_property(name, std::to_string(value)); set_property(name, std::to_string(value));
} }
const std::string &id() const { return impl().mID; } const std::string &id() const { return impl().mID; }
AtomType type() const { return impl().mType; } AtomType type() const { return impl().mType; }
Point location() const { return impl().mLocation; } Point location() const { return impl().mLocation; }
void location(Point p) void location(Point p)
{ {
if (not mImpl) if (not mImpl)
...@@ -176,33 +178,33 @@ class Atom ...@@ -176,33 +178,33 @@ class Atom
void translateRotateAndTranslate(Point t1, Quaternion q, Point t2); void translateRotateAndTranslate(Point t1, Quaternion q, Point t2);
// for direct access to underlying data, be careful! // for direct access to underlying data, be careful!
const cif::Row getRow() const { return impl().mRow; } const cif::Row getRow() const { return impl().mRow; }
const cif::Row getRowAniso() const; const cif::Row getRowAniso() const;
bool isSymmetryCopy() const { return impl().mSymmetryCopy; } bool isSymmetryCopy() const { return impl().mSymmetryCopy; }
std::string symmetry() const { return impl().mSymmetryOperator; } std::string symmetry() const { return impl().mSymmetryOperator; }
const Compound &comp() const { return impl().comp(); } const Compound &comp() const { return impl().comp(); }
bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; } bool isWater() const { return impl().mCompID == "HOH" or impl().mCompID == "H2O" or impl().mCompID == "WAT"; }
int charge() const; int charge() const;
float uIso() const; float uIso() const;
bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); } bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
float occupancy() const; float occupancy() const;
// specifications // specifications
const std::string& labelAtomID() const { return impl().mAtomID; } const std::string &labelAtomID() const { return impl().mAtomID; }
const std::string& labelCompID() const { return impl().mCompID; } const std::string &labelCompID() const { return impl().mCompID; }
const std::string& labelAsymID() const { return impl().mAsymID; } const std::string &labelAsymID() const { return impl().mAsymID; }
std::string labelEntityID() const; std::string labelEntityID() const;
int labelSeqID() const { return impl().mSeqID; } int labelSeqID() const { return impl().mSeqID; }
const std::string& labelAltID() const { return impl().mAltID; } const std::string &labelAltID() const { return impl().mAltID; }
bool isAlternate() const { return not impl().mAltID.empty(); } bool isAlternate() const { return not impl().mAltID.empty(); }
std::string authAtomID() const; std::string authAtomID() const;
std::string authCompID() const; std::string authCompID() const;
std::string authAsymID() const; std::string authAsymID() const;
const std::string& authSeqID() const { return impl().mAuthSeqID; } const std::string &authSeqID() const { return impl().mAuthSeqID; }
std::string pdbxAuthInsCode() const; std::string pdbxAuthInsCode() const;
std::string pdbxAuthAltID() const; std::string pdbxAuthAltID() const;
...@@ -225,7 +227,7 @@ class Atom ...@@ -225,7 +227,7 @@ class Atom
std::swap(mImpl, b.mImpl); std::swap(mImpl, b.mImpl);
} }
int compare(const Atom &b) const { return impl().compare(*b.mImpl); } int compare(const Atom &b) const { return impl().compare(*b.mImpl); }
bool operator<(const Atom &rhs) const bool operator<(const Atom &rhs) const
{ {
...@@ -487,31 +489,33 @@ class Polymer : public std::vector<Monomer> ...@@ -487,31 +489,33 @@ class Polymer : public std::vector<Monomer>
// file is a reference to the data stored in e.g. the cif file. // file is a reference to the data stored in e.g. the cif file.
// This object is not copyable. // This object is not copyable.
class File : public std::enable_shared_from_this<File> class File : public cif::File
{ {
public: public:
File(); File() {}
File(const std::filesystem::path &path);
File(const char *data, size_t length); // good luck trying to find out what it is...
~File();
File(const File &) = delete; File(const std::filesystem::path &path)
File &operator=(const File &) = delete; {
load(path);
cif::Datablock& createDatablock(const std::string_view name); }
void load(const std::filesystem::path &path); File(const char *data, size_t length)
void save(const std::filesystem::path &path); {
load(data, length);
}
Structure *model(size_t nr = 1); File(const File &) = delete;
File &operator=(const File &) = delete;
struct FileImpl &impl() const { return *mImpl; } void load(const std::filesystem::path &p) override;
void save(const std::filesystem::path &p) override;
cif::Datablock &data(); void load(std::istream &is) override;
cif::File &file();
using cif::File::save;
using cif::File::load;
private: cif::Datablock &data() { return front(); }
struct FileImpl *mImpl;
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
...@@ -531,14 +535,18 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b) ...@@ -531,14 +535,18 @@ inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
class Structure class Structure
{ {
public: public:
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {}); Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {})
Structure &operator=(const Structure &) = delete; : Structure(p.firstDatablock(), modelNr, options)
~Structure(); {
}
Structure(cif::Datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms) // Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
Structure(const Structure &); Structure(const Structure &);
File &getFile() const; Structure &operator=(const Structure &) = delete;
~Structure();
const AtomView &atoms() const { return mAtoms; } const AtomView &atoms() const { return mAtoms; }
AtomView waters() const; AtomView waters() const;
...@@ -655,8 +663,15 @@ class Structure ...@@ -655,8 +663,15 @@ class Structure
void cleanupEmptyCategories(); void cleanupEmptyCategories();
/// \brief Direct access to underlying data /// \brief Direct access to underlying data
cif::Category &category(std::string_view name) const; cif::Category &category(std::string_view name) const
cif::Datablock &datablock() const; {
return mDb[name];
}
cif::Datablock &datablock() const
{
return mDb;
}
private: private:
friend Polymer; friend Polymer;
...@@ -671,7 +686,7 @@ class Structure ...@@ -671,7 +686,7 @@ class Structure
void loadAtomsForModel(StructureOpenOptions options); void loadAtomsForModel(StructureOpenOptions options);
File &mFile; cif::Datablock &mDb;
size_t mModelNr; size_t mModelNr;
AtomView mAtoms; AtomView mAtoms;
std::vector<size_t> mAtomIndex; std::vector<size_t> mAtomIndex;
......
...@@ -233,7 +233,7 @@ BondMap::BondMap(const Structure &p) ...@@ -233,7 +233,7 @@ BondMap::BondMap(const Structure &p)
link[b].insert(a); link[b].insert(a);
}; };
cif::Datablock &db = p.getFile().data(); cif::Datablock &db = p.datablock();
// collect all compounds first // collect all compounds first
std::set<std::string> compounds; std::set<std::string> compounds;
......
...@@ -2408,7 +2408,7 @@ namespace detail ...@@ -2408,7 +2408,7 @@ namespace detail
size_t writeValue(std::ostream &os, std::string value, size_t offset, size_t width) size_t writeValue(std::ostream &os, std::string value, size_t offset, size_t width)
{ {
if (value.find('\n') != std::string::npos or width == 0 or value.length() >= 132) // write as text field if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
{ {
ba::replace_all(value, "\n;", "\n\\;"); ba::replace_all(value, "\n;", "\n\\;");
...@@ -2511,7 +2511,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in ...@@ -2511,7 +2511,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
if (not isUnquotedString(v->mText)) if (not isUnquotedString(v->mText))
l += 2; l += 2;
if (l >= 132) if (l > 132)
continue; continue;
if (columnWidths[v->mColumnIndex] < l + 1) if (columnWidths[v->mColumnIndex] < l + 1)
...@@ -2547,7 +2547,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in ...@@ -2547,7 +2547,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
if (l < w) if (l < w)
l = w; l = w;
if (offset + l >= 132 and offset > 0) if (offset + l > 132 and offset > 0)
{ {
os << std::endl; os << std::endl;
offset = 0; offset = 0;
...@@ -2555,7 +2555,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in ...@@ -2555,7 +2555,7 @@ void Category::write(std::ostream &os, const std::vector<size_t> &order, bool in
offset = detail::writeValue(os, s, offset, w); offset = detail::writeValue(os, s, offset, w);
if (offset >= 132) if (offset > 132)
{ {
os << std::endl; os << std::endl;
offset = 0; offset = 0;
...@@ -3367,6 +3367,11 @@ File::File(const std::filesystem::path &path, bool validate) ...@@ -3367,6 +3367,11 @@ File::File(const std::filesystem::path &path, bool validate)
} }
} }
File::File(const char *data, std::size_t length)
{
load(data, length);
}
File::File(File &&rhs) File::File(File &&rhs)
: mHead(nullptr) : mHead(nullptr)
, mValidator(nullptr) , mValidator(nullptr)
...@@ -3483,6 +3488,25 @@ void File::load(std::istream &is, const std::string &datablock) ...@@ -3483,6 +3488,25 @@ void File::load(std::istream &is, const std::string &datablock)
} }
} }
void File::load(const char *data, std::size_t length)
{
bool gzipped = length > 2 and data[0] == static_cast<char>(0x1f) and data[1] == static_cast<char>(0x8b);
struct membuf : public std::streambuf
{
membuf(char *data, size_t length) { this->setg(data, data, data + length); }
} buffer(const_cast<char *>(data), length);
std::istream is(&buffer);
io::filtering_stream<io::input> in;
if (gzipped)
in.push(io::gzip_decompressor());
in.push(is);
load(is);
}
void File::save(std::ostream &os) void File::save(std::ostream &os)
{ {
Datablock *e = mHead; Datablock *e = mHead;
...@@ -3523,6 +3547,21 @@ Datablock &File::operator[](std::string_view name) ...@@ -3523,6 +3547,21 @@ Datablock &File::operator[](std::string_view name)
return *result; return *result;
} }
Datablock &File::front()
{
assert(mHead);
return *mHead;
}
Datablock &File::back()
{
assert(mHead);
auto *block = mHead;
while (block->mNext != nullptr)
block = block->mNext;
return *block;
}
bool File::isValid() bool File::isValid()
{ {
if (mValidator == nullptr) if (mValidator == nullptr)
......
...@@ -301,7 +301,7 @@ std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q) ...@@ -301,7 +301,7 @@ std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q)
q = Normalize(q); q = Normalize(q);
// angle: // angle:
double angle = 2 * acos(q.R_component_1()); double angle = 2 * std::acos(q.R_component_1());
angle = angle * 180 / kPI; angle = angle * 180 / kPI;
// axis: // axis:
......
...@@ -535,7 +535,7 @@ double CalculateHBondEnergy(Res &inDonor, Res &inAcceptor) ...@@ -535,7 +535,7 @@ double CalculateHBondEnergy(Res &inDonor, Res &inAcceptor)
result = kCouplingConstant / distanceHO - kCouplingConstant / distanceHC + kCouplingConstant / distanceNC - kCouplingConstant / distanceNO; result = kCouplingConstant / distanceHO - kCouplingConstant / distanceHC + kCouplingConstant / distanceNC - kCouplingConstant / distanceNO;
// DSSP compatibility mode: // DSSP compatibility mode:
result = round(result * 1000) / 1000; result = std::round(result * 1000) / 1000;
if (result < kMinHBondEnergy) if (result < kMinHBondEnergy)
result = kMinHBondEnergy; result = kMinHBondEnergy;
...@@ -1230,7 +1230,7 @@ DSSPImpl::DSSPImpl(const Structure &s, int min_poly_proline_stretch_length) ...@@ -1230,7 +1230,7 @@ DSSPImpl::DSSPImpl(const Structure &s, int min_poly_proline_stretch_length)
void DSSPImpl::calculateSecondaryStructure() void DSSPImpl::calculateSecondaryStructure()
{ {
auto &db = mStructure.getFile().data(); auto &db = mStructure.datablock();
for (auto r : db["struct_conn"].find(cif::Key("conn_type_id") == "disulf")) for (auto r : db["struct_conn"].find(cif::Key("conn_type_id") == "disulf"))
{ {
std::string asym1, asym2; std::string asym1, asym2;
......
...@@ -50,157 +50,6 @@ namespace mmcif ...@@ -50,157 +50,6 @@ namespace mmcif
{ {
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// FileImpl
struct FileImpl
{
cif::File mData;
cif::Datablock *mDb = nullptr;
void load_data(const char *data, size_t length);
void load(const std::filesystem::path &path);
void save(const std::filesystem::path &path);
};
void FileImpl::load_data(const char *data, size_t length)
{
bool gzipped = length > 2 and data[0] == static_cast<char>(0x1f) and data[1] == static_cast<char>(0x8b);
try
{
// First try mmCIF
struct membuf : public std::streambuf
{
membuf(char *data, size_t length) { this->setg(data, data, data + length); }
} buffer(const_cast<char *>(data), length);
std::istream is(&buffer);
io::filtering_stream<io::input> in;
if (gzipped)
in.push(io::gzip_decompressor());
in.push(is);
mData.load(in);
}
catch (const cif::CifParserError &e)
{
// First try mmCIF
struct membuf : public std::streambuf
{
membuf(char *data, size_t length) { this->setg(data, data, data + length); }
} buffer(const_cast<char *>(data), length);
std::istream is(&buffer);
io::filtering_stream<io::input> in;
if (gzipped)
in.push(io::gzip_decompressor());
in.push(is);
ReadPDBFile(in, mData);
}
// Yes, we've parsed the data. Now locate the datablock.
mDb = &mData.firstDatablock();
// And validate, otherwise lots of functionality won't work
// if (mData.getValidator() == nullptr)
mData.loadDictionary("mmcif_pdbx_v50");
if (not mData.isValid() and cif::VERBOSE >= 0)
std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
}
void FileImpl::load(const std::filesystem::path &path)
{
std::ifstream inFile(path, std::ios_base::in | std::ios_base::binary);
if (not inFile.is_open())
throw std::runtime_error("No such file: " + path.string());
io::filtering_stream<io::input> in;
std::string ext = path.extension().string();
if (path.extension() == ".gz")
{
in.push(io::gzip_decompressor());
ext = path.stem().extension().string();
}
in.push(inFile);
try
{
// OK, we've got the file, now create a protein
if (ext == ".cif")
mData.load(in);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, mData);
else
{
try
{
if (cif::VERBOSE > 0)
std::cerr << "unrecognized file extension, trying cif" << std::endl;
mData.load(in);
}
catch (const cif::CifParserError &e)
{
if (cif::VERBOSE > 0)
std::cerr << "Not cif, trying plain old PDB" << std::endl;
// pffft...
in.reset();
if (inFile.is_open())
inFile.seekg(0);
else
inFile.open(path, std::ios_base::in | std::ios::binary);
if (path.extension() == ".gz")
in.push(io::gzip_decompressor());
in.push(inFile);
ReadPDBFile(in, mData);
}
}
}
catch (const std::exception &ex)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error trying to load file " << path << std::endl;
throw;
}
// Yes, we've parsed the data. Now locate the datablock.
mDb = &mData.firstDatablock();
// And validate, otherwise lots of functionality won't work
// if (mData.getValidator() == nullptr)
mData.loadDictionary("mmcif_pdbx_v50");
if (not mData.isValid() and cif::VERBOSE >= 0)
std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
}
void FileImpl::save(const std::filesystem::path &path)
{
std::ofstream outFile(path, std::ios_base::out | std::ios_base::binary);
io::filtering_stream<io::output> out;
if (path.extension() == ".gz")
out.push(io::gzip_compressor());
out.push(outFile);
if (path.extension() == ".pdb")
WritePDBFile(out, mData);
else
mData.save(out);
}
// --------------------------------------------------------------------
// Atom // Atom
Atom::AtomImpl::AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row) Atom::AtomImpl::AtomImpl(cif::Datablock &db, const std::string &id, cif::Row row)
...@@ -1131,10 +980,10 @@ bool Monomer::areBonded(const Monomer &a, const Monomer &b, float errorMargin) ...@@ -1131,10 +980,10 @@ bool Monomer::areBonded(const Monomer &a, const Monomer &b, float errorMargin)
auto distanceCACA = Distance(atoms[0], atoms[3]); auto distanceCACA = Distance(atoms[0], atoms[3]);
double omega = DihedralAngle(atoms[0], atoms[1], atoms[2], atoms[3]); double omega = DihedralAngle(atoms[0], atoms[1], atoms[2], atoms[3]);
bool cis = abs(omega) <= 30.0; bool cis = std::abs(omega) <= 30.0;
float maxCACADistance = cis ? 3.0f : 3.8f; float maxCACADistance = cis ? 3.0f : 3.8f;
result = abs(distanceCACA - maxCACADistance) < errorMargin; result = std::abs(distanceCACA - maxCACADistance) < errorMargin;
} }
catch (...) catch (...)
{ {
...@@ -1242,7 +1091,7 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const ...@@ -1242,7 +1091,7 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const
ixb = ix, ++f; ixb = ix, ++f;
if (f == 2) if (f == 2)
{ {
result = abs(ixa - ixb); result = std::abs(ixa - ixb);
break; break;
} }
} }
...@@ -1254,81 +1103,120 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const ...@@ -1254,81 +1103,120 @@ int Polymer::Distance(const Monomer &a, const Monomer &b) const
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// File // File
File::File() void File::load(const std::filesystem::path &path)
: mImpl(new FileImpl)
{ {
} std::ifstream inFile(path, std::ios_base::in | std::ios_base::binary);
if (not inFile.is_open())
throw std::runtime_error("No such file: " + path.string());
File::File(const char *data, size_t length) io::filtering_stream<io::input> in;
: mImpl(new FileImpl) std::string ext = path.extension().string();
{
mImpl->load_data(data, length);
}
File::File(const std::filesystem::path &File) if (path.extension() == ".gz")
: mImpl(new FileImpl) {
{ in.push(io::gzip_decompressor());
load(File); ext = path.stem().extension().string();
} }
File::~File() in.push(inFile);
{
delete mImpl;
}
cif::Datablock &File::createDatablock(const std::string_view name) try
{ {
auto db = new cif::Datablock(name); // OK, we've got the file, now create a protein
if (ext == ".cif")
load(in);
else if (ext == ".pdb" or ext == ".ent")
ReadPDBFile(in, *this);
else
{
try
{
if (cif::VERBOSE > 0)
std::cerr << "unrecognized file extension, trying cif" << std::endl;
mImpl->mData.append(db); cif::File::load(in);
mImpl->mDb = db; }
catch (const cif::CifParserError &e)
{
if (cif::VERBOSE > 0)
std::cerr << "Not cif, trying plain old PDB" << std::endl;
return *mImpl->mDb; // pffft...
} in.reset();
void File::load(const std::filesystem::path &p) if (inFile.is_open())
{ inFile.seekg(0);
mImpl->load(p); else
} inFile.open(path, std::ios_base::in | std::ios::binary);
void File::save(const std::filesystem::path &file) if (path.extension() == ".gz")
{ in.push(io::gzip_decompressor());
mImpl->save(file);
in.push(inFile);
ReadPDBFile(in, *this);
}
}
}
catch (const std::exception &ex)
{
if (cif::VERBOSE >= 0)
std::cerr << "Error trying to load file " << path << std::endl;
throw;
}
// validate, otherwise lots of functionality won't work
loadDictionary("mmcif_pdbx_v50");
if (not isValid() and cif::VERBOSE >= 0)
std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
} }
cif::Datablock &File::data() void File::load(std::istream &is)
{ {
assert(mImpl); try
assert(mImpl->mDb); {
cif::File::load(is);
if (mImpl == nullptr or mImpl->mDb == nullptr) }
throw std::runtime_error("No data loaded"); catch (const cif::CifParserError &e)
{
ReadPDBFile(is, *this);
}
return *mImpl->mDb; // validate, otherwise lots of functionality won't work
loadDictionary("mmcif_pdbx_v50");
if (not isValid() and cif::VERBOSE >= 0)
std::cerr << "Invalid mmCIF file" << (cif::VERBOSE > 0 ? "." : " use --verbose option to see errors") << std::endl;
} }
cif::File &File::file() void File::save(const std::filesystem::path &path)
{ {
assert(mImpl); fs::path file = path.filename();
std::ofstream outFile(path, std::ios_base::out | std::ios_base::binary);
io::filtering_stream<io::output> out;
if (file.extension() == ".gz")
{
out.push(io::gzip_compressor());
file.replace_extension("");
}
if (mImpl == nullptr) out.push(outFile);
throw std::runtime_error("No data loaded");
return mImpl->mData; if (file.extension() == ".pdb")
WritePDBFile(out, *this);
else
cif::File::save(out);
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Structure // Structure
Structure::Structure(File &f, size_t modelNr, StructureOpenOptions options) Structure::Structure(cif::Datablock &db, size_t modelNr, StructureOpenOptions options)
: mFile(f) : mDb(db)
, mModelNr(modelNr) , mModelNr(modelNr)
{ {
auto db = mFile.impl().mDb; auto &atomCat = db["atom_site"];
if (db == nullptr)
throw std::logic_error("Empty file!");
auto &atomCat = (*db)["atom_site"];
loadAtomsForModel(options); loadAtomsForModel(options);
...@@ -1357,8 +1245,8 @@ Structure::Structure(File &f, size_t modelNr, StructureOpenOptions options) ...@@ -1357,8 +1245,8 @@ Structure::Structure(File &f, size_t modelNr, StructureOpenOptions options)
void Structure::loadAtomsForModel(StructureOpenOptions options) void Structure::loadAtomsForModel(StructureOpenOptions options)
{ {
auto db = mFile.impl().mDb; auto &db = datablock();
auto &atomCat = (*db)["atom_site"]; auto &atomCat = db["atom_site"];
for (auto &a : atomCat) for (auto &a : atomCat)
{ {
...@@ -1373,12 +1261,12 @@ void Structure::loadAtomsForModel(StructureOpenOptions options) ...@@ -1373,12 +1261,12 @@ void Structure::loadAtomsForModel(StructureOpenOptions options)
if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H") if ((options bitand StructureOpenOptions::SkipHydrogen) and type_symbol == "H")
continue; continue;
mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(*db, id, a)); mAtoms.emplace_back(std::make_shared<Atom::AtomImpl>(db, id, a));
} }
} }
Structure::Structure(const Structure &s) Structure::Structure(const Structure &s)
: mFile(s.mFile) : mDb(s.mDb)
, mModelNr(s.mModelNr) , mModelNr(s.mModelNr)
{ {
mAtoms.reserve(s.mAtoms.size()); mAtoms.reserve(s.mAtoms.size());
...@@ -1677,21 +1565,10 @@ const Residue &Structure::getResidue(const mmcif::Atom &atom) const ...@@ -1677,21 +1565,10 @@ const Residue &Structure::getResidue(const mmcif::Atom &atom) const
return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID()); return getResidue(atom.labelAsymID(), atom.labelCompID(), atom.labelSeqID());
} }
File &Structure::getFile() const
{
return mFile;
}
cif::Category &Structure::category(std::string_view name) const
{
auto &db = datablock();
return db[name];
}
std::tuple<char, int, char> Structure::MapLabelToAuth( std::tuple<char, int, char> Structure::MapLabelToAuth(
const std::string &asymID, int seqID) const const std::string &asymID, int seqID) const
{ {
auto &db = *getFile().impl().mDb; auto &db = datablock();
std::tuple<char, int, char> result; std::tuple<char, int, char> result;
bool found = false; bool found = false;
...@@ -1841,11 +1718,6 @@ std::tuple<std::string, int, std::string> Structure::MapPDBToLabel(const std::st ...@@ -1841,11 +1718,6 @@ std::tuple<std::string, int, std::string> Structure::MapPDBToLabel(const std::st
return result; return result;
} }
cif::Datablock &Structure::datablock() const
{
return *mFile.impl().mDb;
}
std::string Structure::insertCompound(const std::string &compoundID, bool isEntity) std::string Structure::insertCompound(const std::string &compoundID, bool isEntity)
{ {
using namespace cif::literals; using namespace cif::literals;
...@@ -1854,7 +1726,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti ...@@ -1854,7 +1726,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti
if (compound == nullptr) if (compound == nullptr)
throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)"); throw std::runtime_error("Trying to insert unknown compound " + compoundID + " (not found in CCD)");
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &chemComp = db["chem_comp"]; auto &chemComp = db["chem_comp"];
auto r = chemComp.find(cif::Key("id") == compoundID); auto r = chemComp.find(cif::Key("id") == compoundID);
...@@ -1899,7 +1771,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti ...@@ -1899,7 +1771,7 @@ std::string Structure::insertCompound(const std::string &compoundID, bool isEnti
void Structure::removeAtom(Atom &a) void Structure::removeAtom(Atom &a)
{ {
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &atomSites = db["atom_site"]; auto &atomSites = db["atom_site"];
...@@ -1922,12 +1794,11 @@ void Structure::removeAtom(Atom &a) ...@@ -1922,12 +1794,11 @@ void Structure::removeAtom(Atom &a)
void Structure::removeResidue(const std::string &asym_id, int seq_id) void Structure::removeResidue(const std::string &asym_id, int seq_id)
{ {
} }
void Structure::swapAtoms(Atom &a1, Atom &a2) void Structure::swapAtoms(Atom &a1, Atom &a2)
{ {
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &atomSites = db["atom_site"]; auto &atomSites = db["atom_site"];
auto rs1 = atomSites.find(cif::Key("id") == a1.id()); auto rs1 = atomSites.find(cif::Key("id") == a1.id());
...@@ -1963,7 +1834,7 @@ void Structure::changeResidue(Residue &res, const std::string &newCompound, ...@@ -1963,7 +1834,7 @@ void Structure::changeResidue(Residue &res, const std::string &newCompound,
{ {
using namespace cif::literals; using namespace cif::literals;
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
std::string asymID = res.asymID(); std::string asymID = res.asymID();
const auto compound = CompoundFactory::instance().create(newCompound); const auto compound = CompoundFactory::instance().create(newCompound);
...@@ -2073,7 +1944,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve ...@@ -2073,7 +1944,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
{ {
using namespace cif::literals; using namespace cif::literals;
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"]; auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID(); std::string asym_id = struct_asym.getUniqueID();
...@@ -2093,8 +1964,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve ...@@ -2093,8 +1964,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, const std::ve
{ {
auto atom_id = atom_site.getUniqueID(""); auto atom_id = atom_site.getUniqueID("");
auto &&[row, inserted] = atom_site.emplace({ auto &&[row, inserted] = atom_site.emplace({{"group_PDB", atom.get_property<std::string>("group_PDB")},
{"group_PDB", atom.get_property<std::string>("group_PDB")},
{"id", atom_id}, {"id", atom_id},
{"type_symbol", atom.get_property<std::string>("type_symbol")}, {"type_symbol", atom.get_property<std::string>("type_symbol")},
{"label_atom_id", atom.get_property<std::string>("label_atom_id")}, {"label_atom_id", atom.get_property<std::string>("label_atom_id")},
...@@ -2127,7 +1997,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s ...@@ -2127,7 +1997,7 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
{ {
using namespace cif::literals; using namespace cif::literals;
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &struct_asym = db["struct_asym"]; auto &struct_asym = db["struct_asym"];
std::string asym_id = struct_asym.getUniqueID(); std::string asym_id = struct_asym.getUniqueID();
...@@ -2143,23 +2013,27 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s ...@@ -2143,23 +2013,27 @@ std::string Structure::createNonpoly(const std::string &entity_id, std::vector<s
auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id); auto &res = mNonPolymers.emplace_back(*this, comp_id, asym_id);
auto appendUnlessSet = [](std::vector<cif::Item> &ai, cif::Item &&i)
{
if (find_if(ai.begin(), ai.end(), [name = i.name()](cif::Item &ci) { return ci.name() == name; }) == ai.end())
ai.emplace_back(std::move(i));
};
for (auto &atom : atom_info) for (auto &atom : atom_info)
{ {
auto atom_id = atom_site.getUniqueID(""); auto atom_id = atom_site.getUniqueID("");
atom.insert(atom.end(), { appendUnlessSet(atom, { "group_PDB", "HETATM"} );
{"group_PDB", "HETATM"}, appendUnlessSet(atom, { "id", atom_id} );
{"id", atom_id}, appendUnlessSet(atom, { "label_comp_id", comp_id} );
{"label_comp_id", comp_id}, appendUnlessSet(atom, { "label_asym_id", asym_id} );
{"label_asym_id", asym_id}, appendUnlessSet(atom, { "label_seq_id", ""} );
{"label_seq_id", ""}, appendUnlessSet(atom, { "label_entity_id", entity_id} );
{"label_entity_id", entity_id}, appendUnlessSet(atom, { "auth_comp_id", comp_id} );
{"auth_comp_id", comp_id}, appendUnlessSet(atom, { "auth_asym_id", asym_id} );
{"auth_asym_id", asym_id}, appendUnlessSet(atom, { "auth_seq_id", ""} );
{"auth_seq_id", ""}, appendUnlessSet(atom, { "pdbx_PDB_model_num", 1} );
{"pdbx_PDB_model_num", 1}, appendUnlessSet(atom, { "label_alt_id", ""} );
{"label_alt_id", ""}
});
auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end()); auto &&[row, inserted] = atom_site.emplace(atom.begin(), atom.end());
...@@ -2174,7 +2048,7 @@ void Structure::cleanupEmptyCategories() ...@@ -2174,7 +2048,7 @@ void Structure::cleanupEmptyCategories()
{ {
using namespace cif::literals; using namespace cif::literals;
cif::Datablock &db = *mFile.impl().mDb; cif::Datablock &db = datablock();
auto &atomSite = db["atom_site"]; auto &atomSite = db["atom_site"];
......
...@@ -37,7 +37,7 @@ int main(int argc, char* argv[]) ...@@ -37,7 +37,7 @@ int main(int argc, char* argv[])
structure.cleanupEmptyCategories(); structure.cleanupEmptyCategories();
f.file().save(std::cout); f.save(std::cout);
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
......
...@@ -78,8 +78,8 @@ BOOST_AUTO_TEST_CASE(create_nonpoly_1) ...@@ -78,8 +78,8 @@ BOOST_AUTO_TEST_CASE(create_nonpoly_1)
cif::VERBOSE = 1; cif::VERBOSE = 1;
mmcif::File file; mmcif::File file;
file.file().loadDictionary("mmcif_pdbx_v50.dic"); file.loadDictionary("mmcif_pdbx_v50.dic");
file.createDatablock("TEST"); // create a datablock file.emplace("TEST"); // create a datablock
mmcif::Structure structure(file); mmcif::Structure structure(file);
...@@ -171,12 +171,12 @@ _struct_asym.details ? ...@@ -171,12 +171,12 @@ _struct_asym.details ?
expected.loadDictionary("mmcif_pdbx_v50.dic"); expected.loadDictionary("mmcif_pdbx_v50.dic");
if (not (expected.firstDatablock() == structure.getFile().data())) if (not (expected.firstDatablock() == structure.datablock()))
{ {
BOOST_TEST(false); BOOST_TEST(false);
std::cout << expected.firstDatablock() << std::endl std::cout << expected.firstDatablock() << std::endl
<< std::endl << std::endl
<< structure.getFile().data() << std::endl; << structure.datablock() << std::endl;
} }
} }
......
...@@ -1626,7 +1626,7 @@ PRO OXT HXT SING N N 17 ...@@ -1626,7 +1626,7 @@ PRO OXT HXT SING N N 17
mmcif::File file(example.string()); mmcif::File file(example.string());
mmcif::Structure structure(file); mmcif::Structure structure(file);
(void)file.file().isValid(); (void)file.isValid();
mmcif::BondMap bm(structure); mmcif::BondMap bm(structure);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment