Commit fd638cbd by root

init

parents
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Debug files
*.dSYM/
*.su
# The build dir
build/*
# Generated folders
docs/html/*
examples/out/*
examples/out_json_ref/*
[submodule "Catch2"]
path = submodules/Catch2
url = https://github.com/catchorg/Catch2
[submodule "msgpack-c"]
path = submodules/msgpack-c
url = https://github.com/msgpack/msgpack-c
[submodule "mmtf_spec"]
path = submodules/mmtf_spec
url = https://github.com/rcsb/mmtf
language: cpp
sudo: false
dist: trusty
linux64_addons:
addons: &linux64
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
linux32_addons:
addons: &linux32
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- g++-4.8
- g++-4.8-multilib
- linux-libc-dev:i386
- libc6-dev-i386
linux64_cpp17addons:
addons: &linux64cpp17
apt:
sources:
- ubuntu-toolchain-r-test
# Set empty values for allow_failures to work
env: TEST_COMMAND=$TRAVIS_BUILD_DIR/ci/build_and_run_tests.sh
matrix:
fast_finish: true
include:
- os: linux
env: EMSCRIPTEN=ON TEST_COMMAND=$TRAVIS_BUILD_DIR/ci/build_and_run_tests.sh
addons: *linux64
- os: linux
compiler: clang
addons: *linux64
- os: linux
compiler: gcc
env: ARCH=x86 CMAKE_EXTRA=-DHAVE_LIBM=/lib32/libm.so.6 TEST_COMMAND=$TRAVIS_BUILD_DIR/ci/build_and_run_tests.sh
addons: *linux32
- os: osx
compiler: clang
- os: linux
compiler: gcc
env: CMAKE_EXTRA="-DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS='-march=native'" TEST_COMMAND=$TRAVIS_BUILD_DIR/ci/build_and_run_tests.sh
addons: *linux64cpp17
dist: bionic
- os: linux
compiler: gcc
addons: *linux64cpp17
dist: bionic
before_install:
# Setting environement
- cd $TRAVIS_BUILD_DIR
- source ci/setup-travis.sh
- $CC --version
- $CXX --version
script:
- echo $TEST_COMMAND
- (eval "$TEST_COMMAND")
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/),
and this project adheres to [Semantic Versioning](https://semver.org/).
## [Unreleased]
## v1.1.0 - 2022-10-03
### Added
- New mapDecoderFrom.. functions to decode only part of an MMTF file
- Support for extra fields in MMTF files according to the
[latest MMTF specification](https://github.com/rcsb/mmtf/pull/36).
- Support for binary strategy 16 (Run-length encoded 8-bit array),
bondResonanceList field and optional groupType.bondAtomList &
groupType.bondOrderList according to the proposed version 1.1 of the
[MMTF specification](https://github.com/rcsb/mmtf/pull/35).
- New methods to find polymer chains and HETATM following discussions in
[rcsb/mmtf#28](https://github.com/rcsb/mmtf/issues/28).
- Altered submodule locations [rcsb/mmtf-cpp#37](https://github.com/rcsb/mmtf-cpp/pull/37)
from the base directory to the new submodules directory.
## v1.0.0 - 2019-02-05
### Added
- Initial release including decoder and encoder for the
[MMTF specification 1.0](https://github.com/rcsb/mmtf/blob/v1.0/spec.md).
[Unreleased]: https://github.com/rcsb/mmtf-cpp/compare/v1.1.0...HEAD
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
project(mmtf-cpp VERSION 1.0.0 LANGUAGES CXX)
option(mmtf_build_local "Use the submodule dependencies for building" OFF)
option(mmtf_build_examples "Build the examples" OFF)
add_library(MMTFcpp INTERFACE)
target_compile_features(MMTFcpp INTERFACE cxx_auto_type)
target_include_directories(MMTFcpp INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
if (mmtf_build_local)
# use header only
set(MSGPACKC_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/submodules/msgpack-c/include)
add_library(msgpackc INTERFACE)
target_include_directories(msgpackc INTERFACE ${MSGPACKC_INCLUDE_DIR})
if (BUILD_TESTS)
set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/submodules/Catch2/single_include)
add_library(Catch INTERFACE)
target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})
endif()
endif()
if (NOT TARGET msgpackc)
find_package(msgpack)
endif()
target_link_libraries(MMTFcpp INTERFACE msgpackc)
if (BUILD_TESTS)
enable_testing()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests)
endif()
if (mmtf_build_examples)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples)
endif()
install(
DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
DESTINATION "include"
FILES_MATCHING PATTERN "*.hpp")
The MIT license applies to all files unless otherwise noted.
Copyright 2017, University of California San Diego
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
<!--- Batch image URLs generated at https://shields.io -->
[![Release](https://img.shields.io/github/v/release/rcsb/mmtf-cpp)](https://github.com/rcsb/mmtf-cpp/releases)
[![License](https://img.shields.io/github/license/rcsb/mmtf-cpp)](https://github.com/rcsb/mmtf-cpp/blob/master/LICENSE)
[![Build Status (Travis)](https://img.shields.io/travis/com/rcsb/mmtf-cpp/master)](https://app.travis-ci.com/github/rcsb/mmtf-cpp)
The <b>m</b>acro<b>m</b>olecular <b>t</b>ransmission <b>f</b>ormat
([MMTF](http://mmtf.rcsb.org)) is a binary encoding of biological structures.
This repository holds the C++-03 compatible API, encoding and decoding
libraries. The MMTF specification can be found
[here](https://github.com/rcsb/mmtf/blob/HEAD/spec.md/).
## Prerequisites for using MMTF in C++
You need the headers of the MessagePack C++ library (version 2.1.5 or newer).
If you do not have them on your machine already, you can download the "include"
directory from the
[MessagePack GitHub repository](https://github.com/msgpack/msgpack-c).
## How to use MMTF
You only need to include the mmtf.hpp header in your code to use MMTF.
For instance a minimal example to load an MMTF file looks as follows:
```C
#include <mmtf.hpp>
int main(int argc, char** argv) {
mmtf::StructureData data;
mmtf::decodeFromFile(data, "test.mmtf");
return 0;
}
```
The C++ MMTF library is header only so you do not need to compile it. If you
have a source file named `demo.cpp` (e.g. including the code above), you can
generate an executable `demo.exe` as follows:
```bash
g++ -I<MSGPACK_INCLUDE_PATH> -I<MMTF_INCLUDE_PATH> demo.cpp -o demo.exe
```
Here, `<MSGPACK_INCLUDE_PATH>` and `<MMTF_INCLUDE_PATH>` are the paths to the
"include" directories of MessagePack and this library respectively.
For your more complicated projects, a `CMakeLists.txt` is included for you.
## Installation
You can also perform a system wide installation with `cmake` and `ninja` (or `make`).
To do so:
```bash
mkdir build
cd build
cmake -G Ninja ..
sudo ninja install
```
`cmake` automatically sets the installation directory to `/usr/local/include`, if you want to install it to another `*include/` directory
run `cmake` with the command:
```bash
cmake -G Ninja -DCMAKE_INSTALL_PREFIX=/home/me/local ..
```
Be aware that `/include` is added to the end of `DCMAKE_INSTALL_PREFIX` and that is where your files are installed (i.e. the above would install at `/home/me/local/include/`).
## Examples and tests
To build the tests + examples we recommend using the following lines:
```bash
# download Catch2 testing framework, msgpack-c, and the mmtf-spec test-dataset
git submodule update --init --recursive
mkdir build
cd build
cmake -G Ninja -DBUILD_TESTS=ON -Dmmtf_build_local=ON -Dmmtf_build_examples=ON ..
ninja
chmod +x ./tests/mmtf_tests
./tests/mmtf_tests
```
Example codes:
- mmtf_demo.cpp: Loads an MMTF file and checks internal consistency using
mmtf::StructureData::hasConsistentData.
```bash
./examples/mmtf_demo ../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf
```
- traverse.cpp: Loads an MMTF file and dumps it in human-readable forms.
```bash
./examples/traverse ../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf
./examples/traverse ../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf json
./examples/traverse ../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf print
```
- print_as_pdb.cpp: Loads an MMTF file and prints it in pdb format.
```bash
./examples/print_as_pdb ../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf
```
## Benchmark
Using the following simple code:
```cpp
#include <mmtf.hpp>
int main(int argc, char** argv)
{
for (int i=1; i<argc; ++i) {
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, argv[i]);
}
}
```
compiled via:
```
g++ -Ofast -Immtf/include -Imsgpack/include decode_all_mmtf.cpp
```
We are able to load 153,987 mmtf files (current size of the pdb) or 14.3GB from an SSD in 211.3 seconds (averaged over 4 runs with minimal differences between the runs).
## Code documentation
You can generate a [doxygen](http://www.doxygen.org) based documentation of the
library by calling `doxygen` in the docs folder. You will need doxygen 1.8.11 or
later for that. Open `docs/html/index.html` to see the generated documentation.
version: "{build}"
os: Visual Studio 2015
environment:
matrix:
- generator: MinGW Makefiles
CXX_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
ARCH: x64
- generator: MinGW Makefiles
CXX_PATH: 'C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin'
ARCH: x86
- generator: Visual Studio 14 2015 Win64
ARCH: x64
- generator: Visual Studio 14 2015
ARCH: x86
clone_folder: c:\mmtf-cpp
# Uncomment the following lines to enable remote desktop access to Appveyor
# after a failed build.
# init:
# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
# on_failure:
# - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
install:
- git submodule update --init --recursive
- ps: . .\ci\setup-appveyor.ps1
build_script:
- cd C:\mmtf-cpp
- mkdir build
- cd build
- ps: echo $env:CMAKE_ARGUMENTS
- cmake %CMAKE_ARGUMENTS% ..
- cmake --build . --config Debug -- %BUILD_ARGUMENTS%
test_script:
- ctest --build-config Debug --timeout 300 --output-on-failure
set -e
cd $TRAVIS_BUILD_DIR
mkdir build && cd build
$CMAKE_CONFIGURE cmake $CMAKE_ARGS $CMAKE_EXTRA ..
make -j2
ctest -j2 --output-on-failure
bash $TRAVIS_BUILD_DIR/ci/travis-test-example.sh
cd $TRAVIS_BUILD_DIR
if ("$env:CXX_PATH" -ne "") {
$env:PATH += ";$env:CXX_PATH"
}
$env:CMAKE_ARGUMENTS = "-G `"$env:generator`""
$env:CMAKE_ARGUMENTS += " -Dmmtf_build_local=ON"
$env:CMAKE_ARGUMENTS += " -DBUILD_TESTS=ON"
if ($env:generator -Match "Visual Studio") {
$env:BUILD_ARGUMENTS="/verbosity:minimal /m:2"
} else {
$env:BUILD_ARGUMENTS="-j2"
}
if ($env:generator -eq "MinGW Makefiles") {
# Remove sh.exe from git in the PATH for MinGW to work
$env:PATH = ($env:PATH.Split(';') | Where-Object { $_ -ne 'C:\Program Files\Git\usr\bin' }) -join ';'
}
#!/bin/bash
export CMAKE_ARGS="-DCMAKE_BUILD_TYPE=debug -DBUILD_TESTS=ON -Dmmtf_build_local=ON"
if [[ "$EMSCRIPTEN" == "ON" ]]; then
# Install a Travis compatible emscripten SDK
wget https://github.com/chemfiles/emscripten-sdk/archive/master.tar.gz
tar xf master.tar.gz
./emscripten-sdk-master/emsdk activate
source ./emscripten-sdk-master/emsdk_env.sh
export CMAKE_CONFIGURE='emcmake'
export CMAKE_ARGS="$CMAKE_ARGS -DTEST_RUNNER=node -DCMAKE_BUILD_TYPE=release"
# Install a modern cmake
cd $HOME
wget https://cmake.org/files/v3.9/cmake-3.9.3-Linux-x86_64.tar.gz
tar xf cmake-3.9.3-Linux-x86_64.tar.gz
export PATH=$HOME/cmake-3.9.3-Linux-x86_64/bin:$PATH
export CC=emcc
export CXX=em++
return
fi
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
if [[ "$TRAVIS_DIST" == "trusty" ]]; then
if [[ "$CC" == "gcc" ]]; then
export CC=gcc-4.8
export CXX=g++-4.8
fi
fi
if [[ "$TRAVIS_DIST" == "bionic" ]]; then
if [[ "$CC" == "gcc" ]]; then
export CC=gcc-7
export CXX=g++-7
fi
fi
fi
if [[ "$ARCH" == "x86" ]]; then
export CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CXX_FLAGS=-m32 -DCMAKE_C_FLAGS=-m32"
fi
#!/bin/bash
# Test compilation of example code using C++03 if possible
# -> only expected to work in Linux or Mac with CXX set to g++ or clang++
# -> expects TRAVIS_BUILD_DIR, EMSCRIPTEN, CXX to be set
# abort on error and exit with proper exit code
set -e
# test example
cd $TRAVIS_BUILD_DIR/examples
if [ -z "$EMSCRIPTEN" ]; then
# Compile with C++03 forced
$CXX -I"../submodules/msgpack-c/include" -I"../include" -std=c++03 -O2 \
-o read_and_write read_and_write.cpp
./read_and_write ../submodules/mmtf_spec/test-suite/mmtf/3NJW.mmtf test.mmtf
else
# Cannot do C++03 here and need to embed input file for running it with node
cp ../submodules/mmtf_spec/test-suite/mmtf/3NJW.mmtf .
$CXX -I"../submodules/msgpack-c/include" -I"../include" -O2 \
-o read_and_write.js read_and_write.cpp --embed-file 3NJW.mmtf
node read_and_write.js 3NJW.mmtf test.mmtf
fi
This source diff could not be displayed because it is too large. You can view the blob instead.
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
SET(executables mmtf_demo traverse print_as_pdb tableexport read_and_write)
foreach(exe ${executables})
add_executable(${exe} ${exe}.cpp)
target_compile_features(${exe} PRIVATE cxx_auto_type)
if(WIN32)
target_link_libraries(${exe} MMTFcpp ws2_32)
else()
target_link_libraries(${exe} MMTFcpp)
endif()
endforeach(exe)
#!/bin/bash
if [ "$3" == "" ]; then
echo "USAGE: compile_and_run.sh <MSGPACK_INCLUDE_PATH> <MAIN_FILE> <MMTF_FILE>"
echo "-> for MAIN_FILE = test.cpp, we compile and run test.exe <MMTF_FILE>"
exit 1
fi
MAIN_FILENAME="${2%.*}"
./compile_target.sh $1 $MAIN_FILENAME.cpp &&
./$MAIN_FILENAME.exe $3
#!/bin/bash
if [ "$3" == "" ]; then
echo "USAGE: compile_read_and_write.sh <MSGPACK_INCLUDE_PATH> <MMTF_FILE_IN> <MMTF_FILE_OUT>"
echo "-> read <MMTF_FILE_IN> and write as <MMTF_FILE_OUT>"
exit 1
fi
./compile_target.sh $1 read_and_write.cpp &&
./read_and_write.exe $2 $3
#!/bin/bash
if [ "$2" == "" ]; then
echo "USAGE: compile_target.sh <MSGPACK_INCLUDE_PATH> <MAIN_FILE>"
echo "-> for MAIN_FILE = test.cpp, this produces executable test.exe"
exit 1
fi
MAIN_FILENAME="${2%.*}"
g++ -I$1 -I"../include" -O2 -o $MAIN_FILENAME.exe $MAIN_FILENAME.cpp
#include <mmtf.hpp>
#include <iostream>
#include <string>
int main(int argc, char** argv) {
// check arguments
if (argc < 2) {
printf("USAGE: mmtf_demo <mmtffile>\n");
return 1;
}
// decode MMTF file
std::string filename(argv[1]);
mmtf::StructureData data;
mmtf::decodeFromFile(data, filename);
// check if the data is self-consistent
if (data.hasConsistentData(true)) {
std::cout << "Successfully read " << filename << ".\n";
return 0;
} else {
std::cout << "Inconsistent data in " << filename << ".\n";
return 1;
}
}
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code is Daniel Farrell
//
// NOTE: This wasn't made to be a well tested error-free executable... More
// of an example of how you might iterate over an mmtf file in pdb style.
// Please DO NOT USE THIS IN PRODUCTION.
// *************************************************************************
#include <mmtf.hpp>
#include <iostream>
#include <string>
std::string print_sd_as_pdb(mmtf::StructureData const & sd, bool index_at_0) {
std::ostringstream out;
int modelIndex = 0;
int chainIndex = 0;
int groupIndex = 0;
int atomIndex = 0;
//# traverse models
for (int i = 0; i < sd.numModels; i++, modelIndex++) {
// # traverse chains
for (int j = 0; j < sd.chainsPerModel[modelIndex]; j++, chainIndex++) {
// # traverse groups
for (int k = 0; k < sd.groupsPerChain[chainIndex]; k++, groupIndex++) {
const mmtf::GroupType& group =
sd.groupList[sd.groupTypeList[groupIndex]];
int groupAtomCount = group.atomNameList.size();
for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
// ATOM or HETATM
if (mmtf::is_hetatm(chainIndex, sd.entityList, group))
out << "HETATM";
else
out << "ATOM ";
// Atom serial
if (index_at_0 || mmtf::isDefaultValue(sd.atomIdList)) {
out << std::setfill(' ') << std::internal << std::setw(5) <<
std::right << atomIndex+1;
} else {
out << std::setfill(' ') << std::internal << std::setw(5) <<
std::right << sd.atomIdList[atomIndex] << " ";
}
// Atom name
out << std::left << std::setw(4) << std::setfill(' ') << group.atomNameList[l];
// Group name
out << group.groupName << " ";
// Chain
out << sd.chainIdList[chainIndex];
// Group serial
out << std::setfill(' ') << std::right << std::setw(4) << sd.groupIdList[groupIndex];
out << " ";
// x, y, z
out << std::setfill(' ') << std::fixed << std::setprecision(3) << std::setw(8) << std::right;
out << sd.xCoordList[atomIndex];
out << std::setfill(' ') << std::fixed << std::setprecision(3) << std::setw(8) << std::right;
out << sd.yCoordList[atomIndex];
out << std::setfill(' ') << std::fixed << std::setprecision(3) << std::setw(8) << std::right;
out << sd.zCoordList[atomIndex];
// Occupancy
out << std::setfill(' ') << std::fixed << std::setprecision(2) << std::setw(6) << std::right;
if ( !mmtf::isDefaultValue(sd.occupancyList) ) {
out << sd.occupancyList[atomIndex];
} else out << " ";
out << " ";
// Element
out << std::right << std::setw(2) << std::setfill(' ') << group.elementList[l] << " \n";
}
}
}
}
out << "END";
return out.str();
}
int main(int argc, char** argv) {
// check arguments
if (argc != 2) {
std::cout << "USAGE: ./print_sd_as_pdb <in mmtf file>" << std::endl;
return 1;
}
mmtf::StructureData d;
mmtf::decodeFromFile(d, argv[1]);
std::cout << print_sd_as_pdb(d, false) << std::endl;
}
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code is Daniel Farrell
//
// *************************************************************************
#include <mmtf.hpp>
int main(int argc, char** argv) {
// check arguments
if (argc != 3) {
printf("USAGE: read_and_write <in mmtf file> <out mmtf file>\n");
return 1;
}
// decode->encode->decode
mmtf::StructureData d;
mmtf::decodeFromFile(d, argv[1]);
mmtf::StructureData example(d);
mmtf::encodeToFile(example, argv[2]);
mmtf::StructureData d2;
mmtf::decodeFromFile(d2, argv[2]);
if (d == d2) {
std::cout << "Found read+write yielded equal mmtf files" << std::endl;
return 0;
}
else {
std::cout << "Found read+write yielded NON-equal mmtf files" << std::endl;
return 1;
}
}
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code is Thomas Holder
//
// *************************************************************************
//
// This example demonstrates how to export molecular data which is given
// as atom and bond tables.
//
// *************************************************************************
#include <mmtf.hpp>
#include <mmtf/export_helpers.hpp>
#include <iostream>
#include <string>
#define ARRAYEND(a) ((a) + sizeof(a) / sizeof(a)[0])
/*
* Atom table of example "ALA-GLY-ALA" tripeptide
*/
struct Atom {
std::string chain;
int residue_number;
std::string residue_name;
std::string atom_name;
std::string element;
int formal_charge;
float x, y, z;
} atomarray[] = {
{"A", 1, "ALA", "N", "N", 0, -0.677, -1.230, -0.491},
{"A", 1, "ALA", "CA", "C", 0, -0.001, 0.064, -0.491},
{"A", 1, "ALA", "C", "C", 0, 1.499, -0.110, -0.491},
{"A", 1, "ALA", "O", "O", 0, 2.030, -1.227, -0.502},
{"A", 1, "ALA", "CB", "C", 0, -0.509, 0.856, 0.727},
{"A", 2, "GLY", "N", "N", 0, 2.250, 0.939, -0.479},
{"A", 2, "GLY", "CA", "C", 0, 3.700, 0.771, -0.479},
{"A", 2, "GLY", "C", "C", 0, 4.400, 2.108, -0.463},
{"A", 2, "GLY", "O", "O", 0, 3.775, 3.173, -0.453},
{"A", 3, "ALA", "N", "N", 0, 5.689, 2.140, -0.462},
{"A", 3, "ALA", "CA", "C", 0, 6.365, 3.434, -0.447},
{"A", 3, "ALA", "C", "C", 0, 7.865, 3.260, -0.447},
{"A", 3, "ALA", "O", "O", 0, 8.396, 2.144, -0.470},
{"A", 3, "ALA", "CB", "C", 0, 5.855, 4.213, 0.778}
};
/*
* Bond table for "ALA-GLY-ALA" tripeptide
*/
struct Bond {
size_t atom1;
size_t atom2;
int8_t order;
} bondarray[] = {
{0, 1, 1},
{1, 2, 1},
{1, 4, 1},
{2, 3, 2},
{2, 5, 1},
{5, 6, 1},
{6, 7, 1},
{7, 8, 2},
{7, 9, 1},
{9, 10, 1},
{10, 11, 1},
{10, 13, 1},
{11, 12, 2}
};
int main(int argc, char** argv)
{
mmtf::StructureData data;
mmtf::GroupType* residue = NULL;
const Atom* prevatom = NULL;
// start new model
data.chainsPerModel.push_back(0);
// add atoms
for (const Atom* atom = atomarray; atom != ARRAYEND(atomarray); ++atom) {
data.xCoordList.push_back(atom->x);
data.yCoordList.push_back(atom->y);
data.zCoordList.push_back(atom->z);
bool is_same_residue = false;
bool is_same_chain = prevatom && prevatom->chain == atom->chain;
if (!is_same_chain) {
data.chainsPerModel.back() += 1;
data.groupsPerChain.push_back(0); // increment with every group
data.chainIdList.push_back(atom->chain);
} else {
is_same_residue = prevatom && prevatom->residue_number == atom->residue_number;
}
if (!is_same_residue) {
data.groupsPerChain.back() += 1;
data.groupTypeList.push_back(data.groupList.size());
data.groupIdList.push_back(atom->residue_number);
data.groupList.resize(data.groupList.size() + 1);
residue = &data.groupList.back();
residue->groupName = atom->residue_name;
}
residue->formalChargeList.push_back(atom->formal_charge);
residue->atomNameList.push_back(atom->atom_name);
residue->elementList.push_back(atom->element);
prevatom = atom;
}
data.numAtoms = data.xCoordList.size();
data.numGroups = data.groupIdList.size();
data.numChains = data.chainIdList.size();
data.numModels = data.chainsPerModel.size();
// construct the BondAdder after adding atoms was completed
mmtf::BondAdder bondadder(data);
// add bonds
for (const Bond* bond = bondarray; bond != ARRAYEND(bondarray); ++bond) {
bondadder(bond->atom1, bond->atom2, bond->order);
}
std::cout << "INFO numBonds (total): " << data.numBonds << std::endl;
std::cout << "INFO numBonds (inter-residue): " << (data.bondAtomList.size() / 2) << std::endl;
std::cout << "INFO groupList.size() before compression: " << data.groupList.size() << std::endl;
mmtf::compressGroupList(data);
std::cout << "INFO groupList.size() after compression: " << data.groupList.size() << std::endl;
// write to file if filename provided
if (argc != 2) {
printf("USAGE: export <out mmtf file>\n");
} else {
mmtf::encodeToFile(data, argv[1]);
}
return 0;
}
// vi:sw=2:expandtab
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gerardo Tauriello
//
// The code is heavily based on traverse.c written by Julien Ferte from the
// mmtf_c project
//
// *************************************************************************
#include <mmtf.hpp>
// C-style libraries used here to keep it close to traverse.c
#include <stdio.h>
#include <string.h>
#include <assert.h>
// *************************************************************************
// JSON-style
// *************************************************************************
// printf for strings
template<typename T>
void printval(const char* pff, const T& val) {
printf(pff, val);
}
template<>
void printval(const char* pff, const std::string& val) {
printf(pff, val.c_str());
}
// print vector only
template<typename T>
void printvec(const char* pff, const T* v, const size_t N) {
printf("[");
for (size_t i = 0; i < N; ++i) {
if (i > 0) printf(", ");
printval(pff, v[i]);
}
printf("]");
}
// writing with no check if it's set or not
template<typename T>
void printreq(const char* label, const char* pff,
const T* v, const size_t N, bool last = false) {
printf("%s: ", label);
printvec(pff, v, N);
printf("%s\n", (last) ? "" : ",");
}
template<typename T>
void printreq(const char* label, const char* pff,
const std::vector<T>& v, bool last = false) {
if (v.empty()) {
printf("%s: []%s\n", label, (last) ? "" : ",");
} else {
printreq(label, pff, &v[0], v.size(), last);
}
}
void printreq(const char* label, const std::string& s, bool last = false) {
printf("%s: \"%s\"%s\n", label, s.c_str(), (last) ? "" : ",");
}
void printreq(const char* label, const float& f, bool last = false) {
printf("%s: %g%s\n", label, f, (last) ? "" : ",");
}
// write with check if set
template<typename T>
void printopt(const char* label, const T& val, bool last = false) {
if (!mmtf::isDefaultValue(val)) {
printreq(label, val, last);
} else {
printf("%s: null%s\n", label, (last) ? "" : ",");
}
}
template<typename T>
void printopt(const char* label, const char* pff,
const std::vector<T>& vec, bool last = false) {
if (!mmtf::isDefaultValue(vec)) {
printreq(label, pff, vec, last);
} else {
printf("%s: null%s\n", label, (last) ? "" : ",");
}
}
// JSON style printing of parts (not very elegant hack here)
template<typename T>
void json_print(const T& t);
template<typename T>
void json_print_list(const char* label, const T* v, const size_t N,
int indent, bool last = false) {
printf("%s: [", label);
for (size_t i = 0; i < N; ++i) {
if (i > 0) printf(",\n");
else printf("\n");
json_print(v[i]);
}
printf("\n%*s%s\n", indent+1, "]", (last) ? "" : ",");
}
template<typename T>
void json_print_list(const char* label, const std::vector<T>& v,
int indent, bool last = false) {
json_print_list(label, &v[0], v.size(), indent, last);
}
// for optional vectors
template<typename T>
void json_print_opt(const char* label, const std::vector<T>& v,
int indent, bool last = false) {
if (!mmtf::isDefaultValue(v)) {
json_print_list(label, &v[0], v.size(), indent, last);
} else {
printf("%s: null%s\n", label, (last) ? "" : ",");
}
}
// just aimed for ncsOperatorList
template<>
void json_print(const std::vector<float>& list) {
printf(" ");
printvec("%g", &list[0], list.size());
}
template<>
void json_print(const mmtf::Transform& transform) {
printf(" {\n");
printreq(" \"chainIndexList\"", "%d", transform.chainIndexList);
printreq(" \"matrix\"", "%g", transform.matrix, 16, true);
printf(" }");
}
template<>
void json_print(const mmtf::BioAssembly& ba) {
printf(" {\n");
printreq(" \"name\"", ba.name);
json_print_list(" \"transformList\"", ba.transformList, 3, true);
printf(" }");
}
template<>
void json_print(const mmtf::Entity& ent) {
printf(" {\n");
printreq(" \"chainIndexList\"", "%d", ent.chainIndexList);
printreq(" \"description\"", ent.description);
printreq(" \"type\"", ent.type);
printreq(" \"sequence\"", ent.sequence, true);
printf(" }");
}
template<>
void json_print(const mmtf::GroupType& group) {
printf(" {\n");
printreq(" \"formalChargeList\"", "%d", group.formalChargeList);
printreq(" \"atomNameList\"", "\"%s\"", group.atomNameList);
printopt(" \"bondAtomList\"", "%d", group.bondAtomList);
printopt(" \"bondOrderList\"", "%d", group.bondOrderList);
printopt(" \"bondResonanceList\"", "%d", group.bondResonanceList);
printreq(" \"groupName\"", group.groupName);
printf(" \"singleLetterCode\": \"%c\",\n", group.singleLetterCode);
printreq(" \"chemCompType\"", group.chemCompType, true);
printf(" }");
}
/**
* @brief Raw output of the whole thing json style.
*/
template<>
void json_print(const mmtf::StructureData& example) {
printf("{\n");
// generic info
printreq(" \"mmtfVersion\"", example.mmtfVersion);
printreq(" \"mmtfProducer\"", example.mmtfProducer);
printopt(" \"unitCell\"", "%g", example.unitCell);
printopt(" \"spaceGroup\"", example.spaceGroup);
printopt(" \"structureId\"", example.structureId);
printopt(" \"title\"", example.title);
printopt(" \"depositionDate\"", example.depositionDate);
printopt(" \"releaseDate\"", example.releaseDate);
// json_print_opt(" \"ncsOperatorList\"", example.ncsOperatorList, 1);
json_print_opt(" \"bioAssemblyList\"", example.bioAssemblyList, 1);
json_print_opt(" \"entityList\"", example.entityList, 1);
printopt(" \"experimentalMethods\"", "\"%s\"", example.experimentalMethods);
printopt(" \"resolution\"", example.resolution);
printopt(" \"rFree\"", example.rFree);
printopt(" \"rWork\"", example.rWork);
printf(" \"numBonds\": %d,\n", example.numBonds);
printf(" \"numAtoms\": %d,\n", example.numAtoms);
printf(" \"numGroups\": %d,\n", example.numGroups);
printf(" \"numChains\": %d,\n", example.numChains);
printf(" \"numModels\": %d,\n", example.numModels);
json_print_list(" \"groupList\"", example.groupList, 1);
printopt(" \"bondAtomList\"", "%d", example.bondAtomList);
printopt(" \"bondOrderList\"", "%d", example.bondOrderList);
printopt(" \"bondResonanceList\"", "%d", example.bondResonanceList);
printreq(" \"xCoordList\"", "%g", example.xCoordList);
printreq(" \"yCoordList\"", "%g", example.yCoordList);
printreq(" \"zCoordList\"", "%g", example.zCoordList);
printopt(" \"bFactorList\"", "%g", example.bFactorList);
printopt(" \"atomIdList\"", "%d", example.atomIdList);
printopt(" \"altLocList\"", "%d", example.altLocList);
printopt(" \"occupancyList\"", "%g", example.occupancyList);
printreq(" \"groupIdList\"", "%d", example.groupIdList);
printreq(" \"groupTypeList\"", "%d", example.groupTypeList);
printopt(" \"secStructList\"", "%d", example.secStructList);
printopt(" \"insCodeList\"", "%d", example.insCodeList);
printopt(" \"sequenceIndexList\"", "%d", example.sequenceIndexList);
printreq(" \"chainIdList\"", "\"%s\"", example.chainIdList);
printopt(" \"chainNameList\"", "\"%s\"", example.chainNameList);
printreq(" \"groupsPerChain\"", "%d", example.groupsPerChain);
printreq(" \"chainsPerModel\"", "%d", example.chainsPerModel, true);
printf("}\n");
}
// *************************************************************************
// Verbose style
// *************************************************************************
/**
* @brief If any value from
* \link mmtf::StructureData::insCodeList insCodeList \endlink or
* \link mmtf::StructureData::altLocList altLocList \endlink is empty,
* it would would replace the character to a dot
* @param c character which needs to be checked
* @return The c character if it is not empty otherwise a dot
*/
char safechar(char c) {
return (c < ' ') ? '.' : c;
}
// helper for optional entries (printval(pff, vec[i]) only if vec given)
template<typename T>
void printvalo(const char* pff, const std::vector<T>& vec, size_t i) {
if (!mmtf::isDefaultValue(vec)) printval(pff, vec[i]);
}
// helper for char entries (do safeprint of vec[i])
template<>
void printvalo(const char* pff, const std::vector<char>& vec, size_t i) {
if (!mmtf::isDefaultValue(vec)) printval(pff, safechar(vec[i]));
}
/**
* @brief This is the main traversal function to read out all the contents of
* mmtf::StructureData
* @param example Any existing mmtf::StructureData which you want to read
* @return void
*/
void traverse_main(const mmtf::StructureData& example) {
// generic info
printreq("mmtfVersion", example.mmtfVersion, true);
printreq("mmtfProducer", example.mmtfProducer, true);
printopt("unitCell", "%g", example.unitCell, true);
printopt("spaceGroup", example.spaceGroup, true);
printopt("structureId", example.structureId, true);
printopt("title", example.title, true);
printopt("depositionDate", example.depositionDate, true);
printopt("releaseDate", example.releaseDate, true);
for (size_t i = 0; i < example.ncsOperatorList.size(); ++i) {
printf("ncsOperatorList[%d]", int(i));
printreq("", "%g", example.ncsOperatorList[i], true);
}
for (size_t i = 0; i < example.bioAssemblyList.size(); ++i) {
printf("bioAssemblyIndex: %d\n", int(i));
const mmtf::BioAssembly& ba = example.bioAssemblyList[i];
printreq(" name", ba.name, true);
for (size_t j = 0; j < ba.transformList.size(); ++j) {
printf(" bioAssemblyTransformIndex: %d\n", int(j));
const mmtf::Transform & transform = ba.transformList[j];
printreq(" chainIndexList", "%d", transform.chainIndexList, true);
printreq(" matrix", "%g", transform.matrix, 16, true);
}
}
for (size_t i = 0; i < example.entityList.size(); ++i) {
printf("entityIndex: %d\n", int(i));
const mmtf::Entity& ent = example.entityList[i];
printreq(" chainIndexList", "%d", ent.chainIndexList, true);
printreq(" description", ent.description, true);
printreq(" type", ent.type, true);
printreq(" sequence", ent.sequence, true);
}
for (size_t i = 0; i < example.experimentalMethods.size(); ++i) {
printf("experimentalMethod %d: %s\n", int(i),
example.experimentalMethods[i].c_str());
}
printopt("resolution", example.resolution, true);
printopt("rFree", example.rFree, true);
printopt("rWork", example.rWork, true);
printf("numBonds: %d\n", example.numBonds);
printf("numAtoms: %d\n", example.numAtoms);
printf("numGroups: %d\n", example.numGroups);
printf("numChains: %d\n", example.numChains);
printf("numModels: %d\n", example.numModels);
// # initialize index counters
int modelIndex = 0;
int chainIndex = 0;
int groupIndex = 0;
int atomIndex = 0;
//# traverse models
int i;
for (i = 0; i < example.numModels; i++) {
int modelChainCount = example.chainsPerModel[i];
printf("modelIndex: %d\n", modelIndex);
// # traverse chains
int j;
for (j = 0; j < modelChainCount; j++) {
printf(" chainIndex : %d\n", chainIndex);
printval(" Chain id: %s\n", example.chainIdList[chainIndex]);
printvalo(" Chain name: %s\n", example.chainNameList, chainIndex);
int chainGroupCount = example.groupsPerChain[chainIndex];
// # traverse groups
int k;
for (k = 0; k < chainGroupCount; k++) {
printf(" groupIndex: %d\n", groupIndex);
printf(" groupId: %d\n", example.groupIdList[groupIndex]);
printvalo(" insCodeList: %c\n", example.insCodeList,
groupIndex);
printvalo(" secStruc: %d\n", example.secStructList,
groupIndex);
printvalo(" seqIndex: %i\n", example.sequenceIndexList,
groupIndex);
printf(" groupType: %d\n", example.groupTypeList[groupIndex]);
const mmtf::GroupType& group =
example.groupList[example.groupTypeList[groupIndex]];
printval(" Group name: %s\n", group.groupName);
printf(" Single letter code: %c\n", group.singleLetterCode);
printval(" Chem comp type: %s\n", group.chemCompType);
int atomOffset = atomIndex;
int l;
for (l = 0; l < group.bondAtomList.size(); l += 2) {
printf(" Atom id One: %d\n",
(atomOffset + group.bondAtomList[l]));
printf(" Atom id Two: %d\n",
(atomOffset + group.bondAtomList[l + 1]));
printvalo(" Bond order: %d\n",
group.bondOrderList, l / 2);
printvalo(" Bond resonance: %d\n",
group.bondResonanceList, l / 2);
}
int groupAtomCount = group.atomNameList.size();
for (l = 0; l < groupAtomCount; l++) {
printf(" atomIndex: %d\n", atomIndex);
printf(" x coord: %.3f\n",
example.xCoordList[atomIndex]);
printf(" y coord: %.3f\n",
example.yCoordList[atomIndex]);
printf(" z coord: %.3f\n",
example.zCoordList[atomIndex]);
printvalo(" b factor: %.2f\n", example.bFactorList,
atomIndex);
printvalo(" atom id: %d\n", example.atomIdList,
atomIndex);
printvalo(" altLocList: %c\n", example.altLocList,
atomIndex);
printvalo(" occupancy: %.2f\n", example.occupancyList,
atomIndex);
printf(" charge: %d\n", group.formalChargeList[l]);
printval(" atom name: %s\n", group.atomNameList[l]);
printval(" element: %s\n", group.elementList[l]);
atomIndex++;
}
groupIndex++;
}
chainIndex++;
}
modelIndex++;
}
printf("Number of inter group bonds: %d\n",
(int) example.bondAtomList.size() / 2);
for (i = 0; i < example.bondAtomList.size(); i += 2) {
printf(" Atom One: %d\n", example.bondAtomList[i]);
printf(" Atom Two: %d\n", example.bondAtomList[i + 1]);
printvalo(" Bond order: %d\n", example.bondOrderList, i / 2);
printvalo(" Bond resonance: %d\n", example.bondResonanceList, i / 2);
}
}
// *************************************************************************
// PDB style
// *************************************************************************
/**
* @brief Read out the contents of mmtf::StructureData in a PDB-like fashion
* Columns are in order:
* ATOM/HETATM AtomId Element AtomName AltLoc GroupId GroupType
* InsCode ChainName x y z B-factor Occupancy Charge
* @param example Any existing mmtf::StructureData which you want to read
*/
void traverse_pdb_like(const mmtf::StructureData& example) {
int modelIndex = 0;
int chainIndex = 0;
int groupIndex = 0;
int atomIndex = 0;
//# traverse models
for (int i = 0; i < example.numModels; i++, modelIndex++) {
// # traverse chains
for (int j = 0; j < example.chainsPerModel[modelIndex]; j++, chainIndex++) {
// # traverse groups
for (int k = 0; k < example.groupsPerChain[chainIndex]; k++, groupIndex++) {
const mmtf::GroupType& group =
example.groupList[example.groupTypeList[groupIndex]];
int groupAtomCount = group.atomNameList.size();
for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
// ATOM or HETATM
if (mmtf::is_hetatm(chainIndex, example.entityList, group))
printf("HETATM ");
else
printf("ATOM ");
// Atom serial
printvalo("%d ", example.atomIdList, atomIndex);
// Atom name
printval("%s ", group.atomNameList[l]);
// Alternate location
printvalo("%c ", example.altLocList, atomIndex);
// Group name
printval("%s ", group.groupName);
// Chain
printvalo("%s ", example.chainNameList, chainIndex);
// Group serial
printf("%d ", example.groupIdList[groupIndex]);
// Insertion code
printvalo("%c ", example.insCodeList, groupIndex);
// x, y, z
printf("%.3f ", example.xCoordList[atomIndex]);
printf("%.3f ", example.yCoordList[atomIndex]);
printf("%.3f ", example.zCoordList[atomIndex]);
// B-factor
printvalo("%.2f ", example.bFactorList, atomIndex);
// Occupancy
printvalo("%.2f ", example.occupancyList, atomIndex);
// Element
printval("%s ", group.elementList[l]);
// Charge
printf("%d \n", group.formalChargeList[l]);
}
}
}
}
}
int main(int argc, char** argv) {
// check arguments
if (argc < 2) {
printf("USAGE: traverse <mmtffile> [<style>]\n");
printf("-> if style ommited, we output a PDB-like output\n");
printf("-> if style == 'json', we output a json-dump of all data\n");
printf("-> if style == 'print', we output a built-in tab delimited format\n");
printf("-> else, we output all the data in a verbose, readable form\n");
return 1;
}
// decode MMTF file
mmtf::StructureData example;
mmtf::decodeFromFile(example, argv[1]);
// traverse it according to user specified style
if (argc > 2) {
if (strcmp(argv[2], "json") == 0) {
json_print(example);
} else if (strcmp(argv[2], "print") == 0) {
std::cout << example.print() << std::endl;
} else {
traverse_main(example);
}
} else {
traverse_pdb_like(example);
}
return 0;
}
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gerardo Tauriello
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose.
//
// *************************************************************************
// single include for all MMTF needs
#include "mmtf/decoder.hpp"
#include "mmtf/encoder.hpp"
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gerardo Tauriello, and Daniel Farrell.
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose
//
// *************************************************************************
#ifndef MMTF_BINARY_DECODER_H
#define MMTF_BINARY_DECODER_H
#include "structure_data.hpp"
#include "errors.hpp"
#include <msgpack.hpp>
#include <cstring> // low level mem
#include <sstream>
#include <limits>
#include <algorithm>
namespace mmtf {
/**
* @brief Helper class to decode msgpack binary into a vector.
*/
class BinaryDecoder {
public:
/**
* @brief Initialize object given a msgpack object.
* Reads out binary header to prepare for call of decode.
* @param[in] obj Object to decode.
* @param[in] key Key used to report errors.
* @warning This uses a pointer to the data of obj. obj must stay alive
* while this instance exists or it will result in undefined behavior.
* @throw mmtf::DecodeError if obj is not a binary or is too short.
*/
BinaryDecoder(const msgpack::object& obj,
const std::string& key = "UNNAMED_BINARY");
/**
* @brief Initialize object given a msgpack binary string.
* Reads out binary header to prepare for call of decode.
* @param[in] str Object to decode.
* @param[in] key Key used to report errors.
* @warning This uses a pointer to the data of str. str must stay alive
* while this instance exists or it will result in undefined behavior.
* @throw mmtf::DecodeError if obj is not a binary or is too short.
*/
BinaryDecoder(const std::string& str,
const std::string& key = "UNNAMED_BINARY");
/**
* @brief Decode binary msgpack object into the given target.
*
* @param[out] target Store decoded vector into this field.
*
* @tparam T Can be one of:
* - std::vector<float> (strategies: 1, 9, 10, 11, 12, 13)
* - std::vector<int8_t> (strategies: 2, 16)
* - std::vector<int16_t> (strategies: 3)
* - std::vector<int32_t> (strategies: 4, 7, 8, 14, 15)
* - std::vector<std::string> (strategies: 5)
* - std::vector<char> (strategies: 6)
*
* @throw mmtf::DecodeError if we fail to decode.
*/
template<typename T>
void decode(T& target) const;
private:
// for error reporting
std::string key_;
// data from binary header
int32_t strategy_;
int32_t length_;
int32_t parameter_;
const char* encodedData_;
uint32_t encodedDataLength_; // max. size for binary is 2^32 - 1
// helper function for constructors
void
initFromData(const char * str_data,
const std::size_t len);
// check length consistency (throws)
void checkLength_(int32_t exp_length) const;
// check if binary data is divisible by x (throws)
void checkDivisibleBy_(int32_t item_size) const;
// byte decoders
void decodeFromBytes_(std::vector<float>& output) const;
void decodeFromBytes_(std::vector<int8_t>& output) const;
void decodeFromBytes_(std::vector<int16_t>& output) const;
void decodeFromBytes_(std::vector<int32_t>& output) const;
// special one: decode to vector of strings
void decodeFromBytes_(std::vector<std::string>& output) const;
// run length decoding
// -> Int and IntOut can be any integer types
// -> Int values are blindly converted to IntOut
template<typename Int, typename IntOut>
void runLengthDecode_(const std::vector<Int>& input,
std::vector<IntOut>& output) const;
// delta decoding -> Int can be any integer type
template<typename Int>
void deltaDecode_(const std::vector<Int>& input, std::vector<Int>& output) const;
// variant doing it in-place
template<typename Int>
void deltaDecode_(std::vector<Int>& in_out) const;
// recursive indexing decode -> SmallInt must be smaller than Int
template<typename SmallInt, typename Int>
void recursiveIndexDecode_(const std::vector<SmallInt>& input,
std::vector<Int>& output) const;
// decode integer to float -> Int can be any integer type
template<typename Int>
void decodeDivide_(const std::vector<Int>& input, float const divisor,
std::vector<float>& output) const;
};
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
// helpers in anonymous namespace (only visible in this file)
namespace {
// byteorder functions ("ntohl" etc.)
#ifdef WIN32
#include <winsock2.h>
#else
#include <arpa/inet.h>
#endif
#ifndef __EMSCRIPTEN__
void assignBigendian4(void* dst, const char* src) {
uint32_t tmp;
std::memcpy(&tmp, src, sizeof(uint32_t));
tmp = ntohl(tmp);
std::memcpy(dst, &tmp, sizeof(uint32_t));
}
void assignBigendian2(void* dst, const char* src) {
uint16_t tmp;
std::memcpy(&tmp, src, sizeof(uint16_t));
tmp = ntohs(tmp);
std::memcpy(dst, &tmp, sizeof(uint16_t));
}
#else
// Need to avoid how emscripten handles memory
// Note that this will only work on little endian machines, but this should not be a major
// an issue as Emscripten only supports little endian hardware.
// see: https://kripken.github.io/emscripten-site/docs/porting/guidelines/portability_guidelines.html
void assignBigendian4(void* dst, const char* src) {
((uint8_t*)dst)[0] = src[3];
((uint8_t*)dst)[1] = src[2];
((uint8_t*)dst)[2] = src[1];
((uint8_t*)dst)[3] = src[0];
}
void assignBigendian2(void* dst, const char* src) {
((uint8_t*)dst)[0] = src[1];
((uint8_t*)dst)[1] = src[0];
}
#endif
void arrayCopyBigendian4(void* dst, const char* src, size_t n) {
for (size_t i = 0; i < n; i += 4) {
assignBigendian4(((char*)dst) + i, src + i);
}
}
void arrayCopyBigendian2(void* dst, const char* src, size_t n) {
for (size_t i = 0; i < n; i += 2) {
assignBigendian2(((char*)dst) + i, src + i);
}
}
} // anon ns
// note this does not set key_, you must set it in ctor
inline void BinaryDecoder::initFromData(const char * bytes, std::size_t const len) {
assignBigendian4(&strategy_, bytes);
assignBigendian4(&length_, bytes + 4);
assignBigendian4(&parameter_, bytes + 8);
encodedData_ = bytes + 12;
encodedDataLength_ = len - 12;
}
inline BinaryDecoder::BinaryDecoder(const msgpack::object& obj,
const std::string& key)
: key_(key) {
// sanity checks
if (obj.type != msgpack::type::BIN) {
throw DecodeError("The '" + key + "' entry is not binary data");
}
if (obj.via.bin.size < 12) {
std::stringstream err;
err << "The '" + key + "' entry is too short " << obj.via.bin.size;
throw DecodeError(err.str());
}
this->initFromData(obj.via.bin.ptr, obj.via.bin.size);
}
inline BinaryDecoder::BinaryDecoder(const std::string& str,
const std::string& key)
: key_(key) {
this->initFromData(str.data(), str.size());
}
template<typename T>
void BinaryDecoder::decode(T&) const {
throw mmtf::DecodeError("Invalid target type for binary '" + key_ + "'");
}
template<>
inline void BinaryDecoder::decode(std::vector<float>& output) const {
// check strategy to parse
switch (strategy_) {
case 1: {
decodeFromBytes_(output);
break;
}
case 9: {
std::vector<int32_t> step1;
std::vector<int32_t> step2;
decodeFromBytes_(step1);
runLengthDecode_(step1, step2);
decodeDivide_(step2, static_cast<float>(parameter_), output);
break;
}
case 10: {
std::vector<int16_t> step1;
std::vector<int32_t> step2;
decodeFromBytes_(step1);
recursiveIndexDecode_(step1, step2);
deltaDecode_(step2);
decodeDivide_(step2, static_cast<float>(parameter_), output);
break;
}
case 11: {
std::vector<int16_t> step1;
decodeFromBytes_(step1);
decodeDivide_(step1, static_cast<float>(parameter_), output);
break;
}
case 12: {
std::vector<int16_t> step1;
std::vector<int32_t> step2;
decodeFromBytes_(step1);
recursiveIndexDecode_(step1, step2);
decodeDivide_(step2, static_cast<float>(parameter_), output);
break;
}
case 13: {
std::vector<int8_t> step1;
std::vector<int32_t> step2;
decodeFromBytes_(step1);
recursiveIndexDecode_(step1, step2);
decodeDivide_(step2, static_cast<float>(parameter_), output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to float array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
template<>
inline void BinaryDecoder::decode(std::vector<int8_t>& output) const {
// check strategy to parse
switch (strategy_) {
case 2: {
decodeFromBytes_(output);
break;
}
case 16: {
std::vector<int32_t> step1;
decodeFromBytes_(step1);
runLengthDecode_(step1, output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to int8 array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
template<>
inline void BinaryDecoder::decode(std::vector<int16_t>& output) const {
// check strategy to parse
switch (strategy_) {
case 3: {
decodeFromBytes_(output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to int16 array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
template<>
inline void BinaryDecoder::decode(std::vector<int32_t>& output) const {
// check strategy to parse
switch (strategy_) {
case 4: {
decodeFromBytes_(output);
break;
}
case 7: {
std::vector<int32_t> step1;
decodeFromBytes_(step1);
runLengthDecode_(step1, output);
break;
}
case 8: {
std::vector<int32_t> step1;
decodeFromBytes_(step1);
runLengthDecode_(step1, output);
deltaDecode_(output);
break;
}
case 14: {
std::vector<int16_t> step1;
decodeFromBytes_(step1);
recursiveIndexDecode_(step1, output);
break;
}
case 15: {
std::vector<int8_t> step1;
decodeFromBytes_(step1);
recursiveIndexDecode_(step1, output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to int32 array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
template<>
inline void BinaryDecoder::decode(std::vector<std::string>& output) const {
// check strategy to parse
switch (strategy_) {
case 5: {
decodeFromBytes_(output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to string array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
template<>
inline void BinaryDecoder::decode(std::vector<char>& output) const {
// check strategy to parse
switch (strategy_) {
case 6: {
std::vector<int32_t> step1;
decodeFromBytes_(step1);
runLengthDecode_(step1, output);
break;
}
default: {
std::stringstream err;
err << "Invalid strategy " << strategy_ << " for binary '" + key_
<< "': does not decode to string array";
throw DecodeError(err.str());
}
}
// check size
checkLength_(output.size());
}
// checks
inline void BinaryDecoder::checkLength_(int32_t exp_length) const {
if (length_ != exp_length) {
std::stringstream err;
err << "Length mismatch for binary '" + key_ + "': "
<< length_ << " vs " << exp_length;
throw DecodeError(err.str());
}
}
inline void BinaryDecoder::checkDivisibleBy_(int32_t item_size) const {
if (encodedDataLength_ % item_size != 0) {
std::stringstream err;
err << "Binary length of '" + key_ + "': "
<< encodedDataLength_ << " is not a multiple of " << item_size;
throw DecodeError(err.str());
}
}
// byte decoders
inline void BinaryDecoder::decodeFromBytes_(std::vector<float>& output) const {
checkDivisibleBy_(4);
// prepare memory
output.resize(encodedDataLength_ / 4);
// get data
if(!output.empty()) {
arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
}
}
inline void BinaryDecoder::decodeFromBytes_(std::vector<int8_t>& output) const {
// prepare memory
output.resize(encodedDataLength_);
// get data
if (!output.empty()) {
memcpy(&output[0], encodedData_, encodedDataLength_);
}
}
inline void BinaryDecoder::decodeFromBytes_(std::vector<int16_t>& output) const {
checkDivisibleBy_(2);
// prepare memory
output.resize(encodedDataLength_ / 2);
// get data
if (!output.empty()) {
arrayCopyBigendian2(&output[0], encodedData_, encodedDataLength_);
}
}
inline void BinaryDecoder::decodeFromBytes_(std::vector<int32_t>& output) const {
checkDivisibleBy_(4);
// prepare memory
output.resize(encodedDataLength_ / 4);
// get data
if (!output.empty()) {
arrayCopyBigendian4(&output[0], encodedData_, encodedDataLength_);
}
}
// special one: decode to vector of strings
inline void BinaryDecoder::decodeFromBytes_(std::vector<std::string>& output) const {
char NULL_BYTE = 0x00;
// check parameter
const int32_t str_len = parameter_;
checkDivisibleBy_(str_len);
// prepare memory
output.resize(encodedDataLength_ / str_len);
// get data
for (size_t i = 0; i < output.size(); ++i) {
output[i].assign(encodedData_ + i * str_len, str_len);
output[i].erase(std::remove(output[i].begin(), output[i].end(), NULL_BYTE), output[i].end());
}
}
// run length decoding
template<typename Int, typename IntOut>
void BinaryDecoder::runLengthDecode_(const std::vector<Int>& input,
std::vector<IntOut>& output) const {
// we work with pairs of numbers
checkDivisibleBy_(2);
// find out size of resulting vector (for speed)
size_t out_size = 0;
for (size_t i = 0; i < input.size(); i += 2) {
out_size += input[i + 1];
}
// reserve space (for speed)
output.clear();
output.reserve(out_size);
// get data
for (size_t i = 0; i < input.size(); i += 2) {
const IntOut value = IntOut(input[i]);
const Int number = input[i+1];
for (Int j = 0; j < number; ++j) {
output.push_back(value);
}
}
}
// delta decoding
template<typename Int>
void BinaryDecoder::deltaDecode_(const std::vector<Int>& input,
std::vector<Int>& output) const {
// reserve space (for speed)
output.clear();
if (input.empty()) return; // ensure we have some values
output.reserve(input.size());
// get data
output.push_back(input[0]);
for (size_t i = 1; i < input.size(); ++i) {
output.push_back(output[i - 1] + input[i]);
}
}
template<typename Int>
void BinaryDecoder::deltaDecode_(std::vector<Int>& in_out) const {
for (size_t i = 1; i < in_out.size(); ++i) {
in_out[i] = in_out[i - 1] + in_out[i];
}
}
// recursive indexing decode
template<typename SmallInt, typename Int>
void BinaryDecoder::recursiveIndexDecode_(const std::vector<SmallInt>& input,
std::vector<Int>& output) const {
// get limits
const SmallInt min_int = std::numeric_limits<SmallInt>::min();
const SmallInt max_int = std::numeric_limits<SmallInt>::max();
// find out size of resulting vector (for speed)
size_t out_size = 0;
for (size_t i = 0; i < input.size(); ++i) {
if (input[i] != min_int && input[i] != max_int) ++out_size;
}
// reserve space (for speed)
output.clear();
output.reserve(out_size);
// get data
Int cur_val = 0;
for (size_t i = 0; i < input.size(); ++i) {
cur_val += input[i];
if (input[i] != min_int && input[i] != max_int) {
output.push_back(cur_val);
cur_val = 0;
}
}
}
// decode integer to float
template<typename Int>
void BinaryDecoder::decodeDivide_(const std::vector<Int>& input, float const divisor,
std::vector<float>& output) const {
// reserve space and get inverted divisor (for speed)
output.clear();
output.reserve(input.size());
float inv_div = float(1) / divisor;
// get data
for (size_t i = 0; i < input.size(); ++i) {
output.push_back(float(input[i]) * inv_div);
}
}
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The author of this code is: Daniel Farrell
//
// Based on mmtf_python, adapted to c++ standards 2018
//
// *************************************************************************
#ifndef MMTF_BINARY_ENCODER_H
#define MMTF_BINARY_ENCODER_H
#include <math.h>
#include <vector>
#include <string>
#include <sstream>
// byteorder functions
#ifdef WIN32
#include <winsock2.h>
#else
#include <arpa/inet.h>
#endif
namespace mmtf {
// *************************************************************************
// PRIVATE FUNCTIONS (only visible in this header)
// *************************************************************************
namespace { // private helpers
/**
* @brief Convert floats to ints via multiplier.
* @param[in] vec_in vector of floats
* @param[in] multiplier multiply float vector by this
* @return vec_out vec_in * multiplier and rounded
*/
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
int multiplier);
/**
* @brief mmtf delta encode a vector of ints.
* @param[in] vec_in vector of ints
* @return vec_out delta encoded int vector
*/
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in);
/**
* @brief mmtf run length encode a vector of ints.
* @param[in] vec_in vector of ints convertable to int32_t
* @return vec_out run length encoded int vector
*/
template<typename Int>
inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in );
/**
* @brief mmtf recursive index encode a vector of ints.
* @param[in] vec_in vector of ints
* @param[in] max maximum value of signed 16bit int
* @param[in] min minimum value of signed 16bit int
* @return vec_out recursive index encoded vector
*/
inline std::vector<int32_t> recursiveIndexEncode(std::vector<int32_t> const & vec_in,
int max=32767, int min=-32768);
/**
* @brief Add mmtf header to a stream
* @param[in] ss stringstream to add a header to
* @param[in] array_size size of array you're adding
* @param[in] codec the codec type number you're using to encode
* @param[in] param the param for the codec you're using (default 0)
*/
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param=0);
/**
* @brief Convert stringstream to CharVector
* @param[in] ss ss to convert
* @return converted ss
*/
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss);
} // anon ns
// *************************************************************************
// PUBLIC FUNCTIONS
// *************************************************************************
/** Encode 8 bit int to bytes encoding (type 2)
* @param[in] vec_in Vector of ints to encode
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in);
/** Encode 4 bytes to int encoding (type 4)
* @param[in] vec_in Vector of ints to encode
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in);
/** Encode string vector encoding (type 5)
* @param[in] in_sv Vector of strings to encode
* @param[in] CHAIN_LEN Maximum length of string
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN);
/** Encode Run Length Char encoding (type 6)
* @param[in] in_cv Vector of chars to encode
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv);
/** Encode Run Length Delta Int encoding (type 8)
* @param[in] int_vec Vector of ints to encode
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec);
/** Encode Run Length Float encoding (type 9)
* @param[in] floats_in Vector of floats to encode
* @param[in] multiplier Multiplier to convert float to int
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier);
/** Encode Delta Recursive Float encoding (type 10)
* @param[in] floats_in Vector of floats to encode
* @param[in] multiplier Multiplier to convert float to int
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier);
/** Encode Run-Length 8bit int encoding (type 16)
* @param[in] int8_vec Vector of ints to encode
* @return Char vector of encoded bytes
*/
inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec);
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
namespace { // private helpers
inline std::vector<int32_t> convertFloatsToInts(std::vector<float> const & vec_in,
int const multiplier) {
std::vector<int32_t> vec_out;
for (size_t i=0; i<vec_in.size(); ++i) {
vec_out.push_back(static_cast<int32_t>(round(vec_in[i]*multiplier)));
}
return vec_out;
}
inline std::vector<int32_t> deltaEncode(std::vector<int32_t> const & vec_in) {
std::vector<int32_t> vec_out;
if (vec_in.size() == 0) return vec_out;
vec_out.push_back(vec_in[0]);
for (int32_t i=1; i< (int)vec_in.size(); ++i) {
vec_out.push_back(vec_in[i]-vec_in[i-1]);
}
return vec_out;
}
template<typename Int>
inline std::vector<int32_t> runLengthEncode(std::vector<Int> const & vec_in ) {
std::vector<int32_t> ret;
if (vec_in.size()==0) return ret;
Int curr = vec_in[0];
ret.push_back((int32_t)curr);
int32_t counter = 1;
for (std::size_t i = 1; i < vec_in.size(); ++i) {
if ( vec_in[i] == curr ) {
++counter;
} else {
ret.push_back(counter);
ret.push_back((int32_t)vec_in[i]);
curr = vec_in[i];
counter = 1;
}
}
ret.push_back(counter);
return ret;
}
inline std::vector<int32_t> recursiveIndexEncode(
std::vector<int32_t> const & vec_in,
int max /* =32767 */, int min /*=-32768 */) {
std::vector<int32_t> vec_out;
for (int32_t i=0; i< (int)vec_in.size(); ++i) {
int32_t x = vec_in[i];
if ( x >= 0 ) {
while (x >= max) {
vec_out.push_back(max);
x -= max;
}
} else {
while (x <= min) {
vec_out.push_back(min);
x += std::abs(min);
}
}
vec_out.push_back(x);
}
return vec_out;
}
inline void add_header(std::stringstream & ss, uint32_t array_size, uint32_t codec, uint32_t param /* =0 */) {
uint32_t be_codec = htonl(codec);
uint32_t be_array_size = htonl(array_size);
uint32_t be_param = htonl(param);
ss.write(reinterpret_cast< char * >(&be_codec), sizeof(be_codec));
ss.write(reinterpret_cast< char * >(&be_array_size), sizeof(be_array_size));
ss.write(reinterpret_cast< char * >(&be_param), sizeof(be_param));
}
inline std::vector<char> stringstreamToCharVector(std::stringstream & ss) {
std::string s = ss.str();
std::vector<char> ret(s.begin(), s.end());
return ret;
}
} // anon ns
inline std::vector<char> encodeInt8ToByte(std::vector<int8_t> vec_in) {
std::stringstream ss;
add_header(ss, vec_in.size(), 2, 0);
for (size_t i=0; i<vec_in.size(); ++i) {
ss.write(reinterpret_cast< char * >(&vec_in[i]), sizeof(vec_in[i]));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeFourByteInt(std::vector<int32_t> const & vec_in) {
std::stringstream ss;
add_header(ss, vec_in.size(), 4, 0);
for (size_t i=0; i<vec_in.size(); ++i) {
int32_t be_x = htonl(vec_in[i]);
ss.write(reinterpret_cast< char * >(&be_x), sizeof(be_x));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeStringVector(std::vector<std::string> const & in_sv, int32_t const CHAIN_LEN) {
char NULL_BYTE = 0x00;
std::stringstream ss;
add_header(ss, in_sv.size(), 5, CHAIN_LEN);
std::vector<char> char_vec;
for (size_t i=0; i<in_sv.size(); ++i) {
char_vec.insert(char_vec.end(), in_sv[i].begin(), in_sv[i].end());
for (size_t j=0; j<CHAIN_LEN-in_sv[i].size(); ++j) {
char_vec.push_back(NULL_BYTE);
}
}
for (size_t i=0; i<char_vec.size(); ++i) {
ss.write(reinterpret_cast< char * >(&char_vec[i]), sizeof(char_vec[i]));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeRunLengthChar(std::vector<char> const & in_cv) {
std::stringstream ss;
add_header(ss, in_cv.size(), 6, 0);
std::vector<int32_t> int_vec = runLengthEncode(in_cv);
for (size_t i=0; i<int_vec.size(); ++i) {
int32_t temp = htonl(int_vec[i]);
ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeRunLengthDeltaInt(std::vector<int32_t> int_vec) {
std::stringstream ss;
add_header(ss, int_vec.size(), 8, 0);
int_vec = deltaEncode(int_vec);
int_vec = runLengthEncode(int_vec);
for (size_t i=0; i<int_vec.size(); ++i) {
int32_t temp = htonl(int_vec[i]);
ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeRunLengthFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
std::stringstream ss;
add_header(ss, floats_in.size(), 9, multiplier);
std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
int_vec = runLengthEncode(int_vec);
for (size_t i=0; i<int_vec.size(); ++i) {
int32_t temp = htonl(int_vec[i]);
ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeDeltaRecursiveFloat(std::vector<float> const & floats_in, int32_t const multiplier) {
std::stringstream ss;
add_header(ss, floats_in.size(), 10, multiplier);
std::vector<int32_t> int_vec = convertFloatsToInts(floats_in, multiplier);
int_vec = deltaEncode(int_vec);
int_vec = recursiveIndexEncode(int_vec);
for (size_t i=0; i<int_vec.size(); ++i) {
int16_t temp = htons(int_vec[i]);
ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
}
return stringstreamToCharVector(ss);
}
inline std::vector<char> encodeRunLengthInt8(std::vector<int8_t> const & int8_vec) {
std::stringstream ss;
add_header(ss, int8_vec.size(), 16, 0);
std::vector<int32_t> const int_vec = runLengthEncode(int8_vec);
for (size_t i=0; i<int_vec.size(); ++i) {
int32_t temp = htonl(int_vec[i]);
ss.write(reinterpret_cast< char * >(&temp), sizeof(temp));
}
return stringstreamToCharVector(ss);
}
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gabriel Studer, Gerardo Tauriello
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose.
//
// *************************************************************************
#ifndef MMTF_DECODER_H
#define MMTF_DECODER_H
#include "structure_data.hpp"
#include "errors.hpp"
#include "msgpack_decoders.hpp"
#include "map_decoder.hpp"
#include <msgpack.hpp>
#include <fstream>
#include <sstream>
#include <string>
namespace mmtf {
/**
* @brief Decode an MMTF data structure from a mapDecoder.
* @param[out] data MMTF data structure to be filled
* @param[in] mapDecoder MapDecoder holding raw mmtf data
* @throw mmtf::DecodeError if an error occured
*/
inline void decodeFromMapDecoder(StructureData& data, MapDecoder& mapDecoder);
/**
* @brief Decode an MMTF data structure from a byte buffer.
* @param[out] data MMTF data structure to be filled
* @param[in] buffer File contents
* @param[in] size Size of buffer
* @throw mmtf::DecodeError if an error occured
*/
inline void decodeFromBuffer(StructureData& data, const char* buffer,
size_t size);
/**
* @brief Decode an MMTF data structure from a stream.
*
* Note that the full stream is read from start to end before decoding it!
* Use ::decodeFromBuffer if you wish to use just part of the stream.
*
* @param[out] data MMTF data structure to be filled
* @param[in] stream Stream that holds mmtf data
* @tparam Stream Any stream type compatible to std::istream
* @throw mmtf::DecodeError if an error occured
*/
template <typename Stream>
inline void decodeFromStream(StructureData& data, Stream& stream);
/**
* @brief Decode an MMTF data structure from an existing file.
* @param[out] data MMTF data structure to be filled
* @param[in] filename Path to file to load
* @throw mmtf::DecodeError if an error occured
*/
inline void decodeFromFile(StructureData& data, const std::string& filename);
/**
* @brief Get a mapDecoder for un-decoded MMTF data
* @param[out] mapDecoder MapDecoder to hold raw mmtf data
*
* Other parameters and behavior are as in ::decodeFromBuffer, but this doesn't
* decode the MMTF content.
*/
inline void mapDecoderFromBuffer(MapDecoder& mapDecoder, const char* buffer,
std::size_t size);
/**
* @brief Get a mapDecoder into an un-decoded MMTF data
* @param[out] mapDecoder MapDecoder to hold raw mmtf data
*
* Other parameters and behavior are as in ::decodeFromStream, but this doesn't
* decode the MMTF content.
*/
template <typename Stream>
inline void mapDecoderFromStream(MapDecoder& mapDecoder, Stream& stream);
/**
* @brief Get a mapDecoder into an un-decoded MMTF data
* @param[out] mapDecoder MapDecoder to hold raw mmtf data
*
* Other parameters and behavior are as in ::decodeFromFile, but this doesn't
* decode the MMTF content.
*/
inline void mapDecoderFromFile(MapDecoder& mapDecoder,
const std::string& filename);
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
inline void decodeFromMapDecoder(StructureData& data, MapDecoder& md) {
mmtf::impl::decodeFromMapDecoder(data, md);
}
inline void decodeFromBuffer(StructureData& data, const char* buffer,
size_t size) {
MapDecoder md;
mapDecoderFromBuffer(md, buffer, size);
decodeFromMapDecoder(data, md);
}
template <typename Stream>
inline void decodeFromStream(StructureData& data, Stream& stream) {
MapDecoder md;
mapDecoderFromStream(md, stream);
decodeFromMapDecoder(data, md);
}
inline void decodeFromFile(StructureData& data, const std::string& filename) {
MapDecoder md;
mapDecoderFromFile(md, filename);
decodeFromMapDecoder(data, md);
}
inline void mapDecoderFromBuffer(MapDecoder& mapDecoder, const char* buffer,
std::size_t size) {
mapDecoder.initFromBuffer(buffer, size);
}
template <typename Stream>
inline void mapDecoderFromStream(MapDecoder& mapDecoder, Stream& stream) {
// parse straight into string buffer
std::string buffer;
stream.seekg(0, std::ios::end);
buffer.resize(stream.tellg());
stream.seekg(0, std::ios::beg);
if (!buffer.empty()) stream.read(&buffer[0], buffer.size());
mapDecoderFromBuffer(mapDecoder, buffer.data(), buffer.size());
}
inline void mapDecoderFromFile(MapDecoder& mapDecoder,
const std::string& filename) {
// read file as binary
std::ifstream ifs(filename.c_str(), std::ifstream::in | std::ios::binary);
if (!ifs.is_open()) {
throw DecodeError("Could not open file: " + filename);
}
mapDecoderFromStream(mapDecoder, ifs);
}
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The author of this code is: Daniel Farrell
//
// Based on mmtf_python, adapted to c++ standards 2018
//
// *************************************************************************
#ifndef MMTF_ENCODER_H
#define MMTF_ENCODER_H
#include "structure_data.hpp"
#include "errors.hpp"
#include "msgpack_encoders.hpp"
#include "binary_encoder.hpp"
#include <string>
#include <fstream>
namespace mmtf {
/**
* @brief Encode an MMTF data structure into a file.
* @param[in] data MMTF data structure to be stored
* @param[in] filename Path to file to load
* @param[in] coord_divider Divisor for coordinates
* @param[in] occupancy_b_factor_divider Divisor for occupancy and b-factor
* @param[in] chain_name_max_length Max. length for chain name strings
* @throw mmtf::EncodeError if an error occurred
*
* Common settings for the divisors are the default values for a loss-less
* encoding and both set to 10 for a lossy variant.
*/
inline void encodeToFile(const StructureData& data,
const std::string& filename, int32_t coord_divider = 1000,
int32_t occupancy_b_factor_divider = 100,
int32_t chain_name_max_length = 4);
/**
* @brief Encode an MMTF data structure into a stream.
* @param[in] data MMTF data structure to be stored
* @param[in] stream Stream to encode to
* @tparam Stream Any stream type compatible to std::ostream
*
* Other parameters and behavior are as in ::encodeToFile, but this enables you
* to store the data to other types of storage.
*/
template <typename Stream>
inline void encodeToStream(const StructureData& data, Stream& stream,
int32_t coord_divider = 1000, int32_t occupancy_b_factor_divider = 100,
int32_t chain_name_max_length = 4);
/**
* @brief Encode an MMTF data structure into a map of msgpack objects.
* @param[in] data MMTF data structure to be stored
* @param[in] m_zone msgpack::zone object to use
* @return Object which can be modified and passed to msgpack::pack
*
* Other parameters and behavior are as in ::encodeToFile, but this enables you
* to add additional fields before packing.
*/
inline std::map<std::string, msgpack::object>
encodeToMap(const StructureData& data, msgpack::zone& m_zone,
int32_t coord_divider = 1000, int32_t occupancy_b_factor_divider = 100,
int32_t chain_name_max_length = 4);
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
inline void encodeToFile(const StructureData& data,
const std::string& filename, int32_t coord_divider,
int32_t occupancy_b_factor_divider, int32_t chain_name_max_length) {
// encode to a file
std::ofstream ofs(filename.c_str(), std::ios::binary | std::ios::out );
if ( !ofs ) {
throw EncodeError("Could not open >" + filename + "< for writing, exiting.");
}
encodeToStream(data, ofs, coord_divider,
occupancy_b_factor_divider, chain_name_max_length);
}
template <typename Stream>
inline void encodeToStream(const StructureData& data, Stream& stream,
int32_t coord_divider, int32_t occupancy_b_factor_divider,
int32_t chain_name_max_length) {
msgpack::pack(stream, encodeToMap(data, data.msgpack_zone, coord_divider,
occupancy_b_factor_divider, chain_name_max_length));
}
inline std::map<std::string, msgpack::object>
encodeToMap(const StructureData& data, msgpack::zone& m_zone,
int32_t coord_divider, int32_t occupancy_b_factor_divider,
int32_t chain_name_max_length) {
if (!data.hasConsistentData(true, chain_name_max_length)) {
throw mmtf::EncodeError("mmtf EncoderError, StructureData does not have Consistent data... exiting!");
}
std::map<std::string, msgpack::object> data_map;
// std::string
data_map["mmtfVersion"] = msgpack::object(data.mmtfVersion, m_zone);
data_map["mmtfProducer"] = msgpack::object(data.mmtfProducer, m_zone);
if (!mmtf::isDefaultValue(data.spaceGroup)) {
data_map["spaceGroup"] = msgpack::object(data.spaceGroup, m_zone);
}
if (!mmtf::isDefaultValue(data.structureId)) {
data_map["structureId"] = msgpack::object(data.structureId, m_zone);
}
if (!mmtf::isDefaultValue(data.title)) {
data_map["title"] = msgpack::object(data.title, m_zone);
}
if (!mmtf::isDefaultValue(data.depositionDate)) {
data_map["depositionDate"] = msgpack::object(data.depositionDate, m_zone);
}
if (!mmtf::isDefaultValue(data.releaseDate)) {
data_map["releaseDate"] = msgpack::object(data.releaseDate, m_zone);
}
// std::vector<std::string>
data_map["chainIdList"] = msgpack::object(mmtf::encodeStringVector(data.chainIdList, chain_name_max_length), m_zone);
if (!mmtf::isDefaultValue(data.chainNameList)) {
data_map["chainNameList"] = msgpack::object(mmtf::encodeStringVector(data.chainNameList, chain_name_max_length), m_zone);
}
if (!mmtf::isDefaultValue(data.experimentalMethods)) {
data_map["experimentalMethods"] =
msgpack::object(data.experimentalMethods, m_zone);
}
// std::vector<char>
if (!mmtf::isDefaultValue(data.altLocList)) {
data_map["altLocList"] =
msgpack::object(mmtf::encodeRunLengthChar(data.altLocList), m_zone);
}
if (!mmtf::isDefaultValue(data.insCodeList)) {
data_map["insCodeList"] = msgpack::object(mmtf::encodeRunLengthChar(data.insCodeList), m_zone);
}
// std::vector<int8_t>
if (!mmtf::isDefaultValue(data.bondOrderList)) {
data_map["bondOrderList"] =
msgpack::object(mmtf::encodeInt8ToByte(data.bondOrderList), m_zone);
}
// std::vector<int8_t>
if (!mmtf::isDefaultValue(data.bondResonanceList)) {
data_map["bondResonanceList"] =
msgpack::object(mmtf::encodeRunLengthInt8(data.bondResonanceList), m_zone);
}
if (!mmtf::isDefaultValue(data.secStructList)) {
data_map["secStructList"] =
msgpack::object(mmtf::encodeInt8ToByte(data.secStructList), m_zone);
}
// int32_t
data_map["numBonds"] = msgpack::object(data.numBonds, m_zone);
data_map["numAtoms"] = msgpack::object(data.numAtoms, m_zone);
data_map["numGroups"] = msgpack::object(data.numGroups, m_zone);
data_map["numChains"] = msgpack::object(data.numChains, m_zone);
data_map["numModels"] = msgpack::object(data.numModels, m_zone);
// std::vector<int32_t>
data_map["groupTypeList"] = msgpack::object(mmtf::encodeFourByteInt(data.groupTypeList), m_zone);
data_map["groupIdList"] = msgpack::object(mmtf::encodeRunLengthDeltaInt(data.groupIdList), m_zone);
data_map["groupsPerChain"] = msgpack::object(data.groupsPerChain, m_zone);
data_map["chainsPerModel"] = msgpack::object(data.chainsPerModel, m_zone);
if (!mmtf::isDefaultValue(data.bondAtomList)) {
data_map["bondAtomList"] = msgpack::object(mmtf::encodeFourByteInt(data.bondAtomList), m_zone);
}
if (!mmtf::isDefaultValue(data.atomIdList)) {
data_map["atomIdList"] = msgpack::object(mmtf::encodeRunLengthDeltaInt(data.atomIdList), m_zone);
}
if (!mmtf::isDefaultValue(data.sequenceIndexList)) {
data_map["sequenceIndexList"] = msgpack::object(mmtf::encodeRunLengthDeltaInt(data.sequenceIndexList), m_zone);
}
// float
if (!mmtf::isDefaultValue(data.resolution)) {
data_map["resolution"] = msgpack::object(data.resolution, m_zone);
}
if (!mmtf::isDefaultValue(data.rFree)) {
data_map["rFree"] = msgpack::object(data.rFree, m_zone);
}
if (!mmtf::isDefaultValue(data.rWork)) {
data_map["rWork"] = msgpack::object(data.rWork, m_zone);
}
// std::vector<float>
data_map["xCoordList"] = msgpack::object(mmtf::encodeDeltaRecursiveFloat(data.xCoordList, coord_divider), m_zone);
data_map["yCoordList"] = msgpack::object(mmtf::encodeDeltaRecursiveFloat(data.yCoordList, coord_divider), m_zone);
data_map["zCoordList"] = msgpack::object(mmtf::encodeDeltaRecursiveFloat(data.zCoordList, coord_divider), m_zone);
if (!mmtf::isDefaultValue(data.bFactorList)) {
data_map["bFactorList"] = msgpack::object(mmtf::encodeDeltaRecursiveFloat(data.bFactorList, occupancy_b_factor_divider), m_zone);
}
if (!mmtf::isDefaultValue(data.occupancyList)) {
data_map["occupancyList"] = msgpack::object(mmtf::encodeRunLengthFloat(data.occupancyList, occupancy_b_factor_divider), m_zone);
}
if (!mmtf::isDefaultValue(data.unitCell)) {
data_map["unitCell"] = msgpack::object(data.unitCell, m_zone);
}
// std::vector<GroupType>
data_map["groupList"] = msgpack::object(data.groupList, m_zone);
// std::vector<BioAssembly>
if (!mmtf::isDefaultValue(data.bioAssemblyList)) {
data_map["bioAssemblyList"] = msgpack::object(data.bioAssemblyList, m_zone);
}
// std::vector<Entity>
if (!mmtf::isDefaultValue(data.entityList)) {
data_map["entityList"] = msgpack::object(data.entityList, m_zone);
}
// std::vector<std::vector<float>>
if (!mmtf::isDefaultValue(data.ncsOperatorList)) {
data_map["ncsOperatorList"] = msgpack::object(data.ncsOperatorList, m_zone);
}
// extraProperties
if (!mmtf::isDefaultValue(data.bondProperties)) {
data_map["bondProperties"] = msgpack::object(data.bondProperties, m_zone);
}
if (!mmtf::isDefaultValue(data.atomProperties)) {
data_map["atomProperties"] = msgpack::object(data.atomProperties, m_zone);
}
if (!mmtf::isDefaultValue(data.groupProperties)) {
data_map["groupProperties"] = msgpack::object(data.groupProperties, m_zone);
}
if (!mmtf::isDefaultValue(data.chainProperties)) {
data_map["chainProperties"] = msgpack::object(data.chainProperties, m_zone);
}
if (!mmtf::isDefaultValue(data.modelProperties)) {
data_map["modelProperties"] = msgpack::object(data.modelProperties, m_zone);
}
if (!mmtf::isDefaultValue(data.extraProperties)) {
data_map["extraProperties"] = msgpack::object(data.extraProperties, m_zone);
}
return data_map;
}
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gerardo Tauriello
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose. Updated 2018 by Daniel Farrell.
//
// *************************************************************************
#ifndef MMTF_ERRORS_H
#define MMTF_ERRORS_H
#include <stdexcept>
namespace mmtf {
/**
* @brief Exception thrown when failing during decoding.
*/
class DecodeError: public std::runtime_error {
public:
DecodeError(const std::string& m): std::runtime_error(m) { }
};
/**
* @brief Exception thrown when failing during encoding.
*/
class EncodeError: public std::runtime_error {
public:
EncodeError(const std::string& m): std::runtime_error(m) { }
};
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Thomas Holder
//
// *************************************************************************
//
// Helper functions and classes for exporting MMTF data.
// See "examples/tableexport.cpp" for example usage.
//
// *************************************************************************
#ifndef MMTF_EXPORT_HELPERS_H
#define MMTF_EXPORT_HELPERS_H
#include "errors.hpp"
#include "structure_data.hpp"
#include <vector>
namespace mmtf
{
/**
* @brief Helper class for adding bonds to a group-redundant system
*
* @pre Atoms already exist in the system
*
* @pre groupTypeList has no duplicates (otherwise adding an inter-residue
* bond will add it to all residues with the same group type)
*/
class BondAdder {
StructureData* m_data;
std::vector<int32_t> m_atom2groupType;
std::vector<int32_t> m_atomOffsets;
public:
/**
* @param[in,out] data Consistent system with atoms
*
* @throw mmtf::EncodeError if groupTypeList has duplicates
*/
BondAdder(StructureData& data)
: m_data(&data), m_atomOffsets(data.groupTypeList.size(), -1)
{
m_atom2groupType.reserve(data.numAtoms);
for (size_t i = 0; i < data.groupTypeList.size(); ++i) {
int32_t groupType = data.groupTypeList[i];
// sanity check
if (m_atomOffsets[groupType] != -1) {
throw EncodeError("groupTypeList has duplicates");
}
size_t atomOffset = m_atom2groupType.size();
size_t groupSize = data.groupList[groupType].atomNameList.size();
m_atomOffsets[groupType] = atomOffset;
m_atom2groupType.resize(atomOffset + groupSize, groupType);
}
}
/**
* @brief Add one bond
*
* @param[in] atom1 Atom index 1 (zero-based)
* @param[in] atom2 Atom index 2 (zero-based)
* @param[in] order Bond order
*
* @return False if atom indices out of bounds
*/
bool operator()(int32_t atom1, int32_t atom2, int8_t order)
{
if (atom1 >= m_atom2groupType.size() ||
atom2 >= m_atom2groupType.size())
return false;
if (m_atom2groupType[atom1] == m_atom2groupType[atom2]) {
int32_t groupType = m_atom2groupType[atom1];
GroupType& group = m_data->groupList[groupType];
group.bondAtomList.push_back(atom1 - m_atomOffsets[groupType]);
group.bondAtomList.push_back(atom2 - m_atomOffsets[groupType]);
group.bondOrderList.push_back(order);
} else {
m_data->bondAtomList.push_back(atom1);
m_data->bondAtomList.push_back(atom2);
m_data->bondOrderList.push_back(order);
}
++m_data->numBonds;
return true;
}
};
/**
* @brief Eliminate redundant groups from groupList
*
* Modifies groupList and groupTypeList
*
* @param[in,out] data Consistent system
*/
inline void compressGroupList(StructureData& data)
{
size_t n_old = data.groupList.size();
size_t i_free = 0;
std::vector<size_t> idremap(n_old, 0);
for (size_t i = 1; i < n_old; ++i) {
size_t i_found = 0;
while (i_found < i && !(data.groupList[i] == data.groupList[i_found])) {
++i_found;
}
if (i_found == i) {
if (i_free != 0) {
data.groupList[i_free] = data.groupList[i]; // std::move possible with C++11
i_found = i_free;
++i_free;
}
} else if (i_free == 0) {
i_free = i;
}
idremap[i] = i_found;
}
if (i_free != 0) {
data.groupList.resize(i_free);
for (size_t i = 0; i < data.groupTypeList.size(); ++i) {
data.groupTypeList[i] = idremap[data.groupTypeList[i]];
}
}
}
} // namespace mmtf
#endif
// vi:sw=2:expandtab
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gabriel Studer, Gerardo Tauriello, and
// Daniel Farrell.
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose.
//
// *************************************************************************
#ifndef MMTF_MAP_DECODER_H
#define MMTF_MAP_DECODER_H
#include "structure_data.hpp"
#include "binary_decoder.hpp"
#include "errors.hpp"
#include <msgpack.hpp>
#include <map>
#include <iostream>
namespace mmtf {
/**
* @brief Helper class to decode msgpack maps into object fields.
* Class cannot be copied as it contains unique pointers to msgpack data.
*/
class MapDecoder {
public:
/**
* @brief Construct empty decoder. Use init-functions to fill it.
*/
MapDecoder() {}
/**
* @brief Construct decoder given a msgpack::object.
* Reads out all key-value pairs and converts key to string if possible
* (warns otherwise).
* @throw mmtf::DecodeError if obj is not a map.
*/
MapDecoder(const msgpack::object& obj);
/**
* @brief Construct decoder given a string to msgpack::object map.
* Reads out all key-value pairs and converts key to string if possible
* (warns otherwise).
*/
MapDecoder(const std::map<std::string, msgpack::object>& map_in);
/**
* @brief Initialize given a msgpack::object.
* Clears internal data and has same effect as
* MapDecoder::MapDecoder(const msgpack::object&).
*/
void initFromObject(const msgpack::object& obj);
/**
* @brief Initialize from byte buffer of given size.
* Unpacks data and then same effect as MapDecoder::initFromObject.
*/
void initFromBuffer(const char* buffer, size_t size);
/**
* @brief Extract value from map and decode into target.
*
* @param[in] key Key into msgpack map.
* @param[in] required True if field is required by MMTF specs.
* @param[out] target Store decoded value into this field.
*
* If msgpack type is not as expected, we write a warning to stderr.
* If conversion to the target type fails, msgpack throws an exception.
* If a required field is missing in the map or if binary decoding fails,
* we throw an mmtf::DecodeError.
*/
template<typename T>
void decode(const std::string& key, bool required, T& target) const;
/**
* @brief Don't decode, but instead just copy map-contents onto a zone
* for later decoding/processing you should use this when you have
* nested msgpack objects.
* Note: There is some copy overhead here. If speed becomes an issue
* we should come up with a way to keep the original handle alive.
* @param[in] key Key into msgpack map.
* @param[in] required True if field is required by MMTF specs or you
* @param[out] target std::map<std::string, msgpack::object> that holds access to data on zone.
* @param[in] zone msgpack::zone to copy data onto
*
* If msgpack type is not as expected, we write a warning to stderr.
* If conversion to the target type fails, msgpack throws an exception.
* If a required field is missing in the map or if binary decoding fails,
* we throw an mmtf::DecodeError.
*/
void
copy_decode(const std::string& key, bool required,
std::map<std::string, msgpack::object>& target,
msgpack::zone& zone) const;
/**
* @brief Check if there are any keys, that were not decoded.
* This is to be called after all expected fields have been decoded.
* A warning is written to stderr for each non-decoded key.
*/
void checkExtraKeys() const;
private:
// when constructed with byte buffer, we keep unpacked object
// NOTE: this contains a unique pointer to msgpack data (cannot copy)
msgpack::object_handle object_handle_;
// key-value pairs extracted from msgpack map
typedef std::map<std::string, const msgpack::object*> data_map_type_;
data_map_type_ data_map_;
// set of keys that were successfully decoded
mutable std::set<std::string> decoded_keys_;
/**
* @brief Initialize object given an object
* helper function used by constructors
*/
void init_from_msgpack_obj(const msgpack::object& obj);
// type checking (note: doesn't check array elements)
// -> only writes warning to cerr
// -> exception thrown by msgpack if conversion fails
void checkType_(const std::string& key, msgpack::type::object_type type,
const float& target) const;
void checkType_(const std::string& key, msgpack::type::object_type type,
const int32_t& target) const;
void checkType_(const std::string& key, msgpack::type::object_type type,
const char& target) const;
void checkType_(const std::string& key, msgpack::type::object_type type,
const std::string& target) const;
template <typename T>
void checkType_(const std::string& key, msgpack::type::object_type type,
const std::vector<T>& target) const;
template <typename T>
void checkType_(const std::string& key, msgpack::type::object_type type,
const T& target) const;
};
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
inline MapDecoder::MapDecoder(const msgpack::object& obj) {
init_from_msgpack_obj(obj);
}
inline MapDecoder::MapDecoder(const std::map<std::string, msgpack::object>& map_in) {
std::map<std::string, msgpack::object>::const_iterator it;
for (it = map_in.begin(); it != map_in.end(); ++it) {
data_map_[it->first] = &(it->second);
}
}
inline void MapDecoder::initFromObject(const msgpack::object& obj) {
data_map_.clear();
decoded_keys_.clear();
init_from_msgpack_obj(obj);
}
inline void MapDecoder::initFromBuffer(const char* buffer, std::size_t size) {
msgpack::unpack(object_handle_, buffer, size);
initFromObject(object_handle_.get());
}
void
inline MapDecoder::copy_decode(const std::string& key, bool required,
std::map<std::string, msgpack::object>& target,
msgpack::zone & zone) const {
// note: cost of O(M*log(N)) string comparisons (M parsed, N in map)
data_map_type_::const_iterator it = data_map_.find(key);
if (it != data_map_.end()) {
decoded_keys_.insert(key);
// expensive copy here
msgpack::object tmp_object(*it->second, zone);
tmp_object.convert(target);
}
else if (required) {
throw DecodeError("MsgPack MAP does not contain required entry "
+ key);
}
}
template<typename T>
inline void MapDecoder::decode(const std::string& key, bool required, T& target) const {
// note: cost of O(M*log(N)) string comparisons (M parsed, N in map)
data_map_type_::const_iterator it = data_map_.find(key);
if (it != data_map_.end()) {
checkType_(key, it->second->type, target);
if (it->second->type == msgpack::type::BIN) {
BinaryDecoder bd(*it->second, key);
bd.decode(target);
} else {
it->second->convert(target);
}
decoded_keys_.insert(key);
}
else if (required) {
throw DecodeError("MsgPack MAP does not contain required entry "
+ key);
}
}
inline void MapDecoder::checkExtraKeys() const {
// note: cost of O(N*log(M))) string comparisons (M parsed, N in map)
// simple set difference algorithm
data_map_type_::const_iterator map_it;
std::set<std::string>::const_iterator parsed_it;
for (map_it = data_map_.begin(); map_it != data_map_.end(); ++map_it) {
parsed_it = decoded_keys_.find(map_it->first);
if (parsed_it == decoded_keys_.end()) {
std::cerr << "Warning: Found non-parsed key " << map_it->first
<< " in MsgPack MAP.\n";
}
}
}
inline void MapDecoder::init_from_msgpack_obj(const msgpack::object& obj) {
// sanity checks
if (obj.type != msgpack::type::MAP) {
throw DecodeError("Expected msgpack type to be MAP");
}
// get data
msgpack::object_kv* current_key_value = obj.via.map.ptr;
msgpack::object_kv* last_key_value = current_key_value + obj.via.map.size;
for (; current_key_value != last_key_value; ++current_key_value) {
msgpack::object* key = &(current_key_value->key);
msgpack::object* value = &(current_key_value->val);
if (key->type == msgpack::type::STR) {
std::string data_map_key(key->via.str.ptr, key->via.str.size);
data_map_[data_map_key] = value;
} else {
std::cerr << "Warning: Found non-string key type " << key->type
<< "! Skipping..." << std::endl;
}
}
}
inline void MapDecoder::checkType_(const std::string& key,
msgpack::type::object_type type,
const float&) const {
if (type != msgpack::type::FLOAT32 && type != msgpack::type::FLOAT64) {
std::cerr << "Warning: Non-float type " << type << " found for "
"entry " << key << std::endl;
}
}
inline void MapDecoder::checkType_(const std::string& key,
msgpack::type::object_type type,
const int32_t&) const {
if ( type != msgpack::type::POSITIVE_INTEGER
&& type != msgpack::type::NEGATIVE_INTEGER) {
std::cerr << "Warning: Non-int type " << type << " found for "
"entry " << key << std::endl;
}
}
inline void MapDecoder::checkType_(const std::string& key,
msgpack::type::object_type type,
const char&) const {
if (type != msgpack::type::STR) {
std::cerr << "Warning: Non-string type " << type << " found for "
"entry " << key << std::endl;
}
}
inline void MapDecoder::checkType_(const std::string& key,
msgpack::type::object_type type,
const std::string&) const {
if (type != msgpack::type::STR) {
std::cerr << "Warning: Non-string type " << type << " found for "
"entry " << key << std::endl;
}
}
template <typename T>
void MapDecoder::checkType_(const std::string& key,
msgpack::type::object_type type,
const std::vector<T>&) const {
if (type != msgpack::type::ARRAY && type != msgpack::type::BIN) {
std::cerr << "Warning: Non-array type " << type << " found for "
"entry " << key << std::endl;
}
}
template <typename T>
void MapDecoder::checkType_(const std::string&,
msgpack::type::object_type,
const T &) const {
// Do nothing -- allow all through
}
} // mmtf namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gabriel Studer, Gerardo Tauriello, and
// Daniel Farrell.
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose.
//
// *************************************************************************
#ifndef MMTF_MSGPACK_DECODERS_H
#define MMTF_MSGPACK_DECODERS_H
#include "structure_data.hpp"
#include "map_decoder.hpp"
#include "errors.hpp"
#include <msgpack.hpp>
// custom global function used here and in decoder.hpp
namespace mmtf {
namespace impl {
inline void decodeFromMapDecoder(StructureData& data, MapDecoder& md) {
md.decode("mmtfVersion", true, data.mmtfVersion);
// check if version is compatible before continuing
if (!mmtf::isVersionSupported(data.mmtfVersion)) {
throw mmtf::DecodeError("Unsupported MMTF version "
+ data.mmtfVersion);
}
md.decode("mmtfProducer", true, data.mmtfProducer);
md.decode("unitCell", false, data.unitCell);
md.decode("spaceGroup", false, data.spaceGroup);
md.decode("structureId", false, data.structureId);
md.decode("title", false, data.title);
md.decode("depositionDate", false, data.depositionDate);
md.decode("releaseDate", false, data.releaseDate);
md.decode("ncsOperatorList", false, data.ncsOperatorList);
md.decode("bioAssemblyList", false, data.bioAssemblyList);
md.decode("entityList", false, data.entityList);
md.decode("experimentalMethods", false, data.experimentalMethods);
md.decode("resolution", false, data.resolution);
md.decode("rFree", false, data.rFree);
md.decode("rWork", false, data.rWork);
md.decode("numBonds", true, data.numBonds);
md.decode("numAtoms", true, data.numAtoms);
md.decode("numGroups", true, data.numGroups);
md.decode("numChains", true, data.numChains);
md.decode("numModels", true, data.numModels);
md.decode("groupList", true, data.groupList);
md.decode("bondAtomList", false, data.bondAtomList);
md.decode("bondOrderList", false, data.bondOrderList);
md.decode("bondResonanceList", false, data.bondResonanceList);
md.decode("xCoordList", true, data.xCoordList);
md.decode("yCoordList", true, data.yCoordList);
md.decode("zCoordList", true, data.zCoordList);
md.decode("bFactorList", false, data.bFactorList);
md.decode("atomIdList", false, data.atomIdList);
md.decode("altLocList", false, data.altLocList);
md.decode("occupancyList", false, data.occupancyList);
md.decode("groupIdList", true, data.groupIdList);
md.decode("groupTypeList", true, data.groupTypeList);
md.decode("secStructList", false, data.secStructList);
md.decode("insCodeList", false, data.insCodeList);
md.decode("sequenceIndexList", false, data.sequenceIndexList);
md.decode("chainIdList", true, data.chainIdList);
md.decode("chainNameList", false, data.chainNameList);
md.decode("groupsPerChain", true, data.groupsPerChain);
md.decode("chainsPerModel", true, data.chainsPerModel);
// extraProperties (application specific stuff)
// Perform expensive copy if exists.
// Implement outside accessor if speed is necessary
md.copy_decode("bondProperties", false, data.bondProperties,
data.msgpack_zone);
md.copy_decode("atomProperties", false, data.atomProperties,
data.msgpack_zone);
md.copy_decode("groupProperties", false, data.groupProperties,
data.msgpack_zone);
md.copy_decode("chainProperties", false, data.chainProperties,
data.msgpack_zone);
md.copy_decode("modelProperties", false, data.modelProperties,
data.msgpack_zone);
md.copy_decode("extraProperties", false, data.extraProperties,
data.msgpack_zone);
md.checkExtraKeys();
}
}
}
// here we specialize msgpack-c functionality so we can use obj.convert(..)
namespace msgpack {
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
// custom specialization for chars stored as strings
template <>
struct convert<char> {
const msgpack::object& operator()(const msgpack::object& obj,
char& c) const {
// extract string
std::string temp;
obj.convert(temp);
// ensure length
if (temp.size() != 1) {
throw mmtf::DecodeError("Observed single letter string not being "
"of length one!");
}
c = temp[0];
return obj;
}
};
template <>
struct convert<mmtf::GroupType> {
const msgpack::object& operator()(const msgpack::object& obj,
mmtf::GroupType& group) const {
mmtf::MapDecoder md(obj);
md.decode("formalChargeList", true, group.formalChargeList);
md.decode("atomNameList", true, group.atomNameList);
md.decode("elementList", true, group.elementList);
md.decode("bondAtomList", false, group.bondAtomList);
md.decode("bondOrderList", false, group.bondOrderList);
md.decode("bondResonanceList", false, group.bondResonanceList);
md.decode("groupName", true, group.groupName);
md.decode("singleLetterCode", true, group.singleLetterCode);
md.decode("chemCompType", true, group.chemCompType);
md.checkExtraKeys();
return obj;
}
};
template <>
struct convert<mmtf::Entity> {
const msgpack::object& operator()(const msgpack::object& obj,
mmtf::Entity& entity) const {
mmtf::MapDecoder md(obj);
md.decode("chainIndexList", true, entity.chainIndexList);
md.decode("description", true, entity.description);
md.decode("type", true, entity.type);
md.decode("sequence", true, entity.sequence);
md.checkExtraKeys();
return obj;
}
};
template <>
struct convert<mmtf::Transform> {
const msgpack::object& operator()(const msgpack::object& obj,
mmtf::Transform& transform) const {
mmtf::MapDecoder md(obj);
md.decode("chainIndexList", true, transform.chainIndexList);
md.decode("matrix", true, transform.matrix);
md.checkExtraKeys();
return obj;
}
};
template <>
struct convert<mmtf::BioAssembly> {
const msgpack::object& operator()(const msgpack::object& obj,
mmtf::BioAssembly& assembly) const {
mmtf::MapDecoder md(obj);
md.decode("transformList", true, assembly.transformList);
md.decode("name", true, assembly.name);
md.checkExtraKeys();
return obj;
}
};
template <>
struct convert<mmtf::StructureData> {
const msgpack::object& operator()(const msgpack::object& obj,
mmtf::StructureData& data) const {
mmtf::MapDecoder md(obj);
mmtf::impl::decodeFromMapDecoder(data, md);
return obj;
}
};
}}} // msgpack::VERSION::adaptor namespace
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The author of this code is: Daniel Farrell
//
// Based on mmtf_python, adapted to c++ standards 2018
//
// *************************************************************************
//
#ifndef MMTF_MSGPACK_ENCODERS_H
#define MMTF_MSGPACK_ENCODERS_H
#include "structure_data.hpp"
#include <msgpack.hpp>
namespace msgpack {
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
/* *
* @brief encode a mmtf::GroupType to a msgpack map type.
*
* We must use this method for packing because 'char' is treated as an int
* by msgpack. We cannot use the prepackaged intrusive method.
*
* Also we use this because 3 fields are optional!
*/
template <>
struct object_with_zone<mmtf::GroupType> {
void operator()(msgpack::object::with_zone& o, mmtf::GroupType const& v) const {
int this_size = 9;
bool use_bondAtom = true, use_bondOrder = true, use_bondResonance = true;
if (mmtf::isDefaultValue(v.bondAtomList)) {
use_bondAtom = false;
--this_size;
}
if (mmtf::isDefaultValue(v.bondOrderList)) {
use_bondOrder = false;
--this_size;
}
if (mmtf::isDefaultValue(v.bondResonanceList)) {
use_bondResonance = false;
--this_size;
}
o.type = type::MAP;
o.via.map.size = this_size;
o.via.map.ptr = static_cast<msgpack::object_kv*>(o.zone.allocate_align(sizeof(msgpack::object_kv)*this_size, MSGPACK_ZONE_ALIGNOF(msgpack::object_kv)));
int current_size = 0;
o.via.map.ptr[current_size].key = msgpack::object("formalChargeList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.formalChargeList, o.zone);
++current_size;
o.via.map.ptr[current_size].key = msgpack::object("atomNameList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.atomNameList, o.zone);
++current_size;
o.via.map.ptr[current_size].key = msgpack::object("elementList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.elementList, o.zone);
++current_size;
o.via.map.ptr[current_size].key = msgpack::object("groupName", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.groupName, o.zone);
++current_size;
o.via.map.ptr[current_size].key = msgpack::object("singleLetterCode", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(std::string(1,v.singleLetterCode), o.zone);
++current_size;
o.via.map.ptr[current_size].key = msgpack::object("chemCompType", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.chemCompType, o.zone);
++current_size;
if (use_bondAtom) {
o.via.map.ptr[current_size].key = msgpack::object("bondAtomList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.bondAtomList, o.zone);
++current_size;
}
if (use_bondOrder) {
o.via.map.ptr[current_size].key = msgpack::object("bondOrderList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.bondOrderList, o.zone);
++current_size;
}
if (use_bondResonance) {
o.via.map.ptr[current_size].key = msgpack::object("bondResonanceList", o.zone);
o.via.map.ptr[current_size].val = msgpack::object(v.bondResonanceList, o.zone);
++current_size;
}
}
};
} // namespace adaptor
} // MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
} // namespace msgpack
#endif
// *************************************************************************
//
// Licensed under the MIT License (see accompanying LICENSE file).
//
// The authors of this code are: Gabriel Studer, Gerardo Tauriello,
// Daniel Farrell.
//
// Based on mmtf_c developed by Julien Ferte (http://www.julienferte.com/),
// Anthony Bradley, Thomas Holder with contributions from Yana Valasatava,
// Gazal Kalyan, Alexander Rose.
//
// *************************************************************************
#ifndef MMTF_STRUCTURE_DATA_H
#define MMTF_STRUCTURE_DATA_H
#include "errors.hpp"
#include <string>
#include <vector>
#include <algorithm>
#include <stdint.h>
#include <sstream>
#include <limits>
#include <msgpack.hpp>
#include <iostream>
#include <iomanip>
namespace mmtf {
/**
* @brief MMTF spec version which this library implements
*/
#define MMTF_SPEC_VERSION_MAJOR 1
#define MMTF_SPEC_VERSION_MINOR 1
/**
* @brief Get string representation of MMTF spec version implemented here
*/
inline std::string getVersionString();
/**
* @brief Check if version is supported (minor revisions ok, major ones not)
* @return true if supported, false if not
*/
inline bool isVersionSupported(const std::string& version_string);
/**
* @brief Group (residue) level data store
*
* https://github.com/rcsb/mmtf/blob/HEAD/spec.md#group-data
*/
struct GroupType { // NOTE: can't use MSGPACK_DEFINE_MAP due to char
std::vector<int32_t> formalChargeList;
std::vector<std::string> atomNameList;
std::vector<std::string> elementList;
std::vector<int32_t> bondAtomList;
std::vector<int8_t> bondOrderList;
std::vector<int8_t> bondResonanceList;
std::string groupName;
char singleLetterCode;
std::string chemCompType;
bool operator==(GroupType const & c) const {
return(
formalChargeList == c.formalChargeList &&
atomNameList == c.atomNameList &&
elementList == c.elementList &&
bondAtomList == c.bondAtomList &&
bondOrderList == c.bondOrderList &&
bondResonanceList == c.bondResonanceList &&
groupName == c.groupName &&
singleLetterCode == c.singleLetterCode &&
chemCompType == c.chemCompType);
}
};
/**
* @brief Entity type.
*
* https://github.com/rcsb/mmtf/blob/HEAD/spec.md#entitylist
*/
struct Entity {
std::vector<int32_t> chainIndexList;
std::string description;
std::string type;
std::string sequence;
bool operator==(Entity const & c) const {
return(
chainIndexList == c.chainIndexList &&
description == c.description &&
type == c.type &&
sequence == c.sequence);
}
MSGPACK_DEFINE_MAP(
chainIndexList,
description,
type,
sequence);
};
/**
* @brief Transformation definition for a set of chains.
*
* https://github.com/rcsb/mmtf/blob/HEAD/spec.md#bioassemblylist
*/
struct Transform {
std::vector<int32_t> chainIndexList;
float matrix[16];
bool operator==(Transform const & c) const {
bool comp = true;
for(size_t i = 16; i--;) {
if ( matrix[i] != c.matrix[i] ) {
comp = false;
break;
}
}
return (chainIndexList == c.chainIndexList && comp);
}
MSGPACK_DEFINE_MAP(chainIndexList, matrix);
};
/**
* @brief Data store for the biological assembly annotation.
*
* https://github.com/rcsb/mmtf/blob/HEAD/spec.md#bioassemblylist
*/
struct BioAssembly {
std::vector<Transform> transformList;
std::string name;
bool operator==(BioAssembly const & c) const {
return (
transformList == c.transformList &&
name == c.name);
}
MSGPACK_DEFINE_MAP(transformList, name);
};
/**
* @brief Top level MMTF data container.
*
* Default values (mmtf::isDefaultValue, mmtf::setDefaultValue) are set in
* constructor and can be used to check if value was never set (only relevant
* for optional values):
* - default for vectors and strings: empty
* - default for numeric types (incl. char): max. value of that type
* - default for numXX = 0
*
* https://github.com/rcsb/mmtf/blob/HEAD/spec.md#fields
*/
struct StructureData {
std::string mmtfVersion;
std::string mmtfProducer;
std::vector<float> unitCell;
std::string spaceGroup;
std::string structureId;
std::string title;
std::string depositionDate;
std::string releaseDate;
std::vector<std::vector<float> > ncsOperatorList;
std::vector<BioAssembly> bioAssemblyList;
std::vector<Entity> entityList;
std::vector<std::string> experimentalMethods;
float resolution;
float rFree;
float rWork;
int32_t numBonds;
int32_t numAtoms;
int32_t numGroups;
int32_t numChains;
int32_t numModels;
std::vector<GroupType> groupList;
std::vector<int32_t> bondAtomList;
std::vector<int8_t> bondOrderList;
std::vector<int8_t> bondResonanceList;
std::vector<float> xCoordList;
std::vector<float> yCoordList;
std::vector<float> zCoordList;
std::vector<float> bFactorList;
std::vector<int32_t> atomIdList;
std::vector<char> altLocList;
std::vector<float> occupancyList;
std::vector<int32_t> groupIdList;
std::vector<int32_t> groupTypeList;
std::vector<int8_t> secStructList;
std::vector<char> insCodeList;
std::vector<int32_t> sequenceIndexList;
std::vector<std::string> chainIdList;
std::vector<std::string> chainNameList;
std::vector<int32_t> groupsPerChain;
std::vector<int32_t> chainsPerModel;
mutable msgpack::zone msgpack_zone;
std::map<std::string, msgpack::object> bondProperties;
std::map<std::string, msgpack::object> atomProperties;
std::map<std::string, msgpack::object> groupProperties;
std::map<std::string, msgpack::object> chainProperties;
std::map<std::string, msgpack::object> modelProperties;
std::map<std::string, msgpack::object> extraProperties;
/**
* @brief Construct object with default values set.
*/
StructureData();
/**
* @brief Overload for copy constructor.
*/
StructureData(const StructureData& obj);
/**
* @brief Overload for assignment operator.
*/
StructureData& operator=(const StructureData& f);
/**
* @brief Check consistency of structural data.
* @param verbose Print first error encountered (if any)
* @param chain_name_max_length Max allowed chain name length
* @return True if all required fields are set and vector sizes and indices
* are consistent.
*/
bool hasConsistentData(bool verbose=false, uint32_t chain_name_max_length = 4) const;
/**
* @brief Read out the contents of mmtf::StructureData in a PDB-like fashion
* Columns are in order:
* ATOM/HETATM AtomId Element AtomName AltLoc GroupId GroupType
* InsCode ChainName x y z B-factor Occupancy Charge
* @param delim what to split columns with
*/
std::string print(std::string delim="\t") const;
/**
* @brief Compare two StructureData classes for equality
* @param c What to compare to
*/
bool operator==(const StructureData& c) const;
/**
* @brief Compare two StructureData classes for inequality
* @param c What to compare to
*/
bool operator!=(const StructureData& c) const {
return !(*this == c);
}
private:
// Helper to copy map data
void copyMapData_(std::map<std::string, msgpack::object>& target,
const std::map<std::string, msgpack::object>& source);
// Helper to copy all data
void copyAllData_(const StructureData& obj);
};
/**
* @brief Get default value for given type.
*/
template <typename T>
inline T getDefaultValue();
/**
* @return True if given value is default.
* @tparam T Can be any numeric type, vector of string
*/
template <typename T>
inline bool isDefaultValue(const T& value);
template <typename T>
inline bool isDefaultValue(const std::vector<T>& value);
template <>
inline bool isDefaultValue(const std::string& value);
template <>
inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value);
/**
* @brief Set default value to given type.
* @tparam T Can be any numeric type (no need for vector or strings here)
*/
template <typename T>
inline void setDefaultValue(T& value);
/**
* @brief Check if chain is a polymer chain.
* @param chain_index Chain index of chain.
* @param entity_list StructureData.entityList with info on given chain.
* @return True if chain is a polymer chain.
* @throw mmtf::DecodeError if chain index doesn't appear in entity list.
*/
inline bool is_polymer(const unsigned int chain_index,
const std::vector<Entity>& entity_list);
/**
* @brief Check if group type consists of HETATM atoms.
* @param type Character string of GroupType.chemCompType.
* @return True if group consists of HETATM atoms.
*
* Relevant threads:
* - https://github.com/rcsb/mmtf/issues/28
* - http://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_chem_comp.type.html
*
* @warning Use the other is_hetatm function whenever possible. This one should
* only be used if you don't have info like an 'entity_list' to work
* with. This can return 'false' if you have an amino acid ligand.
*/
inline bool is_hetatm(const char* type);
/**
* @brief Preferred way to check if group consists of hetatm atoms.
* @param chain_index Chain index of chain where atom belongs to.
* @param entity_list StructureData.entityList with info on given chain.
* @param group_type GroupType of group where atom belongs to.
* @return True if it is a HETATM.
*
* Follows discussion in https://github.com/rcsb/mmtf/issues/28.
*
* Used in StructureData.print to mark atoms as HETATM. This is a shorthand for
* `is_hetatm(type) || !is_polymer(chain_index, entity_list)`.
*
* Efficient code needing this information should preferably use the or
* statement above, precompute the `is_polymer` output for each chain and the
* `is_hetatm(type)` output for each group instead of calling this on each atom.
*
* @see is_polymer, is_hetatm
*/
inline bool is_hetatm(const unsigned int chain_index,
const std::vector<Entity>& entity_list,
const GroupType& group_type);
// *************************************************************************
// IMPLEMENTATION
// *************************************************************************
// helpers in anonymous namespace (only visible in this file)
namespace {
// check optional date string
// -> either default or "YYYY-MM-DD" (only string format checked, not date)
bool isValidDateFormatOptional(const std::string& s) {
// default?
if (isDefaultValue(s)) return true;
// check length
if (s.length() != 10) return false;
// check delimiters
if (s[4] != '-' || s[7] != '-') return false;
// check format
std::istringstream is(s);
int d, m, y;
char dash1, dash2;
if (is >> y >> dash1 >> m >> dash2 >> d) {
return (dash1 == '-' && dash2 == '-');
} else {
return false;
}
}
// check if optional vector has right size
template<typename T>
bool hasRightSizeOptional(const std::vector<T>& v, int exp_size) {
return (isDefaultValue(v) || (int)v.size() == exp_size);
}
// check if all indices in vector are in [0, num-1] (T = integer type)
template<typename T, typename Tnum>
bool hasValidIndices(const T* v, size_t size, Tnum num) {
T tnum = T(num);
for (size_t i = 0; i < size; ++i) {
if (v[i] < T(0) || v[i] >= tnum) return false;
}
return true;
}
template<typename T, typename Tnum>
bool hasValidIndices(const std::vector<T>& v, Tnum num) {
if (v.empty()) return true;
else return hasValidIndices(&v[0], v.size(), num);
}
} // anon ns
// VERSIONING
inline std::string getVersionString() {
std::stringstream version;
version << MMTF_SPEC_VERSION_MAJOR << "." << MMTF_SPEC_VERSION_MINOR;
return version.str();
}
inline bool isVersionSupported(const std::string& version_string) {
std::stringstream ss(version_string);
int major = -1;
return ((ss >> major) && (major <= MMTF_SPEC_VERSION_MAJOR));
}
// DEFAULT VALUES
template <typename T>
inline T getDefaultValue() { return std::numeric_limits<T>::max(); }
template <typename T>
inline bool isDefaultValue(const T& value) {
return (value == getDefaultValue<T>());
}
template <typename T>
inline bool isDefaultValue(const std::vector<T>& value) {
return value.empty();
}
template <>
inline bool isDefaultValue(const std::string& value) {
return value.empty();
}
template <>
inline bool isDefaultValue(const std::map<std::string, msgpack::object>& value) {
return value.empty();
}
template <typename T>
inline void setDefaultValue(T& value) {
value = getDefaultValue<T>();
}
// HELPERS
bool is_polymer(const unsigned int chain_index,
const std::vector<Entity>& entity_list) {
for (std::size_t i = 0; i < entity_list.size(); ++i) {
if ( std::find(entity_list[i].chainIndexList.begin(),
entity_list[i].chainIndexList.end(),
chain_index)
!= entity_list[i].chainIndexList.end()) {
return ( entity_list[i].type == "polymer"
|| entity_list[i].type == "POLYMER");
}
}
std::stringstream err;
err << "'is_polymer' unable to find chain_index: " << chain_index
<< " in entity list";
throw DecodeError(err.str());
}
bool is_hetatm(const char* type) {
const char* hetatm_type[] = {
"D-BETA-PEPTIDE, C-GAMMA LINKING",
"D-GAMMA-PEPTIDE, C-DELTA LINKING",
"D-PEPTIDE COOH CARBOXY TERMINUS",
"D-PEPTIDE NH3 AMINO TERMINUS",
"D-PEPTIDE LINKING",
"D-SACCHARIDE",
"D-SACCHARIDE 1,4 AND 1,4 LINKING",
"D-SACCHARIDE 1,4 AND 1,6 LINKING",
"DNA OH 3 PRIME TERMINUS",
"DNA OH 5 PRIME TERMINUS",
"DNA LINKING",
"L-DNA LINKING",
"L-RNA LINKING",
"L-BETA-PEPTIDE, C-GAMMA LINKING",
"L-GAMMA-PEPTIDE, C-DELTA LINKING",
"L-PEPTIDE COOH CARBOXY TERMINUS",
"L-PEPTIDE NH3 AMINO TERMINUS",
//"L-PEPTIDE LINKING", // All canonical L AA
"L-SACCHARIDE",
"L-SACCHARIDE 1,4 AND 1,4 LINKING",
"L-SACCHARIDE 1,4 AND 1,6 LINKING",
"RNA OH 3 PRIME TERMINUS",
"RNA OH 5 PRIME TERMINUS",
"RNA LINKING",
"NON-POLYMER",
"OTHER",
//"PEPTIDE LINKING", // GLY
"PEPTIDE-LIKE",
"SACCHARIDE",
0 };
for (int i=0; hetatm_type[i]; ++i) {
if (strcmp(type,hetatm_type[i]) == 0) return true;
}
return false;
}
bool is_hetatm(const unsigned int chain_index,
const std::vector<Entity>& entity_list,
const GroupType& group_type) {
return ( is_hetatm(group_type.chemCompType.c_str())
|| !is_polymer(chain_index, entity_list));
}
// CLASS StructureData
inline StructureData::StructureData() {
// no need to do anything with strings and vectors
setDefaultValue(resolution);
setDefaultValue(rFree);
setDefaultValue(rWork);
// numXX set to 0 to have consistent data
numBonds = 0;
numAtoms = 0;
numGroups = 0;
numChains = 0;
numModels = 0;
// set version and producer
mmtfVersion = getVersionString();
mmtfProducer = "mmtf-cpp library (github.com/rcsb/mmtf-cpp)";
}
inline StructureData::StructureData(const StructureData& obj) {
copyAllData_(obj);
}
inline StructureData& StructureData::operator=(const StructureData& obj) {
if (this != &obj) copyAllData_(obj);
return *this;
}
inline bool StructureData::operator==(const StructureData& c) const {
return (
mmtfVersion == c.mmtfVersion &&
mmtfProducer == c.mmtfProducer &&
unitCell == c.unitCell &&
spaceGroup == c.spaceGroup &&
structureId == c.structureId &&
title == c.title &&
depositionDate == c.depositionDate &&
releaseDate == c.releaseDate &&
ncsOperatorList == c.ncsOperatorList &&
bioAssemblyList == c.bioAssemblyList &&
entityList == c.entityList &&
experimentalMethods == c.experimentalMethods &&
resolution == c.resolution &&
rFree == c.rFree &&
rWork == c.rWork &&
numBonds == c.numBonds &&
numAtoms == c.numAtoms &&
numGroups == c.numGroups &&
numChains == c.numChains &&
numModels == c.numModels &&
groupList == c.groupList &&
bondAtomList == c.bondAtomList &&
bondOrderList == c.bondOrderList &&
xCoordList == c.xCoordList &&
yCoordList == c.yCoordList &&
zCoordList == c.zCoordList &&
bFactorList == c.bFactorList &&
atomIdList == c.atomIdList &&
altLocList == c.altLocList &&
occupancyList == c.occupancyList &&
groupIdList == c.groupIdList &&
groupTypeList == c.groupTypeList &&
secStructList == c.secStructList &&
insCodeList == c.insCodeList &&
sequenceIndexList == c.sequenceIndexList &&
chainIdList == c.chainIdList &&
chainNameList == c.chainNameList &&
groupsPerChain == c.groupsPerChain &&
chainsPerModel == c.chainsPerModel &&
bondProperties == c.bondProperties &&
atomProperties == c.atomProperties &&
groupProperties == c.groupProperties &&
chainProperties == c.chainProperties &&
modelProperties == c.modelProperties &&
extraProperties == c.extraProperties);
}
inline bool StructureData::hasConsistentData(bool verbose, uint32_t chain_name_max_length) const {
std::vector<int8_t> allowed_bond_orders;
allowed_bond_orders.push_back(-1);
allowed_bond_orders.push_back(1);
allowed_bond_orders.push_back(2);
allowed_bond_orders.push_back(3);
allowed_bond_orders.push_back(4);
// check unitCell: if given, must be of length 6
if (!hasRightSizeOptional(unitCell, 6)) {
if (verbose) {
std::cout << "inconsistent unitCell (unitCell length != 6)" << std::endl;
}
return false;
}
// check dates
if (!isValidDateFormatOptional(depositionDate)) {
if (verbose) {
std::cout << "inconsistent depositionDate (does not match 'YYYY-MM-DD' "
"or empty)" << std::endl;
}
return false;
}
if (!isValidDateFormatOptional(releaseDate)) {
if (verbose) {
std::cout << "inconsistent releaseDate (does not match 'YYYY-MM-DD' "
"or empty)" << std::endl;
}
return false;
}
// check ncsOperatorList: all elements must have length 16
for (size_t i = 0; i < ncsOperatorList.size(); ++i) {
if ((int)ncsOperatorList[i].size() != 16) {
if (verbose) {
std::cout << "inconsistent ncsOperatorList idx: " << i << " found size: "
<< ncsOperatorList[i].size() << " != 16" << std::endl;
}
return false;
}
}
// check chain indices in bioAssembly-transforms and entities
for (size_t i = 0; i < bioAssemblyList.size(); ++i) {
const BioAssembly& ba = bioAssemblyList[i];
for (size_t j = 0; j < ba.transformList.size(); ++j) {
const Transform & t = ba.transformList[j];
if (!hasValidIndices(t.chainIndexList, numChains)) {
if (verbose) {
std::cout << "inconsistent BioAssemby transform i j: " << i
<< " " << j << std::endl;
}
return false;
}
}
}
for (size_t i = 0; i < entityList.size(); ++i) {
const Entity& ent = entityList[i];
if (!hasValidIndices(ent.chainIndexList, numChains)) {
if (verbose) {
std::cout << "inconsistent entity idx: " << i << std::endl;
}
return false;
}
}
// check groups
for (size_t i = 0; i < groupList.size(); ++i) {
const GroupType& g = groupList[i];
const size_t num_atoms = g.formalChargeList.size();
if (g.atomNameList.size() != num_atoms) {
if (verbose) {
std::cout << "inconsistent group::atomNameList size at idx: "
<< i << std::endl;
}
return false;
}
if (g.elementList.size() != num_atoms) {
if (verbose) {
std::cout << "inconsistent group::elementList size at idx: "
<< i << std::endl;
}
return false;
}
if (!isDefaultValue(g.bondOrderList)) {
if (g.bondAtomList.size() != g.bondOrderList.size() * 2) {
if (verbose) {
std::cout << "inconsistent group::bondAtomList size: " <<
g.bondAtomList.size() << " != group::bondOrderList size(*2): " <<
g.bondOrderList.size()*2 << " at idx: " << i << std::endl;
}
return false;
}
for (size_t j = 0; j < g.bondOrderList.size(); ++j) {
if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
g.bondOrderList[j]) == allowed_bond_orders.end()) {
if (verbose) {
std::cout << "Cannot have bond order of: " << (int)g.bondOrderList[j]
<< " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << j << std::endl;
}
return false;
}
}
}
if (!isDefaultValue(g.bondResonanceList)) {
if (isDefaultValue(g.bondOrderList) || isDefaultValue(g.bondAtomList)) {
if (verbose) {
std::cout << "Cannot have bondResonanceList without both " <<
"bondOrderList and bondAtomList! at idx: " << i << std::endl;
}
return false;
}
if (g.bondOrderList.size() != g.bondResonanceList.size()) {
if (verbose) {
std::cout << "inconsistent group::bondOrderSize size: " <<
g.bondOrderList.size() << " != group::bondResonanceList size: " <<
g.bondResonanceList.size() << " at idx: " << i << std::endl;
}
return false;
}
if (g.bondAtomList.size() != g.bondResonanceList.size() * 2) {
if (verbose) {
std::cout << "inconsistent group::bondAtomList size: " <<
g.bondAtomList.size() << " != group::bondResonanceList size(*2): " <<
g.bondResonanceList.size()*2 << " at idx: " << i << std::endl;
}
return false;
}
// Check bond resonance matches
for (size_t j = 0; j < g.bondResonanceList.size(); ++j) {
if (g.bondResonanceList[j] < -1 || g.bondResonanceList[j] > 1) {
if (verbose) {
std::cout << "group::bondResonanceList had a Resonance of: "
<< (int)g.bondResonanceList[j] << " and only -1, 0, or 1 are allowed"
<< std::endl;
}
return false;
}
if (g.bondOrderList[j] == -1 && g.bondResonanceList[j] != 1) {
if (verbose) {
std::cout << "group::bondResonanceList had a Resonance of: "
<< (int)g.bondResonanceList[j] << " and group::bondOrderList had an order of "
<< (int)g.bondOrderList[j] << " we require unknown bondOrders to have resonance"
<< std::endl;
}
return false;
}
}
}
if (!hasValidIndices(g.bondAtomList, num_atoms)) {
if (verbose) {
std::cout << "inconsistent group::bondAtomList indices (not all in [0, "
<< num_atoms - 1 << "]) at idx: " << i << std::endl;
}
return false;
}
}
// check global bonds
if (!isDefaultValue(bondOrderList)) {
if (bondAtomList.size() != bondOrderList.size() * 2) {
if (verbose) {
std::cout << "inconsistent bondAtomList size: " <<
bondAtomList.size() << " != bondOrderList size(*2): " <<
bondOrderList.size()*2 << std::endl;
}
return false;
}
for (size_t i = 0; i < bondOrderList.size(); ++i) {
if (std::find(allowed_bond_orders.begin(), allowed_bond_orders.end(),
bondOrderList[i]) == allowed_bond_orders.end()) {
if (verbose) {
std::cout << "Cannot have bond order of: " << (int)bondOrderList[i]
<< " allowed bond orders are: -1, 1, 2, 3 or 4. at idx: " << i << std::endl;
}
return false;
}
}
}
if (!isDefaultValue(bondResonanceList)) {
if (isDefaultValue(bondOrderList) || isDefaultValue(bondAtomList)) {
if (verbose) {
std::cout << "Cannot have bondResonanceList without both " <<
"bondOrderList and bondAtomList!" << std::endl;
}
return false;
}
if (bondAtomList.size() != bondResonanceList.size() * 2) {
if (verbose) {
std::cout << "inconsistent bondAtomList size: " <<
bondAtomList.size() << " != bondResonanceList size(*2): " <<
bondResonanceList.size()*2 << std::endl;
}
return false;
}
for (size_t i = 0; i < bondResonanceList.size(); ++i) {
if (bondResonanceList[i] < -1 || bondResonanceList[i] > 1) {
if (verbose) {
std::cout << "bondResonanceList had a Resonance of: "
<< (int)bondResonanceList[i] << " and only -1, 0, or 1 are allowed"
<< std::endl;
}
return false;
}
if (bondOrderList[i] == -1 && bondResonanceList[i] != 1) {
if (verbose) {
std::cout << "bondResonanceList had a Resonance of: "
<< (int)bondResonanceList[i] << " and bondOrderList had an order of "
<< (int)bondOrderList[i] << " we require unknown bondOrders to have resonance"
<< std::endl;
}
return false;
}
}
}
if (!hasValidIndices(bondAtomList, numAtoms)) {
if (verbose) {
std::cout << "inconsistent bondAtomList indices (not all in [0, "
<< numAtoms - 1 << "])" << std::endl;
}
return false;
}
// check vector sizes
if ((int)xCoordList.size() != numAtoms) {
if (verbose) {
std::cout << "inconsistent xCoordList size" << std::endl;
}
return false;
}
if ((int)yCoordList.size() != numAtoms) {
if (verbose) {
std::cout << "inconsistent yCoordList size" << std::endl;
}
return false;
}
if ((int)zCoordList.size() != numAtoms) {
if (verbose) {
std::cout << "inconsistent zCoordList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(bFactorList, numAtoms)) {
if (verbose) {
std::cout << "inconsistent bFactorList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(atomIdList, numAtoms)) {
if (verbose) {
std::cout << "inconsistent atomIdList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(altLocList, numAtoms)) {
if (verbose) {
std::cout << "inconsistent altLocList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(occupancyList, numAtoms)) {
if (verbose) {
std::cout << "inconsistent occupancyList size" << std::endl;
}
return false;
}
if ((int)groupIdList.size() != numGroups) {
if (verbose) {
std::cout << "inconsistent groupIdList size" << std::endl;
}
return false;
}
if ((int)groupTypeList.size() != numGroups) {
if (verbose) {
std::cout << "inconsistent groupTypeList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(secStructList, numGroups)) {
if (verbose) {
std::cout << "inconsistent secStructList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(insCodeList, numGroups)) {
if (verbose) {
std::cout << "inconsistent insCodeList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(sequenceIndexList, numGroups)) {
if (verbose) {
std::cout << "inconsistent sequenceIndexList size" << std::endl;
}
return false;
}
if ((int)chainIdList.size() != numChains) {
if (verbose) {
std::cout << "inconsistent chainIdList size" << std::endl;
}
return false;
}
if (!hasRightSizeOptional(chainNameList, numChains)) {
if (verbose) {
std::cout << "inconsistent chainNameList size" << std::endl;
}
return false;
}
if ((int)groupsPerChain.size() != numChains) {
if (verbose) {
std::cout << "inconsistent groupsPerChain size" << std::endl;
}
return false;
}
if ((int)chainsPerModel.size() != numModels) {
if (verbose) {
std::cout << "inconsistent chainsPerModel size" << std::endl;
}
return false;
}
// check indices
if (!hasValidIndices(groupTypeList, groupList.size())) {
if (verbose) {
std::cout << "inconsistent groupTypeList indices (not all in [0, "
<< groupList.size() - 1 << "])" << std::endl;
}
return false;
}
// collect sequence lengths from entities and use to check
std::vector<int32_t> sequenceIndexSize(numChains);
for (size_t i = 0; i < entityList.size(); ++i) {
const Entity& ent = entityList[i];
for (size_t j = 0; j < ent.chainIndexList.size(); ++j) {
sequenceIndexSize[ent.chainIndexList[j]] = ent.sequence.length();
}
}
// traverse structure for more checks
int bond_count_from_atom = 0;
int bond_count_from_order = 0;
int bond_count_from_resonance = 0;
bool all_bond_orderLists_are_default = true;
bool all_bond_resonanceLists_are_default = true;
bool all_bond_atomLists_are_default = true;
if (!isDefaultValue(bondOrderList)) {
all_bond_orderLists_are_default = false;
bond_count_from_order = bondOrderList.size();
}
if (!isDefaultValue(bondResonanceList)) {
all_bond_resonanceLists_are_default = false;
bond_count_from_resonance = bondOrderList.size();
}
if (!isDefaultValue(bondAtomList)) {
all_bond_atomLists_are_default = false;
bond_count_from_atom = bondAtomList.size()/2;
}
int chain_idx = 0; // will be count at end of loop
int group_idx = 0; // will be count at end of loop
int atom_idx = 0; // will be count at end of loop
// traverse models
for (int model_idx = 0; model_idx < numModels; ++model_idx) {
// traverse chains
for (int j = 0; j < chainsPerModel[model_idx]; ++j, ++chain_idx) {
// check chain names (fixed length)
if (chainIdList[chain_idx].size() > chain_name_max_length) {
if (verbose) {
std::cout << "inconsistent chainIdList size at chain_idx: "
<< chain_idx << " size: "
<< chainIdList[chain_idx].size() << std::endl;
}
return false;
}
if ( !isDefaultValue(chainNameList)
&& chainNameList[chain_idx].size() > chain_name_max_length) {
if (verbose) {
std::cout << "inconsistent chainNameList size at chain_idx:"
<< chain_idx << " size: "
<< chainNameList[chain_idx].size() << std::endl;
}
return false;
}
// traverse groups
for (int k = 0; k < groupsPerChain[chain_idx]; ++k, ++group_idx) {
// check seq. idx
if (!isDefaultValue(sequenceIndexList)) {
const int32_t idx = sequenceIndexList[group_idx];
// -1 is ok here
if (idx < -1 || idx >= sequenceIndexSize[chain_idx]) {
if (verbose) {
std::cout << "inconsistent sequenceIndexSize at"
" chain_idx: " << chain_idx << std::endl;
}
return false;
}
}
// count atoms
const GroupType& group = groupList[groupTypeList[group_idx]];
atom_idx += group.atomNameList.size();
// count bonds
if (!isDefaultValue(group.bondOrderList)) {
all_bond_orderLists_are_default = false;
bond_count_from_order += group.bondOrderList.size();
}
if (!isDefaultValue(group.bondResonanceList)) {
all_bond_resonanceLists_are_default = false;
bond_count_from_resonance += group.bondResonanceList.size();
}
if (!isDefaultValue(group.bondAtomList)) {
all_bond_atomLists_are_default = false;
bond_count_from_atom += group.bondAtomList.size()/2;
}
}
}
}
// check sizes
if (!all_bond_orderLists_are_default) {
if (bond_count_from_order != numBonds) {
if (verbose) {
std::cout << "inconsistent numBonds vs bond order count" << std::endl;
}
return false;
}
}
if (!all_bond_resonanceLists_are_default) {
if (bond_count_from_resonance != numBonds) {
if (verbose) {
std::cout << "inconsistent numBonds vs bond resonance count" << std::endl;
}
return false;
}
}
if (!all_bond_atomLists_are_default) {
if (bond_count_from_atom != numBonds) {
if (verbose) {
std::cout << "inconsistent numBonds vs bond atom list count" << std::endl;
}
return false;
}
}
if (chain_idx != numChains) {
if (verbose) {
std::cout << "inconsistent numChains" << std::endl;
}
return false;
}
if (group_idx != numGroups) {
if (verbose) {
std::cout << "inconsistent numGroups size" << std::endl;
}
return false;
}
if (atom_idx != numAtoms) {
if (verbose) {
std::cout << "inconsistent numAtoms size" << std::endl;
}
return false;
}
// All looks good :)
return true;
}
inline std::string StructureData::print(std::string delim) const {
std::ostringstream out;
int modelIndex = 0;
int chainIndex = 0;
int groupIndex = 0;
int atomIndex = 0;
// traverse models
for (int i = 0; i < numModels; i++, modelIndex++) {
// traverse chains
for (int j = 0; j < chainsPerModel[modelIndex]; j++, chainIndex++) {
// traverse groups
for (int k = 0; k < groupsPerChain[chainIndex]; k++, groupIndex++) {
const mmtf::GroupType& group =
groupList[groupTypeList[groupIndex]];
int groupAtomCount = group.atomNameList.size();
for (int l = 0; l < groupAtomCount; l++, atomIndex++) {
// ATOM or HETATM
if (is_hetatm(chainIndex, entityList, group))
out << "HETATM" << delim;
else
out << "ATOM" << delim;
// Atom serial
if ( !mmtf::isDefaultValue(atomIdList) ) {
out << std::setfill('0') << std::internal << std::setw(6) <<
std::right << atomIdList[atomIndex] << delim;
} else out << "." << delim;
// Atom name
out << group.atomNameList[l] << delim;
// Alternate location
if ( !mmtf::isDefaultValue(altLocList) ) {
if ( altLocList[atomIndex] == ' ' ||
altLocList[atomIndex] == 0x00 )
out << "." << delim;
else out << altLocList[atomIndex] << delim;
} else out << "." << delim;
// Group name
out << group.groupName << delim;
// Chain
out << chainIdList[chainIndex] << delim;
if ( !mmtf::isDefaultValue(chainNameList) ) {
out << chainNameList[chainIndex];
out << delim;
} else out << "." << delim;
// Group serial
out << groupIdList[groupIndex] << delim;
// Insertion code
if ( !mmtf::isDefaultValue(insCodeList) ) {
if ( insCodeList[groupIndex] == ' ' ||
insCodeList[groupIndex] == 0x00 )
out << "." << delim;
else out << int(insCodeList[groupIndex]) << delim;
} else out << ". ";
// x, y, z
out << std::fixed << std::setprecision(3);
out << xCoordList[atomIndex] << delim;
out << yCoordList[atomIndex] << delim;
out << zCoordList[atomIndex] << delim;
// B-factor
if ( !mmtf::isDefaultValue(bFactorList) ) {
out << bFactorList[atomIndex] << delim;
} else out << "." << delim;
// Occupancy
if ( !mmtf::isDefaultValue(occupancyList) ) {
out << occupancyList[atomIndex] << delim;
} else out << "." << delim;
// Element
out << group.elementList[l] << delim;
// Charge
out << group.formalChargeList[l] << "\n";
}
}
}
}
return out.str();
}
inline void StructureData::copyMapData_(
std::map<std::string, msgpack::object>& target,
const std::map<std::string, msgpack::object>& source) {
target.clear();
std::map<std::string, msgpack::object>::const_iterator it;
for (it = source.begin(); it != source.end(); ++it) {
msgpack::object tmp_object(it->second, msgpack_zone);
target[it->first] = tmp_object;
}
}
inline void StructureData::copyAllData_(const StructureData& obj) {
mmtfVersion = obj.mmtfVersion;
mmtfProducer = obj.mmtfProducer;
unitCell = obj.unitCell;
spaceGroup = obj.spaceGroup;
structureId = obj.structureId;
title = obj.title;
depositionDate = obj.depositionDate;
releaseDate = obj.releaseDate;
ncsOperatorList = obj.ncsOperatorList;
bioAssemblyList = obj.bioAssemblyList;
entityList = obj.entityList;
experimentalMethods = obj.experimentalMethods;
resolution = obj.resolution;
rFree = obj.rFree;
rWork = obj.rWork;
numBonds = obj.numBonds;
numAtoms = obj.numAtoms;
numGroups = obj.numGroups;
numChains = obj.numChains;
numModels = obj.numModels;
groupList = obj.groupList;
bondAtomList = obj.bondAtomList;
bondOrderList = obj.bondOrderList;
xCoordList = obj.xCoordList;
yCoordList = obj.yCoordList;
zCoordList = obj.zCoordList;
bFactorList = obj.bFactorList;
atomIdList = obj.atomIdList;
altLocList = obj.altLocList;
occupancyList = obj.occupancyList;
groupIdList = obj.groupIdList;
groupTypeList = obj.groupTypeList;
secStructList = obj.secStructList;
insCodeList = obj.insCodeList;
sequenceIndexList = obj.sequenceIndexList;
chainIdList = obj.chainIdList;
chainNameList = obj.chainNameList;
groupsPerChain = obj.groupsPerChain;
chainsPerModel = obj.chainsPerModel;
copyMapData_(bondProperties, obj.bondProperties);
copyMapData_(atomProperties, obj.atomProperties);
copyMapData_(groupProperties, obj.groupProperties);
copyMapData_(chainProperties, obj.chainProperties);
copyMapData_(modelProperties, obj.modelProperties);
copyMapData_(extraProperties, obj.extraProperties);
}
} // mmtf namespace
#endif
cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
if(EMSCRIPTEN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s TOTAL_MEMORY=150994944 -s DISABLE_EXCEPTION_CATCHING=0")
endif()
add_executable(mmtf_tests mmtf_tests.cpp)
target_compile_features(mmtf_tests PRIVATE cxx_auto_type)
if(WIN32)
target_link_libraries(mmtf_tests Catch msgpackc MMTFcpp ws2_32)
else()
target_link_libraries(mmtf_tests Catch msgpackc MMTFcpp)
endif()
# test for multi-linking
add_executable(multi_cpp_test multi_cpp_test.cpp multi_cpp_test_helper.cpp)
target_compile_features(multi_cpp_test PRIVATE cxx_auto_type)
if(WIN32)
target_link_libraries(multi_cpp_test MMTFcpp ws2_32)
else()
target_link_libraries(multi_cpp_test MMTFcpp)
endif()
set(TEST_RUNNER "none" CACHE STRING "External runner for the tests")
if (${TEST_RUNNER} STREQUAL "node")
add_test( NAME mmtf_tests COMMAND node mmtf_tests)
else()
add_test( NAME mmtf_tests WORKING_DIRECTORY ${CMAKE_BINARY_DIR} COMMAND mmtf_tests)
endif()
#ifdef __EMSCRIPTEN__
#define CATCH_INTERNAL_CONFIG_NO_POSIX_SIGNALS
#define CATCH_CONFIG_RUNNER
#else
#define CATCH_CONFIG_MAIN
#endif
#include "catch.hpp"
#include <mmtf.hpp>
#include <mmtf/export_helpers.hpp>
// NOTE!!! Margin is set to 0.00001
// If we ever get more specific data this may need to be altered!
template <typename T>
bool approx_equal_vector(const T& a, const T& b, float eps = 0.00001) {
if (a.size() != b.size()) return false;
for (std::size_t i=0; i < a.size(); ++i) {
if (a[i] != Approx(b[i]).margin(eps)) return false;
}
return true;
}
std::map<std::string, msgpack::object*>
msgpack_obj_to_map(const msgpack::object& obj) {
std::map<std::string, msgpack::object*> data_map;
msgpack::object_kv* current_key_value = obj.via.map.ptr;
msgpack::object_kv* last_key_value = current_key_value + obj.via.map.size;
for (; current_key_value != last_key_value; ++current_key_value) {
msgpack::object* key = &(current_key_value->key);
msgpack::object* value = &(current_key_value->val);
if (key->type == msgpack::type::STR) {
std::string data_map_key(key->via.str.ptr, key->via.str.size);
data_map[data_map_key] = value;
} else {
throw std::runtime_error("Error: Found non-string key!");
}
}
return data_map;
}
TEST_CASE("assignment operator") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
SECTION("basic assignment operator") {
mmtf::StructureData sd2;
sd2 = sd;
REQUIRE(sd2 == sd);
}
SECTION("deep assignment operator") {
mmtf::StructureData sd2;
std::vector<int32_t> clist;
for (int32_t i = 0; i < 256; ++i) {
clist.push_back(i);
}
sd.atomProperties["256l"] = msgpack::object(clist, sd.msgpack_zone);
sd2 = sd;
REQUIRE(sd2 == sd);
REQUIRE(sd2.atomProperties["256l"] == sd.atomProperties["256l"]);
}
SECTION("deep assignment operator") {
mmtf::StructureData sd2;
std::vector<int32_t> clist;
for (int32_t i = 0; i < 256; ++i) {
clist.push_back(i);
}
sd.atomProperties["256l"] = msgpack::object(clist, sd.msgpack_zone);
sd2 = sd;
clist.push_back(22);
sd.atomProperties["256l"] = msgpack::object(clist, sd.msgpack_zone);
REQUIRE(sd2 != sd);
REQUIRE(sd2.atomProperties["256l"] != sd.atomProperties["256l"]);
}
}
TEST_CASE("copy constructor") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
SECTION("Basic copy constructor") {
mmtf::StructureData sd2(sd);
REQUIRE(sd2 == sd);
}
SECTION("Check msgpack::object copying") {
std::vector<int32_t> clist;
for (int32_t i = 0; i < 256; ++i) {
clist.push_back(i);
}
sd.atomProperties["256l"] = msgpack::object(clist, sd.msgpack_zone);
mmtf::StructureData sd2(sd);
REQUIRE(sd2.atomProperties["256l"] == sd.atomProperties["256l"]);
}
}
// Tests for structure_data.hpp
TEST_CASE("Test round trip StructureData working") {
std::vector<std::string> works;
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1AA6.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1AUY.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1BNA.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1CAG.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1HTQ.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1IGT.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1L2Q.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1LPV.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1MSH.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1O2F.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1R9V.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/1SKM.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/3NJW.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/3ZYB.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4CK4.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4CUP.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4OPJ.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4P3R.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4V5A.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/4Y60.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/5EMG.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/5ESW.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/empty-all0.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/empty-numChains1.mmtf");
works.push_back("../submodules/mmtf_spec/test-suite/mmtf/empty-numModels1.mmtf");
works.push_back("../temporary_test_data/all_canoncial.mmtf");
works.push_back("../temporary_test_data/1PEF_with_resonance.mmtf");
for (size_t i=0; i<works.size(); ++i) {
mmtf::StructureData sd1, sd2;
mmtf::decodeFromFile(sd1, works[i]);
mmtf::encodeToFile(sd1, "test_mmtf.mmtf");
mmtf::decodeFromFile(sd2, "test_mmtf.mmtf");
REQUIRE( sd1 == sd2 );
}
}
TEST_CASE("Test round trip StructureData not working - EncodeError") {
std::string basic = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, basic);
SECTION("Alter xCoordList") {
sd.xCoordList.push_back(0.334f);
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter yCoordList") {
sd.yCoordList.push_back(0.334f);
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter zCoordList") {
sd.zCoordList.push_back(0.334f);
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter bFactorList") {
sd.bFactorList.push_back(0.334f);
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter numAtoms") {
sd.numAtoms = 20;
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter chainIdList") {
sd.chainIdList.push_back("xsz");
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("chainIdList one too large") {
sd.chainIdList[0] = "IheartMMTF";
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter single groupList entry") {
sd.groupList[0].formalChargeList.pop_back();
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
sd.groupList[0].atomNameList.pop_back();
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
sd.groupList[0].elementList.pop_back();
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
SECTION("Alter groupTypeList") {
sd.groupTypeList[0] = 100;
REQUIRE_THROWS_AS(mmtf::encodeToFile(sd, "test_mmtf.mmtf"), mmtf::EncodeError);
}
}
TEST_CASE("Test StructureData DecodeError") {
std::vector<std::string> doesnt_work;
doesnt_work.push_back("../submodules/mmtf_spec/test-suite/mmtf/empty-mmtfVersion99999999.mmtf");
for (size_t i=0; i<doesnt_work.size(); ++i) {
mmtf::StructureData sd;
REQUIRE_THROWS_AS(mmtf::decodeFromFile(sd, doesnt_work[i]), mmtf::DecodeError);
}
}
TEST_CASE("Test DeltaRecursiveFloat enc/dec") {
std::vector<char> encoded_data;
// h1
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x0a);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x03);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x03);
encoded_data.push_back(0xe8);
// data
encoded_data.push_back(0x7f);
encoded_data.push_back(0xff);
encoded_data.push_back(0x44);
encoded_data.push_back(0xab);
encoded_data.push_back(0x01);
encoded_data.push_back(0x8f);
encoded_data.push_back(0xff);
encoded_data.push_back(0xca);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<float> decoded_input;
bd.decode(decoded_input);
std::vector<float> decoded_data;
decoded_data.push_back(50.346f);
decoded_data.push_back(50.745f);
decoded_data.push_back(50.691f);
std::vector<char> encoded_output
= mmtf::encodeDeltaRecursiveFloat(decoded_data, 1000);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(approx_equal_vector(decoded_data, decoded_input));
}
TEST_CASE("Test RunLengthFloat enc/dec") {
std::vector<char> encoded_data;
// h1
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x09);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x03);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x64);
// data
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x64);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x03);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<float> decoded_input;
bd.decode(decoded_input);
std::vector<float> decoded_data;
decoded_data.push_back(1.00f);
decoded_data.push_back(1.00f);
decoded_data.push_back(1.00f);
std::vector<char> encoded_output = mmtf::encodeRunLengthFloat(decoded_data, 100);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(approx_equal_vector(decoded_data, decoded_input));
}
TEST_CASE("Test RunLengthDeltaInt enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x08);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x07);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
// data
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x01);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x07);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<int32_t> decoded_input;
bd.decode(decoded_input);
std::vector<int32_t> decoded_data;
decoded_data.push_back(1);
decoded_data.push_back(2);
decoded_data.push_back(3);
decoded_data.push_back(4);
decoded_data.push_back(5);
decoded_data.push_back(6);
decoded_data.push_back(7);
std::vector<char> encoded_output = mmtf::encodeRunLengthDeltaInt(decoded_data);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test RunLengthChar enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x06);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
// data
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x41);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<char> decoded_input;
bd.decode(decoded_input);
std::vector<char> decoded_data;
decoded_data.push_back('A');
decoded_data.push_back('A');
decoded_data.push_back('A');
decoded_data.push_back('A');
std::vector<char> encoded_output = mmtf::encodeRunLengthChar(decoded_data);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test RunLengthInt8 enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x10);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
// data
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x02);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<int8_t> decoded_input;
bd.decode(decoded_input);
std::vector<int8_t> decoded_data;
decoded_data.push_back(2);
decoded_data.push_back(2);
decoded_data.push_back(2);
decoded_data.push_back(2);
std::vector<char> encoded_output = mmtf::encodeRunLengthInt8(decoded_data);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test encodeStringVector enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x05);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x06);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
// data
encoded_data.push_back('B');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back('A');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back('C');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back('A');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back('A');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back('A');
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<std::string> decoded_input;
bd.decode(decoded_input);
std::vector<std::string> decoded_data;
decoded_data.push_back("B");
decoded_data.push_back("A");
decoded_data.push_back("C");
decoded_data.push_back("A");
decoded_data.push_back("A");
decoded_data.push_back("A");
int chain_name_max_length = 4;
std::vector<char> encoded_output = mmtf::encodeStringVector(decoded_data, chain_name_max_length);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test int 8 enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x02);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x05);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
// data
encoded_data.push_back(0x07);
encoded_data.push_back(0x06);
encoded_data.push_back(0x06);
encoded_data.push_back(0x07);
encoded_data.push_back(0x07);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<int8_t> decoded_input;
bd.decode(decoded_input);
std::vector<int8_t> decoded_data;
decoded_data.push_back(7);
decoded_data.push_back(6);
decoded_data.push_back(6);
decoded_data.push_back(7);
decoded_data.push_back(7);
std::vector<char> encoded_output = mmtf::encodeInt8ToByte(decoded_data);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test FourByteInt enc/dec") {
std::vector<char> encoded_data;
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
// h2
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x04);
// h3
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
// data
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x01);
encoded_data.push_back(0x00);
encoded_data.push_back(0x02);
encoded_data.push_back(0x00);
encoded_data.push_back(0x01);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x00);
encoded_data.push_back(0x02);
msgpack::zone m_zone;
msgpack::object msgp_obj(encoded_data, m_zone);
mmtf::BinaryDecoder bd(msgp_obj, "a_test");
std::vector<int32_t> decoded_input;
bd.decode(decoded_input);
std::vector<int32_t> decoded_data;
decoded_data.push_back(1);
decoded_data.push_back(131073);
decoded_data.push_back(0);
decoded_data.push_back(2);
std::vector<char> encoded_output = mmtf::encodeFourByteInt(decoded_data);
REQUIRE(encoded_data == encoded_output);
REQUIRE(decoded_data.size() == decoded_input.size());
REQUIRE(decoded_data == decoded_input);
}
TEST_CASE("Test bondOrderList vs bondAtomList") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
SECTION("Deleting all bondOrderLists") {
for (auto & group : sd.groupList) {
group.bondOrderList.clear();
}
sd.bondOrderList.clear();
REQUIRE(sd.hasConsistentData());
// we write files without those fields (tested below), can we roundtrip?
mmtf::StructureData sd2;
mmtf::encodeToFile(sd, "test_mmtf_nobondorder.mmtf");
mmtf::decodeFromFile(sd2, "test_mmtf_nobondorder.mmtf");
REQUIRE( sd == sd2 );
}
SECTION("altering group bondOrderLists") {
sd.groupList[0].bondOrderList.push_back(1);
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("altering sd bondOrderLists") {
sd.bondOrderList.push_back(1);
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("not ok to have bond orders without atom list in group") {
sd.groupList[0].bondAtomList.clear();
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("not ok to have bond orders without atom list") {
sd.bondAtomList.clear();
REQUIRE_FALSE(sd.hasConsistentData());
}
}
TEST_CASE("test valid bonds") {
std::string working_mmtf = "../temporary_test_data/all_canoncial.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
REQUIRE(sd.hasConsistentData());
SECTION("invalid bond numbers in group") {
int8_t original = sd.groupList[0].bondResonanceList[0];
sd.groupList[0].bondResonanceList[0] = -3;
REQUIRE_FALSE(sd.hasConsistentData());
sd.groupList[0].bondResonanceList[0] = 2;
REQUIRE_FALSE(sd.hasConsistentData());
sd.groupList[0].bondResonanceList[0] = original;
original = sd.groupList[0].bondOrderList[0];
sd.groupList[0].bondOrderList[0] = 0;
REQUIRE_FALSE(sd.hasConsistentData());
sd.groupList[0].bondOrderList[0] = 5;
REQUIRE_FALSE(sd.hasConsistentData());
sd.groupList[0].bondOrderList[0] = -1;
sd.groupList[0].bondResonanceList[0] = -1;
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("invalid bond numbers") {
int8_t original = sd.bondResonanceList[0];
sd.bondResonanceList[0] = -3;
REQUIRE_FALSE(sd.hasConsistentData());
sd.bondResonanceList[0] = 2;
REQUIRE_FALSE(sd.hasConsistentData());
sd.bondResonanceList[0] = original;
original = sd.bondOrderList[0];
sd.bondOrderList[0] = 0;
REQUIRE_FALSE(sd.hasConsistentData());
sd.bondOrderList[0] = 5;
REQUIRE_FALSE(sd.hasConsistentData());
sd.bondOrderList[0] = -1;
sd.bondResonanceList[0] = -1;
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("invalid bondResonanceList size in group") {
sd.groupList[0].bondResonanceList.push_back(1);
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("invalid bondResonanceList size") {
sd.bondResonanceList.push_back(1);
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("not ok to have bond resonances without order list in group") {
sd.groupList[0].bondOrderList.clear();
REQUIRE_FALSE(sd.hasConsistentData());
}
SECTION("not ok to have bond resonances without order list") {
sd.bondOrderList.clear();
REQUIRE_FALSE(sd.hasConsistentData());
}
}
TEST_CASE("test group export optional") {
std::string working_mmtf = "../temporary_test_data/all_canoncial.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
REQUIRE(sd.hasConsistentData());
mmtf::GroupType & first_group(sd.groupList[0]);
SECTION("check that optional fields exist fist") {
msgpack::zone my_zone;
msgpack::object obj(first_group, my_zone);
std::map<std::string, msgpack::object*> my_map(msgpack_obj_to_map(obj));
REQUIRE(my_map.find("bondOrderList") != my_map.end());
REQUIRE(my_map.find("bondResonanceList") != my_map.end());
REQUIRE(my_map.find("bondAtomList") != my_map.end());
}
SECTION("removing group bondOrderLists") {
msgpack::zone my_zone;
first_group.bondOrderList = std::vector<int8_t>();
msgpack::object obj(first_group, my_zone);
std::map<std::string, msgpack::object*> my_map(msgpack_obj_to_map(obj));
REQUIRE(my_map.find("bondOrderList") == my_map.end());
}
SECTION("altering group bondResonanceLists") {
msgpack::zone my_zone;
first_group.bondResonanceList = std::vector<int8_t>();
msgpack::object obj(first_group, my_zone);
std::map<std::string, msgpack::object*> my_map(msgpack_obj_to_map(obj));
REQUIRE(my_map.find("bondResonanceList") == my_map.end());
}
SECTION("altering group bondAtomLists") {
msgpack::zone my_zone;
first_group.bondAtomList = std::vector<int32_t>();
msgpack::object obj(first_group, my_zone);
std::map<std::string, msgpack::object*> my_map(msgpack_obj_to_map(obj));
REQUIRE(my_map.find("bondAtomList") == my_map.end());
}
}
// Mainly a compiler check, not useful to actually test
void
map_const_sd_helper(mmtf::StructureData const & sd) {
std::map< std::string, std::string > map_str_str_out, map_str_str_in;
map_str_str_out["test"] = "tset";
const mmtf::MapDecoder extraProperties_MD(sd.extraProperties);
extraProperties_MD.decode("map_str_str", true, map_str_str_in);
REQUIRE(map_str_str_in == map_str_str_out);
}
TEST_CASE("mapDecoder types") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
std::map< std::string, std::string > map_str_str_out, map_str_str_in;
map_str_str_out["test"] = "tset";
sd.extraProperties["map_str_str"] = msgpack::object(map_str_str_out, sd.msgpack_zone);
// Also check to make sure const works
const mmtf::MapDecoder extraProperties_MD(sd.extraProperties);
extraProperties_MD.decode("map_str_str", true, map_str_str_in);
REQUIRE(map_str_str_in == map_str_str_out);
map_const_sd_helper(sd);
}
// test/example of how to use extra data
TEST_CASE("atomProperties field") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd, sd2;
mmtf::decodeFromFile(sd, working_mmtf);
/// Pack
// 1. Make your input data
std::vector<int32_t> clist_out;
std::vector<int32_t> clist_in, clist_in_decoded, nothing;
for (int32_t i = 0; i < sd.numAtoms; ++i) {
clist_out.push_back(i % 256);
}
// 2. msgpack zones have weird lifetimes, make sure you use the zone
// in StructureData to avoid any weird errors
sd.atomProperties["256_atomColorList"]
= msgpack::object(clist_out, sd.msgpack_zone);
sd.atomProperties["256_atomColorList_encoded"]
= msgpack::object(mmtf::encodeRunLengthDeltaInt(clist_out), sd.msgpack_zone);
mmtf::encodeToFile(sd, "test_atomProperties.mmtf");
/// Done Pack
/// Start Unpack
mmtf::decodeFromFile(sd2, "test_atomProperties.mmtf");
// Retrieve our 256 color list via convert
mmtf::MapDecoder atomProperties_MD(sd2.atomProperties);
atomProperties_MD.decode("256_atomColorList", true, clist_in);
atomProperties_MD.decode("256_atomColorList_encoded", true, clist_in_decoded);
/// Done Unpack
REQUIRE(clist_out == clist_in);
REQUIRE(clist_in == clist_in_decoded);
// you would catch mmtf::DecodeError if you wanted continue even after using
// required=true.
REQUIRE_THROWS_AS(atomProperties_MD.decode("NONEXISTANT", true, nothing),
mmtf::DecodeError);
}
// simple helper adding vector (numbers 0 to num_items-1) of given length both
// encoded (key "data_encoded") and non-encoded (key "data") to destination_map
void add_extra_data(std::map<std::string, msgpack::object>& destination_map,
int32_t num_items, mmtf::StructureData& sd) {
std::vector<int32_t> v;
for (int32_t i = 0; i < num_items; ++i) {
v.push_back(i);
}
destination_map["data"] = msgpack::object(v, sd.msgpack_zone);
destination_map["data_encoded"]
= msgpack::object(mmtf::encodeRunLengthDeltaInt(v), sd.msgpack_zone);
}
// check data stored above
void check_extra_data(const std::map<std::string, msgpack::object>& sd_map,
int32_t num_items) {
std::vector<int32_t> v;
for (int32_t i = 0; i < num_items; ++i) {
v.push_back(i);
}
std::vector<int32_t> v_in, v_in_decoded, nothing;
mmtf::MapDecoder map_decoder(sd_map);
map_decoder.decode("data", true, v_in);
map_decoder.decode("data_encoded", true, v_in_decoded);
REQUIRE(v_in == v);
REQUIRE(v_in_decoded == v);
REQUIRE_THROWS_AS(map_decoder.decode("NONEXISTANT", true, nothing),
mmtf::DecodeError);
}
// test round trips with all extra data set
TEST_CASE("extra data fields") {
// Randomly chosen, small example MMTF
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd, sd2, sd3;
mmtf::decodeFromFile(sd, working_mmtf);
// Add extra data
add_extra_data(sd.bondProperties, sd.numBonds, sd);
add_extra_data(sd.atomProperties, sd.numAtoms, sd);
add_extra_data(sd.groupProperties, sd.numGroups, sd);
add_extra_data(sd.chainProperties, sd.numChains, sd);
add_extra_data(sd.modelProperties, sd.numModels, sd);
add_extra_data(sd.extraProperties, 42, sd);
// Encode/decode
mmtf::encodeToFile(sd, "test_extra_data.mmtf");
mmtf::decodeFromFile(sd2, "test_extra_data.mmtf");
REQUIRE(sd == sd2);
// Check round-trip after reading extra data
// (i.e. we can safely read/write unknown extra data)
mmtf::encodeToFile(sd2, "test_extra_data2.mmtf");
mmtf::decodeFromFile(sd3, "test_extra_data2.mmtf");
REQUIRE(sd2 == sd3);
// Check data
check_extra_data(sd.bondProperties, sd.numBonds);
check_extra_data(sd.atomProperties, sd.numAtoms);
check_extra_data(sd.groupProperties, sd.numGroups);
check_extra_data(sd.chainProperties, sd.numChains);
check_extra_data(sd.modelProperties, sd.numModels);
check_extra_data(sd.extraProperties, 42);
}
TEST_CASE("Test export_helpers") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/3NJW.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
size_t numbonds_ref = sd.numBonds;
std::vector<int32_t> bonddata;
// test requirement: groupTypeList has duplicates
REQUIRE(sd.groupTypeList.size() != sd.groupList.size());
// must throw with duplicates
REQUIRE_THROWS_AS(mmtf::BondAdder(sd), mmtf::EncodeError);
// make groupTypeList non-duplicate
for (size_t i = 1; i < sd.groupTypeList.size(); ++i) {
for (size_t j = 0; j < i; ++j) {
if (sd.groupTypeList[i] == sd.groupTypeList[j]) {
sd.groupTypeList[i] = sd.groupList.size();
sd.groupList.push_back(sd.groupList[sd.groupTypeList[j]]);
break;
}
}
}
// test assert: groupTypeList has no duplicates anymore
REQUIRE(sd.groupTypeList.size() == sd.groupList.size());
// remove group bonds
size_t offset = 0;
for (auto groupType : sd.groupTypeList) {
auto & group = sd.groupList[groupType];
for (size_t i = 0; i < group.bondOrderList.size(); ++i) {
bonddata.push_back(group.bondAtomList[i * 2] + offset);
bonddata.push_back(group.bondAtomList[i * 2 + 1] + offset);
bonddata.push_back(group.bondOrderList[i]);
--sd.numBonds;
}
offset += group.atomNameList.size();
group.bondAtomList.clear();
group.bondOrderList.clear();
}
// remove global bonds
for (size_t i = 0; i < sd.bondOrderList.size(); ++i) {
bonddata.push_back(sd.bondAtomList[i * 2]);
bonddata.push_back(sd.bondAtomList[i * 2 + 1]);
bonddata.push_back(sd.bondOrderList[i]);
--sd.numBonds;
}
sd.bondAtomList.clear();
sd.bondOrderList.clear();
// test assert: all bonds have been transferred to `bonddata`
REQUIRE(bonddata.size() == numbonds_ref * 3);
REQUIRE(sd.numBonds == 0);
REQUIRE(sd.hasConsistentData());
// re-add bonds
mmtf::BondAdder bondadder(sd);
for (size_t i = 0; i < bonddata.size(); i += 3) {
REQUIRE(bondadder(bonddata[i], bonddata[i + 1], bonddata[i + 2]));
}
REQUIRE(sd.numBonds == numbonds_ref);
REQUIRE(sd.hasConsistentData());
// re-compress groupTypeList
mmtf::compressGroupList(sd);
REQUIRE(sd.groupTypeList.size() != sd.groupList.size());
REQUIRE(sd.hasConsistentData());
// compare with original data
mmtf::StructureData sd_ref;
mmtf::decodeFromFile(sd_ref, working_mmtf);
REQUIRE(sd_ref.bondAtomList == sd.bondAtomList);
REQUIRE(sd_ref.bondOrderList == sd.bondOrderList);
REQUIRE(sd_ref.groupTypeList == sd.groupTypeList);
REQUIRE(sd_ref.groupList == sd.groupList);
}
TEST_CASE("Test mapdecoder from raw mmtf") {
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::MapDecoder md;
mmtf::mapDecoderFromFile(md, working_mmtf);
std::vector<int> bonds;
REQUIRE_NOTHROW(md.decode("bondAtomList", true, bonds));
std::ifstream ifs(working_mmtf.c_str(), std::ifstream::in | std::ios::binary);
REQUIRE_NOTHROW(mmtf::mapDecoderFromStream(md, ifs));
REQUIRE_NOTHROW(md.decode("bondAtomList", true, bonds));
}
TEST_CASE("Test various encode and decode options") {
// fetch reference data
std::string working_mmtf = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData sd_ref;
mmtf::decodeFromFile(sd_ref, working_mmtf);
// write into buffer
std::ostringstream buffer;
mmtf::encodeToStream(sd_ref, buffer);
std::string const buffer_str(buffer.str());
SECTION("decodeFromBuffer") {
mmtf::StructureData sd;
mmtf::decodeFromBuffer(sd, buffer_str.data(), buffer_str.size());
REQUIRE(sd_ref == sd);
}
SECTION("decodeFromStream") {
mmtf::StructureData sd;
std::istringstream ibuffer(buffer_str);
mmtf::decodeFromStream(sd, ibuffer);
REQUIRE(sd_ref == sd);
// note: as of PR#31 (July 2019), we could also reread the same buffer
}
SECTION("mapDecoderFromBuffer") {
mmtf::MapDecoder md;
mmtf::mapDecoderFromBuffer(md, buffer_str.data(), buffer_str.size());
std::vector<int> bondAtomList;
REQUIRE_NOTHROW(md.decode("bondAtomList", true, bondAtomList));
REQUIRE(sd_ref.bondAtomList == bondAtomList);
mmtf::StructureData sd;
mmtf::decodeFromMapDecoder(sd, md);
REQUIRE(sd_ref == sd);
}
SECTION("mapDecoderFromStream") {
mmtf::MapDecoder md;
std::istringstream ibuffer(buffer_str);
mmtf::mapDecoderFromStream(md, ibuffer);
std::vector<int> bondAtomList;
REQUIRE_NOTHROW(md.decode("bondAtomList", true, bondAtomList));
REQUIRE(sd_ref.bondAtomList == bondAtomList);
mmtf::StructureData sd;
mmtf::decodeFromMapDecoder(sd, md);
REQUIRE(sd_ref == sd);
}
}
TEST_CASE("Test is_hetatm (chain_index version)") {
std::string working_mmtf = "../temporary_test_data/3zqs.mmtf";
mmtf::StructureData sd;
mmtf::decodeFromFile(sd, working_mmtf);
SECTION("CHECK CHAINS") {
int modelIndex = 0;
int chainIndex = 0;
for (int i = 0; i < sd.numModels; i++, modelIndex++) {
for (int j = 0; j < sd.chainsPerModel[modelIndex]; j++, chainIndex++) {
// chain indices 0 and 1 belong to a polymer entity
// all others should be marked as hetatm
if (chainIndex < 2) {
REQUIRE(is_polymer(chainIndex, sd.entityList));
} else {
REQUIRE_FALSE(is_polymer(chainIndex, sd.entityList));
}
}
}
}
SECTION("CHECK WITH GROUP") {
// chain indices 0 and 1 belong to polymer entity 0
std::string expected_sequence = sd.entityList[0].sequence;
int modelIndex = 0;
int chainIndex = 0;
int groupIndex = 0;
for (int i = 0; i < sd.numModels; i++, modelIndex++) {
for (int j = 0; j < sd.chainsPerModel[modelIndex]; j++, chainIndex++) {
std::string found_seq = "";
for (int k = 0; k < sd.groupsPerChain[chainIndex]; k++, groupIndex++) {
const mmtf::GroupType& group =
sd.groupList[sd.groupTypeList[groupIndex]];
bool hetatm = is_hetatm(chainIndex, sd.entityList, group);
if (chainIndex < 2) {
if (!hetatm) found_seq += group.singleLetterCode;
} else {
REQUIRE(hetatm);
}
}
if (chainIndex < 2) REQUIRE(expected_sequence == found_seq);
}
}
}
SECTION("throw check") {
REQUIRE_THROWS_AS(is_polymer(999, sd.entityList), mmtf::DecodeError);
}
}
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
int main(int argc, char* argv[]) {
// Give node.js an access to the root filesystem
EM_ASM(
FS.mkdir('root');
FS.mount(NODEFS, { root: '/' }, 'root');
FS.chdir('root/' + process.cwd() + '/../');
);
return Catch::Session().run(argc, argv);
}
#endif
// vi:noexpandtab:sw=4:ts=4
// *************************************************************************
//
// Simple test for compilation issues.
//
// *************************************************************************
#include <mmtf.hpp>
#include "multi_cpp_test_helper.hpp"
int main(int argc, char** argv) {
// decode MMTF file
std::string filename = "../submodules/mmtf_spec/test-suite/mmtf/173D.mmtf";
mmtf::StructureData data;
read_check_file(data, filename);
return 0;
}
// *************************************************************************
//
// Simple test for compilation issues.
//
// *************************************************************************
#include "multi_cpp_test_helper.hpp"
#include <iostream>
void read_check_file(mmtf::StructureData& data, const std::string& filename) {
// decode MMTF file
mmtf::decodeFromFile(data, filename);
// check if the data is self-consistent
if (data.hasConsistentData()) {
std::cout << "Successfully read " << filename << ".\n";
} else {
std::cout << "Inconsistent data in " << filename << ".\n";
}
}
// *************************************************************************
//
// Simple test for compilation issues.
//
// *************************************************************************
#include <mmtf.hpp>
#include <string>
void read_check_file(mmtf::StructureData& data, const std::string& filename);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment