Commit 39fc5608 by Maarten L. Hekkelman

documentation backup

parent e2fca07f
......@@ -17,6 +17,7 @@ configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY)
file(MAKE_DIRECTORY ${DOXYGEN_OUTPUT_DIR}) #Doxygen won't create this for us
add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE}
BYPRODUCTS ${DOXYGEN_OUTPUT_DIR}/xml
DEPENDS ${CIFPP_PUBLIC_HEADERS} ${DOXYFILE_OUT}
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT}
MAIN_DEPENDENCY ${DOXYFILE_OUT} ${DOXYFILE_IN}
......@@ -35,6 +36,7 @@ add_custom_target(Sphinx ALL
-Dbreathe_projects.${PROJECT_NAME}=${DOXYGEN_OUTPUT_DIR}/xml
${SPHINX_SOURCE} ${SPHINX_BUILD}
DEPENDS ${DOXYGEN_INDEX_FILE}
BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/api
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating documentation with Sphinx")
......
......@@ -3,7 +3,7 @@ FILE_PATTERNS = *.hpp
STRIP_FROM_PATH = @DOXYGEN_INPUT_DIR@
RECURSIVE = YES
GENERATE_XML = YES
PREDEFINED += and=&& or=|| not=! CIFPP_EXPORT=""
PREDEFINED += and=&& or=|| not=! CIFPP_EXPORT=
GENERATE_HTML = NO
GENERATE_TODOLIST = NO
INPUT = @DOXYGEN_INPUT_DIR@
Introduction
============
Information on 3D structures of proteins originally came formatted in [PDB](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html) files. Although the specification for this format had some real restrictions, like a mandatory HEADER and CRYST line, many programs implemented it very poorly, often writing out only ATOM records, and users became used to this.
The PDB format has some severe limitations rendering it useless for all but very small protein structures. A new format called [mmCIF](https://mmcif.wwpdb.org/) has been around for decades and now is the default format for the Protein Data Bank.
The software developed in the [PDB-REDO](https://pdb-redo.eu/) project aims at improving 3D models based on original experimental data. For this, the tools need to be able to work with both PDB and mmCIF files. A decision was made to use mmCIF as the leading format internally in all programs and to convert PDB files directly into mmCIF before processing the data. A robust conversion had to be developed to make this possible since, as noted above, files can come with more or less information, which sometimes makes it necessary to do a sequence alignment to find out the exact residue numbers.
And so libcif++ came to life, a library to work with mmCIF files. Work on this library started in early 2017 and it has developed quite a bit since then. To reduce dependency on other libraries, some functionality was added that is not strictly related to reading and writing mmCIF files but may be useful nonetheless. This is mostly code that is used in 3D calculations and symmetry operations.
Design
------
The main part of the library is a set of classes that work with mmCIF files. They are:
* :cpp:class:`cif::file`
* :cpp:class:`cif::datablock`
* :cpp:class:`cif::category`
The :cpp:class:`cif::file` class encapsulates, you guessed it, the contents of an mmCIF file. In such a file there are one or more :cpp:class:`cif::datablock` objects and each datablock contains one or more :cpp:class:`cif::category` objects.
.. toctree::
:maxdepth: 2
:caption: Contents
......
......@@ -24,7 +24,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/// \file atom_type.hpp cif++/atom_type.hpp
/// \file atom_type.hpp
/// This file contains information about all known elements
......@@ -293,6 +293,7 @@ class atom_type_traits
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
///
/// \param charge The charge of the ion
/// \param type The requested ion radius type
/// \return The radius of the ion
float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
{
......@@ -314,7 +315,7 @@ class atom_type_traits
///
/// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
///
/// @param charge The charge for which the structure values should be returned, use @ref cif::atom_type_traits::kWSKFVal to return the Cval and Siva values
/// @param charge The charge for which the structure values should be returned, use kWSKFVal to return the *Cval* and *Siva* values
/// @return The scattering factors as a @ref SFData struct
const SFData &wksf(int charge = 0) const;
......
......@@ -26,7 +26,7 @@
#pragma once
/// \file category.hpp cif++/category.hpp
/// \file category.hpp
/// Documentation for the cif::category class
///
/// The category class should meet the requirements of Container and
......@@ -634,6 +634,7 @@ class category
/// @brief Return the value for column @a column for the first row that matches condition @a cond
/// @tparam T The type of the value to return
/// @param cond The condition to search for
/// @param column The column for which the value should be returned
/// @return The value found or a default constructed value if not found
template <typename T>
T find_first(condition &&cond, const char *column) const
......@@ -646,6 +647,7 @@ class category
/// @tparam T The type of the value to return
/// @param pos The location to start searching
/// @param cond The condition to search for
/// @param column The column for which the value should be returned
/// @return The value found or a default constructed value if not found
template <typename T>
T find_first(const_iterator pos, condition &&cond, const char *column) const
......@@ -658,6 +660,7 @@ class category
/// @brief Return a tuple containing the values for the columns @a columns for the first row that matches condition @a cond
/// @tparam Ts The types of the values to return
/// @param cond The condition to search for
/// @param columns The columns for which the values should be returned
/// @return The values found or default constructed values if not found
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find_first(condition &&cond, Cs... columns) const
......@@ -672,6 +675,7 @@ class category
/// @tparam Ts The types of the values to return
/// @param pos The location to start searching
/// @param cond The condition to search for
/// @param columns The columns for which the values should be returned
/// @return The values found or default constructed values if not found
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find_first(const_iterator pos, condition &&cond, Cs... columns) const
......@@ -735,7 +739,6 @@ class category
/// @brief Return the minimum value for column @a column for all rows
/// @tparam T The type of the value to return
/// @param column The column to use for the value
/// @param cond The condition to search for
/// @return The value found or the maximum value for the type
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
T find_min(const char *column) const
......
......@@ -26,14 +26,6 @@
#pragma once
/// \file compound.hpp cif++/compound.hpp
/// This file contains the definition for the class compound, encapsulating
/// the information found for compounds in the CCD.
///
/// The data is loaded by default from a file called `components.cif`. This file
/// is located using @ref cif::load_resource which searches the default directories
/// See documentation on @ref cif::load_resource for more information
#include "cif++/atom_type.hpp"
#include "cif++/datablock.hpp"
#include "cif++/exports.hpp"
......@@ -45,6 +37,18 @@
#include <tuple>
#include <vector>
/// \file compound.hpp
/// This file contains the definition for the class compound, encapsulating
/// the information found for compounds in the CCD.
///
/// The data is loaded by default from a file called `components.cif`. This file
/// is located using load_resource. (See documentation on cif::load_resource for more information)
///
/// But if the CCP4 environment is available at runtime, the compound information
/// may also be generated from the CCP4 monomer library.
///
/// Note that the information in CCP4 and CCD is not equal.
namespace cif
{
......@@ -54,7 +58,7 @@ class compound;
struct compound_atom;
class compound_factory_impl;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
/// \brief The bond type or bond order as defined in the CCD, possible values taken from the mmcif_pdbx file
enum class bond_type
{
sing, ///< single bond
......@@ -73,20 +77,40 @@ std::string to_string(bond_type bondType);
/// @brief return the @ref bond_type for the string representation @a bondType
bond_type from_string(const std::string &bondType);
/// \brief The possible stereo config values for a compound_atom.
///
/// As the site https://psiberg.com/r-s-nomenclature/ states:
///
/// > RS nomenclature is currently the preferred system for assigning absolute
/// > configuration to chiral molecules. The letters R and S come from the Latin
/// > words ‘Rectus‘ and ‘Sinister‘ meaning ‘right’ and ‘left’. Molecules that
/// > rotate the plane of polarized light to right are referred to as ‘R isomers’
/// > and the molecules that rotate the plane of polarized light to left are
/// > referred to ‘S isomers’.
enum class stereo_config_type : uint8_t
{
// Each enumerator's underlying value is the character literal of its own name.
N = 'N', ///< Stored as 'N'; presumably "no stereo configuration" -- TODO confirm against the CCD dictionary
R = 'R', ///< Stored as 'R'; the R ("Rectus") configuration
S = 'S' ///< Stored as 'S'; the S ("Sinister") configuration
};
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
struct compound_atom
{
std::string id;
atom_type type_symbol;
int charge = 0;
bool aromatic = false;
bool leaving_atom = false;
bool stereo_config = false;
float x, y, z;
std::string id; ///< Identifier for each atom in the chemical component
atom_type type_symbol; ///< The element type for each atom in the chemical component.
int charge = 0; ///< The formal charge assigned to each atom in the chemical component.
bool aromatic = false; ///< Defines atoms in an aromatic moiety
bool leaving_atom = false; ///< Flags atoms with "leaving" capability
stereo_config_type stereo_config = stereo_config_type::N; ///< Defines the stereochemical configuration of the chiral center atom.
float x, ///< The x component of the coordinates for each atom specified as orthogonal angstroms.
y, ///< The y component of the coordinates for each atom specified as orthogonal angstroms.
z; ///< The z component of the coordinates for each atom specified as orthogonal angstroms.
/// Return the location of the atom as a point
point get_location() const
{
return { x, y, z };
......@@ -98,9 +122,10 @@ struct compound_atom
struct compound_bond
{
std::string atom_id[2];
bond_type type;
bool aromatic = false, stereo_config = false;
std::string atom_id[2]; ///< The ID's of the two atoms that define the bond.
bond_type type; ///< The bond order of the chemical bond associated with the specified atoms.
bool aromatic = false, ///< Defines aromatic bonds.
stereo_config = false; ///< Defines stereochemical bonds.
};
/// --------------------------------------------------------------------
......@@ -116,23 +141,26 @@ class compound
public:
// accessors
std::string id() const { return m_id; }
std::string name() const { return m_name; }
std::string type() const { return m_type; }
std::string id() const { return m_id; } ///< Return the alphanumeric code for the chemical component.
std::string name() const { return m_name; } ///< Return the name of the chemical component.
std::string type() const { return m_type; } ///< Return the type of monomer.
std::string formula() const { return m_formula; } ///< Return the chemical formula of the chemical component.
float formula_weight() const { return m_formula_weight; } ///< Return the formula mass of the chemical component in Daltons.
int formal_charge() const { return m_formal_charge; } ///< Return the formal charge on the chemical component.
/// The group record is only available in CCP4 monomer library files.
/// For CCD entries this value will always contain 'non-polymer'
std::string group() const { return m_group; }
std::string formula() const { return m_formula; }
float formula_weight() const { return m_formula_weight; }
int formal_charge() const { return m_formal_charge; }
const std::vector<compound_atom> &atoms() const { return m_atoms; }
const std::vector<compound_bond> &bonds() const { return m_bonds; }
const std::vector<compound_atom> &atoms() const { return m_atoms; } ///< Return the list of atoms for this compound
const std::vector<compound_bond> &bonds() const { return m_bonds; } ///< Return the list of bonds for this compound
compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
compound_atom get_atom_by_atom_id(const std::string &atom_id) const; ///< Return the atom with id @a atom_id
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
float bond_length(const std::string &atomId_1, const std::string &atomId_2) const;
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const; ///< Return true if @a atomId_1 is bonded to @a atomId_2
float bond_length(const std::string &atomId_1, const std::string &atomId_2) const; ///< Return the bond length between @a atomId_1 and @a atomId_2
bool is_water() const
bool is_water() const ///< Return if the compound is actually a water
{
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
}
......@@ -159,6 +187,8 @@ class compound
// --------------------------------------------------------------------
// Factory class for compound and Link objects
/// Use the compound_factory singleton instance to create compound objects
class compound_factory
{
public:
......@@ -170,14 +200,28 @@ class compound_factory
/// flag to true.
static void init(bool useThreadLocalInstanceOnly);
/// Return the singleton instance. If initialized with local threads, this is the
/// instance for the current thread.
static compound_factory &instance();
/// Delete and reset the singleton instance. If initialized with local threads, this is the
/// instance for the current thread.
static void clear();
/// Set the default dictionary file to @a inDictFile
void set_default_dictionary(const std::filesystem::path &inDictFile);
/// Override any previously loaded dictionary with @a inDictFile
void push_dictionary(const std::filesystem::path &inDictFile);
/// Remove the last pushed dictionary
void pop_dictionary();
/// Return whether @a res_name is a valid and known peptide
bool is_known_peptide(const std::string &res_name) const;
/// Return whether @a res_name is a valid and known base
bool is_known_base(const std::string &res_name) const;
/// \brief Create the compound object for \a id
......@@ -190,7 +234,8 @@ class compound_factory
~compound_factory();
static CIFPP_EXPORT const std::map<std::string, char> kAAMap, kBaseMap;
CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids
kBaseMap; ///< Globally accessible static list of the default bases
private:
compound_factory();
......
......@@ -29,6 +29,8 @@
#include "cif++/category.hpp"
#include "cif++/forward_decl.hpp"
/// \file datablock.hpp
namespace cif
{
......
......@@ -32,6 +32,10 @@
#include "cif++/datablock.hpp"
#include "cif++/parser.hpp"
/// \file file.hpp
/// The file class defined here encapsulates the contents of an mmCIF file.
/// It is mainly a list of @ref cif::datablock objects.
namespace cif
{
......
......@@ -27,8 +27,11 @@
#pragma once
#include "cif++/atom_type.hpp"
#include "cif++/datablock.hpp"
#include "cif++/point.hpp"
#include <numeric>
#include <memory>
#if __cpp_lib_format
#include <format>
......@@ -794,6 +797,7 @@ class structure
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
/// \param auth_seq_id The auth sequence ID
void remove_residue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id);
/// \brief Create a new non-polymer entity, returns new ID
......
......@@ -40,6 +40,10 @@
#include <clipper/core/coords.h>
#endif
/// \file point.hpp
/// This file contains the definition for *cif::point* as well as
/// lots of routines and classes that can manipulate points.
namespace cif
{
......
......@@ -188,6 +188,9 @@ struct iless
}
};
/// iset is a std::set of std::string but with a comparator that
/// ignores character case.
using iset = std::set<std::string, iless>;
// --------------------------------------------------------------------
......
......@@ -49,6 +49,10 @@
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
#endif
/// \file utilities.hpp
/// This file contains code that is very generic in nature, such as a progress_bar.
/// The cif namespace
namespace cif
{
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment