Commit 0855965e by Maarten L. Hekkelman

Documenting more

Fixed colouring output manipulators
parent fe3cbdab
......@@ -243,7 +243,6 @@ set(project_sources
${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${PROJECT_SOURCE_DIR}/src/pdb/tls.cpp
)
set(project_headers
......@@ -268,6 +267,8 @@ set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
......
Introduction
============
Information on 3D structures of proteins originally came formatted in [PDB](http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html) files. Although the specification for this format had some real restrictions like a mandatory HEADER and CRYST line, many programs implemented this very poorly, often writing out only ATOM records. And users became used to this.
Information on 3D structures of proteins originally came formatted in `PDB <http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html>`_ files. Although the specification for this format had some real restrictions like a mandatory HEADER and CRYST line, many programs implemented this very poorly, often writing out only ATOM records. And users became used to this.
The PDB format has some severe limitations rendering it useless for all but very small protein structures. A new format called [mmCIF](https://mmcif.wwpdb.org/) has been around for decades and now is the default format for the Protein Data Bank.
The PDB format has some severe limitations rendering it useless for all but very small protein structures. A new format called `mmCIF <https://mmcif.wwpdb.org/>`_ has been around for decades and now is the default format for the Protein Data Bank.
The software developed in the [PDB-REDO](https://pdb-redo.eu/) project aims at improving 3D models based on original experimental data. For this, the tools need to be able to work with both PDB and mmCIF files. A decision was made to make mmCIF leading internally in all programs and convert PDB directly into mmCIF before processing the data. A robust conversion had to be developed to make this possible since, as noted above, files can come with more or less information, sometimes making it necessary to perform a sequence alignment to find out the exact residue numbers.
The software developed in the `PDB-REDO <https://pdb-redo.eu/>`_ project aims at improving 3D models based on original experimental data. For this, the tools need to be able to work with both PDB and mmCIF files. A decision was made to make mmCIF leading internally in all programs and convert PDB directly into mmCIF before processing the data. A robust conversion had to be developed to make this possible since, as noted above, files can come with more or less information, sometimes making it necessary to perform a sequence alignment to find out the exact residue numbers.
And so libcif++ came to life, a library to work with mmCIF files. Work on this library started early 2017 and has developed quite a bit since then. To reduce dependency on other libraries, some functionality was added that is not strictly related to reading and writing mmCIF files but may be useful nonetheless. This is mostly code that is used in 3D calculations and symmetry operations.
......
......@@ -37,5 +37,5 @@
#include "cif++/model.hpp"
#include "cif++/pdb/io.hpp"
#include "cif++/pdb.hpp"
#include "cif++/gzio.hpp"
\ No newline at end of file
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/file.hpp"
/**
* @file pdb.hpp
*
* This file presents the API to read and write files in the
* legacy and ancient PDB format.
*
* The code works on the basis of best effort since it is
* impossible to have correct round trip fidelity.
*
*/
namespace cif::pdb
{
// --------------------------------------------------------------------
// PDB to mmCIF
/** @brief Read a file in either mmCIF or PDB format from file @a file,
 * compressed or not, depending on the content.
 *
 * @param file The path of the file to read
 * @return The data that was read, as a cif::file
 */
file read(const std::filesystem::path &file);
/** @brief Read a file in either mmCIF or PDB format from std::istream @a is,
 * compressed or not, depending on the content.
 *
 * @param is The stream to read from
 * @return The data that was read, as a cif::file
 */
file read(std::istream &is);
/**
 * @brief Read a file in legacy PDB format from std::istream @a pdbFile
 * and return the data as a cif::file
 *
 * @param pdbFile The stream containing the legacy PDB formatted data
 * @return The data that was read, as a cif::file
 */
file read_pdb_file(std::istream &pdbFile);
// --------------------------------------------------------------------
// mmCIF to PDB
/** @brief Write out the data in @a db in legacy PDB format
 * to std::ostream @a os
 *
 * @param os The stream to write to
 * @param db The datablock to write
 */
void write(std::ostream &os, const datablock &db);
/** @brief Write out the data in @a f in legacy PDB format
 * to std::ostream @a os
 *
 * Only the first datablock of @a f (i.e. f.front()) is written.
 * NOTE(review): presumably @a f must contain at least one datablock,
 * confirm against the definition of cif::file.
 *
 * @param os The stream to write to
 * @param f The file whose first datablock is written
 */
inline void write(std::ostream &os, const file &f)
{
write(os, f.front());
}
/** @brief Write out the data in @a db to file @a file
 * in legacy PDB format or mmCIF format, depending on the
 * filename extension.
 *
 * If extension of @a file is *.gz* the resulting file will
 * be written in gzip compressed format.
 *
 * @param file The path of the file to write
 * @param db The datablock to write
 */
void write(const std::filesystem::path &file, const datablock &db);
/** @brief Write out the data in @a f to file @a p
 * in legacy PDB format or mmCIF format, depending on the
 * filename extension.
 *
 * If extension of @a p is *.gz* the resulting file will
 * be written in gzip compressed format.
 *
 * Only the first datablock of @a f (i.e. f.front()) is written.
 * NOTE(review): presumably @a f must contain at least one datablock,
 * confirm against the definition of cif::file.
 *
 * @param p The path of the file to write
 * @param f The file whose first datablock is written
 */
inline void write(const std::filesystem::path &p, const file &f)
{
write(p, f.front());
}
// --------------------------------------------------------------------
// Other I/O related routines
/** @brief Return the HEADER line for the data in @a data
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
 *
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 * @return The HEADER line as a std::string
 */
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the COMPND line for the data in @a data
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
 *
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 * @return The COMPND line as a std::string
 */
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the SOURCE line for the data in @a data
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
 *
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 * @return The SOURCE line as a std::string
 */
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the AUTHOR line for the data in @a data
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
 *
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 * @return The AUTHOR line as a std::string
 */
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace cif::pdb
......@@ -26,19 +26,8 @@
#pragma once
#include "cif++/datablock.hpp"
/// \file cif2pdb.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
namespace cif::pdb
{
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void write_header_lines(std::ostream &os, const datablock &data);
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"
} // namespace pdbx
......@@ -26,37 +26,7 @@
#pragma once
#include "cif++/datablock.hpp"
/// \file io.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
namespace cif::pdb
{
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(const std::filesystem::path &file);
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(std::istream &is);
/// \brief Write out a file in PDB format
void write(std::ostream &os, const datablock &db);
/// \brief Write out a file in PDB format
inline void write(std::ostream &os, const file &f)
{
write(os, f.front());
}
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
void write(const std::filesystem::path &file, const datablock &db);
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
inline void write(const std::filesystem::path &p, const file &f)
{
write(p, f.front());
}
}
\ No newline at end of file
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"
\ No newline at end of file
......@@ -26,13 +26,7 @@
#pragma once
#include "cif++/file.hpp"
/// \file pdb2cif.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
namespace cif::pdb
{
void ReadPDBFile(std::istream &pdbFile, file &cifFile);
} // namespace cif::pdb
\ No newline at end of file
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"
\ No newline at end of file
......@@ -26,28 +26,7 @@
#pragma once
#include "cif++/datablock.hpp"
#include <string>
#include <tuple>
#include <vector>
/// \file tls.hpp
/// \deprecated This code has been moved to libpdb-redo
namespace cif
{
struct tls_selection;
struct tls_residue;
struct tls_selection
{
virtual ~tls_selection() {}
virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
};
// Low level: get the selections
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
} // namespace cif
#warning "This code has been moved to libpdb-redo"
......@@ -57,11 +57,19 @@
/** \file utilities.hpp
*
* This file contains code that is very generic in nature like a progress_bar
* and classes you can use to colourise output text.
*/
namespace cif
{
/**
* @brief The global variable VERBOSE contains the level of verbosity
* requested. A value of 0 is normal, with some output on error conditions.
* A value > 0 will result in more output, the higher the value, the more
* output. A value < 0 will make the library silent, even in error
* conditions.
*/
extern CIFPP_EXPORT int VERBOSE;
/// return the git 'build' number
......@@ -69,61 +77,75 @@ std::string get_version_nr();
// --------------------------------------------------------------------
/// Return the width of the current output terminal, or the default 80 in case output is not to a terminal
uint32_t get_terminal_width();
// --------------------------------------------------------------------
/// Return the path of the current executable
std::string get_executable_path();
// --------------------------------------------------------------------
// some manipulators to write coloured text to terminals
/// @brief The defined string colours
enum class StringColour
{
BLACK = 0,
RED,
GREEN,
YELLOW,
BLUE,
MAGENTA,
CYAN,
WHITE,
NONE = 9
};
template<StringColour C>
struct ColourDefinition
{
static constexpr StringColour value = C;
};
enum class TextStyle
{
REGULAR = 22,
BOLD = 1
};
template<TextStyle S>
struct StyleDefinition
{
static constexpr TextStyle value = S;
};
/**
* When writing out text to the terminal it is often useful to have
* some of the text colourised. But only if the output is really a
* terminal since colouring text is done using escape sequences
* and if output is redirected to a file, these escape sequences end up
* in the file making the real text less easy to read.
*
* The code presented here is rather basic. It mimics the std::quoted
* manipulator in that it will colour a string with optionally
* requested colours and text style.
*
* Example:
*
* @code {.cpp}
* using namespace cif::colour;
* std::cout << cif::coloured("Hello, world!", white, red, bold) << '\n';
* @endcode
*
*/
template<typename ForeColour, typename BackColour, typename Style>
struct ColourAndStyle
namespace colour
{
static constexpr StringColour fore_colour = ForeColour::value;
static constexpr StringColour back_colour = BackColour::value;
static constexpr TextStyle text_style = Style::value;
/// @brief The defined colours
///
/// The numeric value of each colour is the offset that is added to 30
/// (foreground) or 40 (background) to form the number written in the
/// terminal escape sequence.
enum colour_type
{
	black = 0,
	red = 1,
	green = 2,
	yellow = 3,
	blue = 4,
	magenta = 5,
	cyan = 6,
	white = 7,
	none = 9 ///< default value for the background colour argument of coloured()
};
/// @brief The defined text styles
///
/// The numeric value of each style is written verbatim as the style
/// field of the terminal escape sequence.
enum style_type
{
	regular = 22, ///< the default style used by coloured()
	bold = 1,
	underlined = 4,
	blink = 5,
	inverse = 7
};
namespace detail
{
/**
* @brief Struct for coloured strings.
*/
template <typename StringType>
struct coloured_string_t
{
static_assert(std::is_reference_v<StringType> or std::is_pointer_v<StringType>,
"String type must be pointer or reference");
coloured_string_t(StringType s, colour_type fc, colour_type bc, style_type st)
: m_str(s)
, m_fore_colour(static_cast<int>(fc) + 30)
, m_back_colour(static_cast<int>(bc) + 40)
, m_style(static_cast<int>(st))
{
}
static constexpr int fore_colour_number = static_cast<int>(fore_colour) + 30;
static constexpr int back_colour_number = static_cast<int>(back_colour) + 40;
static constexpr int style_number = static_cast<int>(text_style);
coloured_string_t &operator=(coloured_string_t &) = delete;
friend std::ostream &operator<<(std::ostream &os, ColourAndStyle clr)
template <typename char_type, typename traits_type>
friend std::basic_ostream<char_type, traits_type> &operator<<(
std::basic_ostream<char_type, traits_type> &os, const coloured_string_t &cs)
{
bool use_colour = false;
......@@ -134,44 +156,59 @@ struct ColourAndStyle
if (use_colour)
{
if (fore_colour == StringColour::NONE and back_colour == StringColour::NONE)
os << "\033[0m";
else
os << "\033[" << fore_colour_number << ';' << style_number << ';' << back_colour_number << 'm';
os << "\033[" << cs.m_fore_colour << ';' << cs.m_style << ';' << cs.m_back_colour << 'm'
<< cs.m_str
<< "\033[0m";
}
return os;
}
};
template<typename ForeColour, typename BackColour>
constexpr auto coloured(const ForeColour fore, const BackColour back)
StringType m_str;
int m_fore_colour, m_back_colour;
int m_style;
};
} // namespace detail
} // namespace colour
/**
* @brief Manipulator for coloured strings.
* @param str String to colour.
* @param fg Foreground (=text) colour to use
* @param bg Background colour to use
* @param st Text style to use
*/
template <typename char_type>
inline auto coloured(const char_type *str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return ColourAndStyle<ForeColour, BackColour, StyleDefinition<TextStyle::REGULAR>>{};
return colour::detail::coloured_string_t<const char_type *>(str, fg, bg, st);
}
template<typename ForeColour, typename BackColour, typename Style>
constexpr auto coloured(const ForeColour fore, const BackColour back, Style style)
template <typename char_type, typename traits_type, typename allocator_type>
inline auto coloured(const std::basic_string<char_type, traits_type, allocator_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return ColourAndStyle<ForeColour, BackColour, Style>{};
return colour::detail::coloured_string_t<const std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
}
namespace colour
template <typename char_type, typename traits_type, typename allocator_type>
inline auto coloured(std::basic_string<char_type, traits_type, allocator_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return colour::detail::coloured_string_t<std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
}
template <typename char_type, typename traits_type>
inline auto coloured(std::basic_string_view<char_type, traits_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
constexpr ColourDefinition<StringColour::BLACK> black = ColourDefinition<StringColour::BLACK>();
constexpr ColourDefinition<StringColour::RED> red = ColourDefinition<StringColour::RED>();
constexpr ColourDefinition<StringColour::GREEN> green = ColourDefinition<StringColour::GREEN>();
constexpr ColourDefinition<StringColour::YELLOW> yellow = ColourDefinition<StringColour::YELLOW>();
constexpr ColourDefinition<StringColour::BLUE> blue = ColourDefinition<StringColour::BLUE>();
constexpr ColourDefinition<StringColour::MAGENTA> magenta = ColourDefinition<StringColour::MAGENTA>();
constexpr ColourDefinition<StringColour::CYAN> cyan = ColourDefinition<StringColour::CYAN>();
constexpr ColourDefinition<StringColour::WHITE> white = ColourDefinition<StringColour::WHITE>();
constexpr ColourDefinition<StringColour::NONE> none = ColourDefinition<StringColour::NONE>();
constexpr StyleDefinition<TextStyle::REGULAR> regular = StyleDefinition<TextStyle::REGULAR>();
constexpr StyleDefinition<TextStyle::BOLD> bold = StyleDefinition<TextStyle::BOLD>();
constexpr auto reset = cif::coloured(none, none, regular);
return colour::detail::coloured_string_t<std::basic_string_view<char_type, traits_type> &>(str, fg, bg, st);
}
// --------------------------------------------------------------------
......
......@@ -25,8 +25,6 @@
*/
#include "cif++.hpp"
#include "cif++/pdb/cif2pdb.hpp"
#include "cif++/gzio.hpp"
#include <cmath>
#include <deque>
......
......@@ -26,9 +26,7 @@
#include "pdb2cif_remark_3.hpp"
#include <cif++.hpp>
#include <cif++/pdb/pdb2cif.hpp>
#include <cif++/gzio.hpp>
#include "cif++.hpp"
#include <iomanip>
#include <map>
......@@ -39,10 +37,8 @@ using cif::category;
using cif::datablock;
using cif::iequals;
using cif::key;
// using cif::row;
using cif::to_lower;
using cif::to_lower_copy;
// using cif::compound_factory;
// --------------------------------------------------------------------
// attempt to come up with better error handling
......@@ -6054,7 +6050,7 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()
// C++ is getting closer to Pascal :-)
auto printAlignment = [&tb, highX, highY, &rx, &ry, this]()
{
std::cerr << std::string(cif::get_terminal_width(), '-') << std::endl
std::cerr << std::string(22, '-') << std::endl
<< "Alignment for chain " << mDbref.chainID << std::endl
<< std::endl;
std::vector<std::pair<std::string, std::string>> alignment;
......@@ -6186,7 +6182,7 @@ bool PDBFileParser::PDBChain::SameSequence(const PDBChain &rhs) const
// --------------------------------------------------------------------
void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile)
void read_pdb_file(std::istream &pdbFile, cif::file &cifFile)
{
PDBFileParser p;
......@@ -6214,7 +6210,7 @@ file read(std::istream &is)
// is 'H'. It is as simple as that.
if (ch == 'h' or ch == 'H')
ReadPDBFile(is, result);
read_pdb_file(is, result);
else
{
try
......
......@@ -1060,7 +1060,7 @@ bool Remark3Parser::match(const char *expr, int nextState)
{
using namespace colour;
std::cerr << coloured(white, red, bold) << "No match:" << reset << " '" << expr << '\'' << std::endl;
std::cerr << coloured("No match:", white, red, bold) << " '" << expr << '\'' << std::endl;
}
return result;
......@@ -1124,7 +1124,7 @@ float Remark3Parser::parse()
{
using namespace colour;
std::cerr << coloured(white, red, bold) << "Dropping line:" << reset << " '" << mLine << '\'' << std::endl;
std::cerr << coloured("Dropping line:", white, red, bold) << " '" << mLine << '\'' << std::endl;
}
++dropped;
......
/*
Created by: Maarten L. Hekkelman
Date: dinsdag 07 november, 2017
Copyright 2017 NKI AVL
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
// #include <sys/ioctl.h>
// #include <termios.h>
#include "cif++.hpp"
#include "cif++/pdb/tls.hpp"
#include <iomanip>
#include <iostream>
namespace cif
{
/// Range bound meaning "no limit": a range selection whose first or last
/// bound equals this value accepts every residue number on that side.
const int kResidueNrWildcard = std::numeric_limits<int>::min();

/// Sequence ID stored for residues taken from the non-polymer and branch
/// schemes; such values are reported as 0 by tls_selection::get_ranges.
const int kNoSeqNum = std::numeric_limits<int>::max() - 1;
// --------------------------------------------------------------------
// We parse selection statements and create a selection expression tree
// which is then interpreted by setting the selected flag for the
// residues. After that, the selected ranges are collected and printed.
/**
 * @brief One residue as seen by the TLS selection code.
 *
 * The first four fields hold the identification used for matching and
 * sorting, @a selected is the flag toggled by the selection expression
 * tree and @a asymID / @a seqID hold the mmCIF identification.
 *
 * All members have an initialiser so that a default constructed residue
 * never contains indeterminate values. The struct remains an aggregate
 * and can still be brace-initialised with all seven fields.
 */
struct tls_residue
{
	std::string chainID;   ///< chain identifier used for matching and sorting
	int seqNr = 0;         ///< residue number used for matching and sorting
	char iCode = 0;        ///< insertion code, 0 when there is none
	std::string name;      ///< monomer (compound) name
	bool selected = false; ///< set by the selection tree

	std::string asymID;    ///< mmCIF asym_id
	int seqID = 0;         ///< mmCIF seq_id, or kNoSeqNum for non-polymer/branch residues

	/// Compare two residues. The mmCIF fields (asymID, seqID) do not take
	/// part in the comparison and the name is compared case-insensitively.
	bool operator==(const tls_residue &rhs) const
	{
		return chainID == rhs.chainID and
		       seqNr == rhs.seqNr and
		       iCode == rhs.iCode and
		       iequals(name, rhs.name) and
		       selected == rhs.selected;
	}
};
void dump_selection(const std::vector<tls_residue> &selected, size_t indentLevel)
{
std::string indent(indentLevel * 2, ' ');
auto i = selected.begin();
bool first = true;
// First print in PDB space
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool
{ return s.selected; });
if (b == selected.end())
break;
if (first)
std::cout << indent << "PDB:" << std::endl;
first = false;
auto e = find_if(b, selected.end(), [b](auto s) -> bool
{ return s.chainID != b->chainID or not s.selected; });
std::cout << indent << " >> " << b->chainID << ' ' << b->seqNr << ':' << (e - 1)->seqNr << std::endl;
i = e;
}
// Then in mmCIF space
if (not first)
std::cout << indent << "mmCIF:" << std::endl;
i = selected.begin();
while (i != selected.end())
{
auto b = find_if(i, selected.end(), [](auto s) -> bool
{ return s.selected; });
if (b == selected.end())
break;
auto e = find_if(b, selected.end(), [b](auto s) -> bool
{ return s.asymID != b->asymID or not s.selected; });
std::string asymID = b->asymID;
int from = b->seqID, to = from;
for (auto j = b + 1; j != e; ++j)
{
if (j->seqID == to + 1)
to = j->seqID;
else if (j->seqID != to) // probably an insertion code
{
if (from == kNoSeqNum or to == kNoSeqNum)
std::cout << indent << " >> " << asymID << std::endl;
else
std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
asymID = b->asymID;
from = to = b->seqID;
}
}
if (from == kNoSeqNum or to == kNoSeqNum)
std::cout << indent << " >> " << asymID << std::endl;
else
std::cout << indent << " >> " << asymID << ' ' << from << ':' << to << std::endl;
i = e;
}
if (first)
{
using namespace colour;
std::cout << indent << coloured(white, red, bold) << "Empty selection" << reset << std::endl;
}
}
/**
 * @brief Evaluate this selection against the residues in @a db and return
 * the selected residues as a list of ranges.
 *
 * The residues are collected from the categories pdbx_poly_seq_scheme,
 * pdbx_nonpoly_scheme (skipping HOH and H2O) and pdbx_branch_scheme. They
 * are sorted on chain and number and passed to collect_residues(), which
 * sets the selected flags. Runs of selected residues are then turned into
 * ranges, split wherever the numbering is not strictly consecutive.
 *
 * @param db           The datablock containing the scheme categories
 * @param pdbNamespace If true, residues are identified and reported using
 *                     the pdb_* columns, otherwise using asym_id/seq_id
 * @return A vector of tuples (chain or asym ID, first number, last number).
 *         Numbers equal to kNoSeqNum are returned as 0.
 * @throws std::runtime_error when an insertion code is longer than one character
 */
std::vector<std::tuple<std::string, int, int>> tls_selection::get_ranges(cif::datablock &db, bool pdbNamespace) const
{
	std::vector<tls_residue> selected;

	// Collect the residues from poly seq scheme...
	for (auto r : db["pdbx_poly_seq_scheme"])
	{
		std::string chain, seqNr, iCode, name;
		std::string asymID;
		int seqID = 0;

		if (pdbNamespace)
			cif::tie(chain, seqNr, iCode, name, asymID, seqID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id", "seq_id");
		else
			cif::tie(chain, seqNr, name) = r.get("asym_id", "seq_id", "mon_id");

		// Skip rows without a number *before* trying to convert it,
		// std::stoi throws on an empty string.
		if (seqNr.empty())
			continue;

		if (not pdbNamespace)
		{
			asymID = chain;
			seqID = std::stoi(seqNr);
		}

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		selected.push_back({ chain, std::stoi(seqNr), iCode[0], name, false, asymID, seqID });
	}

	// ... those from the nonpoly scheme
	for (auto r : db["pdbx_nonpoly_scheme"])
	{
		std::string chain, seqNr, iCode, name, asymID;

		if (pdbNamespace)
		{
			cif::tie(chain, seqNr, iCode, name, asymID) = r.get("pdb_strand_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
			if (seqNr.empty())
				continue;
		}
		else
		{
			cif::tie(chain, name) = r.get("asym_id", "mon_id");
			asymID = chain;
			seqNr = "0";
		}

		// Waters are never part of a selection
		if (iequals(name, "HOH") or iequals(name, "H2O"))
			continue;

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		selected.push_back({ chain, std::stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
	}

	// ... and those from the branch scheme
	for (auto r : db["pdbx_branch_scheme"])
	{
		std::string chain, seqNr, iCode, name, asymID;

		if (pdbNamespace)
		{
			cif::tie(chain, seqNr, iCode, name, asymID) = r.get("auth_asym_id", "pdb_seq_num", "pdb_ins_code", "pdb_mon_id", "asym_id");
			if (seqNr.empty())
				continue;
		}
		else
		{
			cif::tie(chain, name) = r.get("asym_id", "mon_id");
			asymID = chain;
			seqNr = "0";
		}

		if (iCode.length() > 1)
			throw std::runtime_error("invalid iCode");

		selected.push_back({ chain, std::stoi(seqNr), iCode[0], name, false, asymID, kNoSeqNum });
	}

	// selected might consist of multiple ranges
	// output per chain

	// Order on chain first, then on number. Comparing the values directly
	// (instead of subtracting them) cannot overflow.
	std::stable_sort(selected.begin(), selected.end(), [](const tls_residue &a, const tls_residue &b)
		{ return std::tie(a.chainID, a.seqNr) < std::tie(b.chainID, b.seqNr); });

	collect_residues(db, selected);

	std::vector<std::tuple<std::string, int, int>> result;

	// The PDB and mmCIF variants differ only in the fields used to
	// identify a residue, so select those fields once.
	std::string tls_residue::*const chainField = pdbNamespace ? &tls_residue::chainID : &tls_residue::asymID;
	int tls_residue::*const numberField = pdbNamespace ? &tls_residue::seqNr : &tls_residue::seqID;

	auto i = selected.begin();
	while (i != selected.end())
	{
		auto b = std::find_if(i, selected.end(), [](const tls_residue &r)
			{ return r.selected; });
		if (b == selected.end())
			break;

		auto e = std::find_if(b, selected.end(), [b, chainField](const tls_residue &r)
			{ return r.*chainField != (*b).*chainField or not r.selected; });

		// return ranges with strict increasing sequence numbers.
		// So when there's a gap in the sequence we split the range.
		// Beware of iCodes though
		result.emplace_back((*b).*chainField, (*b).*numberField, (*b).*numberField);

		for (auto j = b + 1; j != e; ++j)
		{
			const int nr = (*j).*numberField;
			int &last = std::get<2>(result.back());

			if (nr == last + 1)
				last = nr;
			else if (nr != last) // probably an insertion code
				result.emplace_back((*b).*chainField, nr, nr);
		}

		i = e;
	}

	// Residues without a sequence number are reported as 0
	for (auto &&[name, i1, i2] : result)
	{
		if (i1 == kNoSeqNum)
			i1 = 0;
		if (i2 == kNoSeqNum)
			i2 = 0;
	}

	return result;
}
/// Selection node that inverts the result of the selection it wraps.
struct tls_selection_not : public tls_selection
{
	/// Takes over ownership of @a selection
	tls_selection_not(std::unique_ptr<tls_selection> selection)
		: selection(selection.release())
	{
	}

	/// Let the wrapped selection mark the residues, then flip every flag.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		selection->collect_residues(db, residues, indentLevel + 1);

		for (auto it = residues.begin(); it != residues.end(); ++it)
			it->selected = not it->selected;

		if (cif::VERBOSE <= 0)
			return;

		const std::string indent(indentLevel * 2, ' ');
		std::cout << indent << "NOT" << std::endl;
		dump_selection(residues, indentLevel);
	}

	std::unique_ptr<tls_selection> selection; ///< the selection being inverted
};
/// Selection node that selects every residue.
struct tls_selection_all : public tls_selection
{
	tls_selection_all() = default;

	/// Set the selected flag of all residues.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		for (auto it = residues.begin(); it != residues.end(); ++it)
			it->selected = true;

		if (cif::VERBOSE <= 0)
			return;

		const std::string indent(indentLevel * 2, ' ');
		std::cout << indent << "ALL" << std::endl;
		dump_selection(residues, indentLevel);
	}
};
/// Selection node that selects all residues of one chain, or of every
/// chain when the chain ID is "*".
struct tls_selection_chain : public tls_selection_all
{
	tls_selection_chain(const std::string &chainID)
		: m_chain(chainID)
	{
	}

	/// Set the selected flag of each residue to whether its chain matches.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		if (m_chain == "*")
		{
			// wildcard, every chain matches
			for (auto &res : residues)
				res.selected = true;
		}
		else
		{
			for (auto &res : residues)
				res.selected = (res.chainID == m_chain);
		}

		if (cif::VERBOSE <= 0)
			return;

		const std::string indent(indentLevel * 2, ' ');
		std::cout << indent << "CHAIN " << m_chain << std::endl;
		dump_selection(residues, indentLevel);
	}

	std::string m_chain; ///< chain ID to select, "*" for all chains
};
/// Selection node that selects the residues having a specific residue
/// number and insertion code, in any chain.
struct tls_selection_res_id : public tls_selection_all
{
	tls_selection_res_id(int seqNr, char iCode)
		: m_seq_nr(seqNr)
		, m_icode(iCode)
	{
	}

	/// Set the selected flag of each residue to whether number and
	/// insertion code both match.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		for (auto &res : residues)
		{
			const bool same_number = res.seqNr == m_seq_nr;
			res.selected = same_number and res.iCode == m_icode;
		}

		if (cif::VERBOSE <= 0)
			return;

		// Only print the insertion code when there is one
		std::string icode;
		if (m_icode)
			icode.push_back(m_icode);

		std::cout << std::string(indentLevel * 2, ' ') << "ResID " << m_seq_nr << icode << std::endl;
		dump_selection(residues, indentLevel);
	}

	int m_seq_nr; ///< residue number to match
	char m_icode; ///< insertion code to match, 0 for none
};
/// Selection node that selects residues whose number lies within an
/// inclusive numeric range. A bound equal to kResidueNrWildcard is
/// treated as "no limit" on that side.
struct tls_selection_range_seq : public tls_selection_all
{
	tls_selection_range_seq(int first, int last)
		: m_first(first)
		, m_last(last)
	{
	}

	/// Set the selected flag of each residue to whether its number is in range.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		for (auto &res : residues)
		{
			const bool above_lower = m_first == kResidueNrWildcard or res.seqNr >= m_first;
			const bool below_upper = m_last == kResidueNrWildcard or res.seqNr <= m_last;

			res.selected = above_lower and below_upper;
		}

		if (cif::VERBOSE <= 0)
			return;

		const std::string indent(indentLevel * 2, ' ');
		std::cout << indent << "Range " << m_first << ':' << m_last << std::endl;
		dump_selection(residues, indentLevel);
	}

	int m_first, m_last; ///< inclusive bounds of the range
};
// Selection node for "first THROUGH last": per chain, selects the run of
// residues (in list order) that starts at the residue matching
// (m_first, m_icode_first) and ends at the one matching (m_last, m_icode_last).
struct tls_selection_range_id : public tls_selection_all
{
	tls_selection_range_id(int first, int last, char icodeFirst = 0, char icodeLast = 0)
		: m_first(first)
		, m_last(last)
		, m_icode_first(icodeFirst)
		, m_icode_last(icodeLast)
	{
	}

	// Sets the selected flag on every residue inside the run, for each chain in
	// which both end points are found and the first does not come after the last.
	// Unlike the other selectors this only ever sets flags, it never clears one,
	// so the result is only meaningful when the flags are all false on entry.
	// With cif::VERBOSE > 0 the node is printed and the selection dumped.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		// need to do this per chain
		std::set<std::string> chains;
		for (const auto &r : residues)
			chains.insert(r.chainID);

		for (const std::string &chain : chains)
		{
			// The predicates take the residue by const reference: they run for
			// every element and a by-value parameter would copy each one.
			auto f = std::find_if(residues.begin(), residues.end(),
				[this, &chain](const auto &r) -> bool
				{
					return r.chainID == chain and r.seqNr == m_first and r.iCode == m_icode_first;
				});

			auto l = std::find_if(residues.begin(), residues.end(),
				[this, &chain](const auto &r) -> bool
				{
					return r.chainID == chain and r.seqNr == m_last and r.iCode == m_icode_last;
				});

			if (f != residues.end() and l != residues.end() and f <= l)
			{
				// make the range half-open so the last residue is included
				++l;

				for (; f != l; ++f)
					f->selected = true;
			}
		}

		if (cif::VERBOSE > 0)
		{
			std::cout << std::string(indentLevel * 2, ' ') << "Through " << m_first << ':' << m_last << std::endl;
			dump_selection(residues, indentLevel);
		}
	}

	int m_first, m_last;
	char m_icode_first, m_icode_last;
};
// Logical OR of two sub-selections.
struct tls_selection_union : public tls_selection
{
	// Both constructors take over the objects held by lhs and rhs; the
	// unique_ptrs passed in are left empty.
	tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
		: lhs(lhs.release())
		, rhs(rhs.release())
	{
	}

	tls_selection_union(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
		: lhs(lhs.release())
		, rhs(rhs.release())
	{
	}

	// Evaluates each operand on its own copy of the residue list (with all
	// flags cleared first) and marks a residue selected when either operand
	// selected it. Operands are evaluated one indent level deeper.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		auto left = residues;
		for (auto &r : left)
			r.selected = false;

		// the right operand starts from the same, cleared, state
		auto right = left;

		lhs->collect_residues(db, left, indentLevel + 1);
		rhs->collect_residues(db, right, indentLevel + 1);

		for (size_t i = 0; i < residues.size(); ++i)
			residues[i].selected = left[i].selected or right[i].selected;

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Union" << std::endl;
			dump_selection(residues, indentLevel);
		}
	}

	std::unique_ptr<tls_selection> lhs;
	std::unique_ptr<tls_selection> rhs;
};
// Logical AND of two sub-selections.
struct tls_selection_intersection : public tls_selection
{
	// Both constructors take over the objects held by lhs and rhs; the
	// unique_ptrs passed in are left empty.
	tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &rhs)
		: lhs(lhs.release())
		, rhs(rhs.release())
	{
	}

	tls_selection_intersection(std::unique_ptr<tls_selection> &lhs, std::unique_ptr<tls_selection> &&rhs)
		: lhs(lhs.release())
		, rhs(rhs.release())
	{
	}

	// Evaluates each operand on its own copy of the residue list (with all
	// flags cleared first) and marks a residue selected only when both
	// operands selected it. Operands are evaluated one indent level deeper.
	void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
	{
		auto left = residues;
		for (auto &r : left)
			r.selected = false;

		// the right operand starts from the same, cleared, state
		auto right = left;

		lhs->collect_residues(db, left, indentLevel + 1);
		rhs->collect_residues(db, right, indentLevel + 1);

		for (size_t i = 0; i < residues.size(); ++i)
			residues[i].selected = left[i].selected and right[i].selected;

		if (cif::VERBOSE > 0)
		{
			const std::string indent(indentLevel * 2, ' ');
			std::cout << indent << "Intersection" << std::endl;
			dump_selection(residues, indentLevel);
		}
	}

	std::unique_ptr<tls_selection> lhs;
	std::unique_ptr<tls_selection> rhs;
};
struct tls_selection_by_name : public tls_selection_all
{
public:
tls_selection_by_name(const std::string &resname)
: m_name(resname)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
for (auto &r : residues)
r.selected = r.name == m_name;
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "Name " << m_name << std::endl;
dump_selection(residues, indentLevel);
}
}
std::string m_name;
};
struct tls_selection_by_element : public tls_selection_all
{
public:
tls_selection_by_element(const std::string &element)
: m_element(element)
{
}
void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, size_t indentLevel) const override
{
// rationale... We want to select residues only. So we select
// residues that have just a single atom of type m_element.
// And we assume these have as residue name... m_element.
// ... Right?
for (auto &r : residues)
r.selected = iequals(r.name, m_element);
if (cif::VERBOSE > 0)
{
std::cout << std::string(indentLevel * 2, ' ') << "Element " << m_element << std::endl;
dump_selection(residues, indentLevel);
}
}
std::string m_element;
};
// --------------------------------------------------------------------
// Common base for the TLS selection parsers. It owns a copy of the selection
// text, a cursor into that copy and one token of lookahead; the concrete
// tokenizer and grammar are supplied by derived classes.
class tls_selection_parser_impl
{
  public:
	// Copies selection and positions the cursor at its first character.
	// The constructor does not read a token: derived constructors are expected
	// to prime m_lookahead by calling get_next_token() themselves.
	tls_selection_parser_impl(const std::string &selection)
		: m_selection(selection)
		, m_p(m_selection.begin())
		, m_end(m_selection.end())
	{
	}

	// Polymorphic base class, so destruction through a base pointer must be safe.
	virtual ~tls_selection_parser_impl() = default;

	// Parses the complete selection text and returns the resulting selection tree.
	virtual std::unique_ptr<tls_selection> Parse() = 0;

  protected:
	// Reads the next token starting at m_p and returns its code.
	virtual int get_next_token() = 0;

	// Consumes the lookahead token when it equals token, otherwise throws
	// std::runtime_error describing what was expected and what was found.
	virtual void match(int token);

	// Human readable description of a token code, used in error messages.
	virtual std::string to_string(int token) = 0;

	std::string m_selection;          // private copy of the text being parsed
	std::string::iterator m_p, m_end; // cursor and end, both into m_selection
	int m_lookahead = 0;              // current token; 0 until a derived constructor primes it
	std::string m_token;              // text associated with the current token, for diagnostics
};
// Accepts the lookahead token when it equals token and advances to the next
// one. On a mismatch a std::runtime_error is thrown whose message names both
// the expected and the found token: codes of 256 and up are described through
// to_string(), lower codes are shown as the character they stand for.
void tls_selection_parser_impl::match(int token)
{
	if (m_lookahead == token)
	{
		m_lookahead = get_next_token();
		return;
	}

	const std::string expected = token >= 256
	                                 ? to_string(token)
	                                 : std::string(1, char(token));

	const std::string found = m_lookahead >= 256
	                              ? to_string(m_lookahead) + " (" + m_token + ')'
	                              : std::string(1, char(m_lookahead));

	throw std::runtime_error("Expected " + expected + " but found " + found);
}
// --------------------------------------------------------------------
class TLSSelectionParserImplPhenix : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplPhenix(const std::string &selection)
: tls_selection_parser_impl(selection)
{
m_lookahead = get_next_token();
}
virtual std::unique_ptr<tls_selection> Parse();
private:
std::unique_ptr<tls_selection> ParseAtomSelection();
std::unique_ptr<tls_selection> ParseTerm();
std::unique_ptr<tls_selection> ParseFactor();
enum TOKEN
{
pt_NONE = 0,
pt_IDENT = 256,
pt_STRING,
pt_NUMBER,
pt_RESID,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_ICODE,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int get_next_token();
virtual std::string to_string(int token);
int m_value_i;
std::string m_value_s;
char m_icode;
};
// Tokenizer for the PHENIX selection syntax.
//
// Reads one token starting at m_p and returns its code: one of the pt_* values,
// or the character itself for anything that is not recognised as a larger token.
// Side effects: m_value_i, m_icode, m_value_s and m_token are reset and then
// filled for the token read; m_p is left just behind the token.
//
// The scanner is a backtracking state machine. Starting at the first
// non-whitespace character it tries the token families in a fixed order:
// RESID (integer + one letter), NUM, IDENT, single-quoted string,
// double-quoted string and finally OTHER (single character). When a family
// does not match, restart() rewinds to the token start and tries the next one.
//
// Throws std::runtime_error when a quoted string is not closed.
int TLSSelectionParserImplPhenix::get_next_token()
{
int result = pt_NONE;
// Each family owns a block of state numbers; sub-states are base + n
enum STATE
{
st_START,
st_RESID = 200,
st_NUM = 300,
st_IDENT = 400,
st_QUOTED = 500,
st_DQUOTED = 550,
st_OTHER = 600
};
int state = st_START;
m_value_i = 0;
m_icode = 0;
m_value_s.clear();
// s marks where the current token starts, start remembers the family being tried
auto s = m_p;
auto start = state;
m_token.clear();
// Give up on the current family: select the next one and rewind to the token start
auto restart = [&]()
{
switch (start)
{
case st_START: state = start = st_RESID; break;
case st_RESID: state = start = st_NUM; break;
case st_NUM: state = start = st_IDENT; break;
case st_IDENT: state = start = st_QUOTED; break;
case st_QUOTED: state = start = st_DQUOTED; break;
case st_DQUOTED: state = start = st_OTHER; break;
}
m_token.clear();
m_p = s;
};
// Push the character just read back onto the input.
// NOTE(review): at end of input nothing was appended to m_token, yet
// pop_back() still removes a character, so m_token (used in error messages
// only) can end up one character short.
auto retract = [&]()
{
--m_p;
m_token.pop_back();
};
while (result == pt_NONE)
{
// NOTE(review): the iterator is dereferenced and advanced before the end
// check, so at end of input it is read at, and moved past, m_end.
// The end of input is presented to the states below as ch == 0.
char ch = *m_p++;
if (m_p > m_end)
ch = 0;
else
m_token += ch;
// NOTE(review): ch is a plain char handed to isspace/isdigit/isalpha/isalnum;
// presumably the input is ASCII only, confirm.
switch (state)
{
// start block: skip leading whitespace, detect end of input
case st_START:
if (ch == 0)
result = pt_EOLN;
else if (isspace(ch))
{
m_token.clear();
++s;
}
else
restart();
break;
// RESID block: optional minus, digits, exactly one letter (the insertion
// code) and then something that is not alphanumeric
case st_RESID:
if (ch == '-')
state = st_RESID + 1;
else if (isdigit(ch))
{
m_value_i = (ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_RESID + 2;
}
else
restart();
break;
case st_RESID + 2:
// The sign is taken from the value accumulated so far, which means a
// negative number whose first digit is 0 continues as a positive one
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (isalpha(ch))
{
m_icode = ch;
state = st_RESID + 3;
}
else
restart();
break;
case st_RESID + 3:
// More alphanumerics after the insertion code: not a resid after all
if (isalnum(ch))
restart();
else
{
retract();
result = pt_RESID;
}
break;
// NUM block: optional minus and digits, not directly followed by a letter
case st_NUM:
if (ch == '-')
state = st_NUM + 1;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 1:
if (isdigit(ch))
{
m_value_i = -(ch - '0');
state = st_NUM + 2;
}
else
restart();
break;
case st_NUM + 2:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (m_value_i < 0 ? -1 : 1) * (ch - '0');
else if (not isalpha(ch))
{
result = pt_NUMBER;
retract();
}
else
restart();
break;
// IDENT block: starts with an alphanumeric, continues with alphanumerics
// or single quotes
case st_IDENT:
if (isalnum(ch))
{
m_value_s = { ch };
state = st_IDENT + 1;
}
else
restart();
break;
case st_IDENT + 1:
if (isalnum(ch) or ch == '\'')
m_value_s += ch;
else
{
// only m_p is stepped back here, m_token keeps the extra character
--m_p;
result = pt_IDENT;
}
break;
// QUOTED block: text between single quotes, the quotes are not part of the value
case st_QUOTED:
if (ch == '\'')
{
m_value_s.clear();
state = st_QUOTED + 1;
}
else
restart();
break;
case st_QUOTED + 1:
if (ch == '\'')
result = pt_STRING;
else if (ch == 0)
throw std::runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// DQUOTED block: same as QUOTED but delimited by double quotes
case st_DQUOTED:
if (ch == '\"')
{
m_value_s.clear();
state = st_DQUOTED + 1;
}
else
restart();
break;
case st_DQUOTED + 1:
if (ch == '\"')
result = pt_STRING;
else if (ch == 0)
throw std::runtime_error("Unexpected end of selection, missing quote character?");
else
m_value_s += ch;
break;
// OTHER block: any other character is a token by itself
case st_OTHER:
result = ch;
break;
}
}
// Promote identifiers that are keywords; the comparison ignores case
if (result == pt_IDENT)
{
if (iequals(m_value_s, "CHAIN"))
result = pt_KW_CHAIN;
else if (iequals(m_value_s, "ALL"))
result = pt_KW_ALL;
else if (iequals(m_value_s, "AND"))
result = pt_KW_AND;
else if (iequals(m_value_s, "OR"))
result = pt_KW_OR;
else if (iequals(m_value_s, "NOT"))
result = pt_KW_NOT;
else if (iequals(m_value_s, "RESSEQ"))
result = pt_KW_RESSEQ;
else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI"))
result = pt_KW_RESID;
else if (iequals(m_value_s, "RESNAME"))
result = pt_KW_RESNAME;
else if (iequals(m_value_s, "ELEMENT"))
result = pt_KW_ELEMENT;
else if (iequals(m_value_s, "PDB"))
result = pt_KW_PDB;
else if (iequals(m_value_s, "ENTRY"))
result = pt_KW_ENTRY;
else if (iequals(m_value_s, "THROUGH"))
result = pt_KW_THROUGH;
}
return result;
}
std::string TLSSelectionParserImplPhenix::to_string(int token)
{
switch (token)
{
case pt_IDENT: return "identifier";
case pt_STRING: return "std::string";
case pt_NUMBER: return "number";
case pt_RESID: return "resid";
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default: return "character";
}
}
// Entry point: parses the whole selection statement.
// A statement starting with the PDB keyword is rejected with a
// std::runtime_error since that form is not implemented. One superfluous ')'
// directly before the end of the statement is tolerated, but reported on
// std::cerr once the statement turned out to be otherwise complete.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::Parse()
{
	if (m_lookahead == pt_KW_PDB)
	{
		match(pt_KW_PDB);
		// Match(pt_KW_ENTRY);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	auto selection = ParseAtomSelection();

	// swallow a single stray closing parenthesis
	const bool strayParenthesis = (m_lookahead == ')');
	if (strayParenthesis)
		m_lookahead = get_next_token();

	match(pt_EOLN);

	if (strayParenthesis)
		std::cerr << "WARNING: too many closing parenthesis in TLS selection statement" << std::endl;

	return selection;
}
// atom_selection := term { OR term }
// Terms are combined left to right into tls_selection_union nodes.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseAtomSelection()
{
	auto selection = ParseTerm();

	for (;;)
	{
		if (m_lookahead != pt_KW_OR)
			break;

		match(pt_KW_OR);
		selection.reset(new tls_selection_union(selection, ParseTerm()));
	}

	return selection;
}
// term := factor { AND factor }
// Factors are combined left to right into tls_selection_intersection nodes.
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseTerm()
{
	auto term = ParseFactor();

	for (;;)
	{
		if (m_lookahead != pt_KW_AND)
			break;

		match(pt_KW_AND);
		term.reset(new tls_selection_intersection(term, ParseFactor()));
	}

	return term;
}
std::unique_ptr<tls_selection> TLSSelectionParserImplPhenix::ParseFactor()
{
std::unique_ptr<tls_selection> result;
switch (m_lookahead)
{
case '(':
match('(');
result = ParseAtomSelection();
if (m_lookahead == pt_EOLN)
std::cerr << "WARNING: missing closing parenthesis in TLS selection statement" << std::endl;
else
match(')');
break;
case pt_KW_NOT:
match(pt_KW_NOT);
result.reset(new tls_selection_not(ParseAtomSelection()));
break;
case pt_KW_CHAIN:
{
match(pt_KW_CHAIN);
std::string chainID = m_value_s;
if (m_lookahead == pt_NUMBER) // sigh
{
chainID = to_string(m_value_i);
match(pt_NUMBER);
}
else
match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);
result.reset(new tls_selection_chain(chainID));
break;
}
case pt_KW_RESNAME:
{
match(pt_KW_RESNAME);
std::string name = m_value_s;
match(pt_IDENT);
result.reset(new tls_selection_by_name(name));
break;
}
case pt_KW_ELEMENT:
{
match(pt_KW_ELEMENT);
std::string element = m_value_s;
match(pt_IDENT);
result.reset(new tls_selection_by_element(element));
break;
}
case pt_KW_RESSEQ:
{
match(pt_KW_RESSEQ);
int from = m_value_i;
match(pt_NUMBER);
int to = from;
if (m_lookahead == ':')
{
match(':');
to = m_value_i;
match(pt_NUMBER);
}
result.reset(new tls_selection_range_seq(from, to));
break;
}
case pt_KW_RESID:
{
match(pt_KW_RESID);
int from, to;
char icode_from = 0, icode_to = 0;
bool through = false;
from = to = m_value_i;
if (m_lookahead == pt_NUMBER)
match(pt_NUMBER);
else
{
icode_from = m_icode;
match(pt_RESID);
}
if (m_lookahead == ':' or m_lookahead == pt_KW_THROUGH or m_lookahead == '-')
{
through = m_lookahead == pt_KW_THROUGH;
match(m_lookahead);
to = m_value_i;
if (m_lookahead == pt_NUMBER)
match(pt_NUMBER);
else
{
icode_to = m_icode;
match(pt_RESID);
}
if (through)
result.reset(new tls_selection_range_id(from, to, icode_from, icode_to));
else
{
if (cif::VERBOSE and (icode_from or icode_to))
std::cerr << "Warning, ignoring insertion codes" << std::endl;
result.reset(new tls_selection_range_seq(from, to));
}
}
else
result.reset(new tls_selection_res_id(from, icode_from));
break;
}
case pt_KW_ALL:
match(pt_KW_ALL);
result.reset(new tls_selection_all());
break;
default:
throw std::runtime_error("Unexpected token " + to_string(m_lookahead) + " (" + m_token + ')');
}
return result;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBuster : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplBuster(const std::string &selection);
virtual std::unique_ptr<tls_selection> Parse();
protected:
enum TOKEN
{
bt_NONE = 0,
bt_IDENT = 256,
bt_NUMBER,
bt_EOLN,
};
virtual int get_next_token();
virtual std::string to_string(int token);
std::unique_ptr<tls_selection> ParseGroup();
std::tuple<std::string, int> ParseAtom();
std::unique_ptr<tls_selection> ParseOldGroup();
int m_value_i;
std::string m_value_s;
bool m_parsing_old_style = false;
};
// Keeps a copy of the selection text (done by the base class) and reads the
// first token so that parsing can start with a valid lookahead.
TLSSelectionParserImplBuster::TLSSelectionParserImplBuster(const std::string &selection)
	: tls_selection_parser_impl(selection)
{
	this->m_lookahead = this->get_next_token();
}
// Tokenizer for the BUSTER selection syntax.
//
// Skips whitespace, reads one token starting at m_p and returns its code:
// bt_NUMBER (value in m_value_i), bt_IDENT (text in m_value_s), bt_EOLN at the
// end of the input, or the character itself for anything else. A '-' directly
// followed by a digit starts a negative number, otherwise '-' is returned as
// a character token.
int TLSSelectionParserImplBuster::get_next_token()
{
int result = bt_NONE;
enum STATE
{
st_START,
st_NEGATE,
st_NUM,
st_IDENT
} state = st_START;
m_value_i = 0;
m_value_s.clear();
bool negative = false;
while (result == bt_NONE)
{
// NOTE(review): the iterator is dereferenced and advanced before the end
// check, so at end of input it is read at, and moved past, m_end.
// The end of input is presented to the states below as ch == 0.
char ch = *m_p++;
if (m_p > m_end)
ch = 0;
// NOTE(review): ch is a plain char handed to isspace/isdigit/isalpha/isalnum;
// presumably the input is ASCII only, confirm.
switch (state)
{
case st_START:
if (ch == 0)
result = bt_EOLN;
else if (isspace(ch))
// this continues the enclosing while loop, i.e. reads the next character
continue;
else if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
}
else if (isalpha(ch))
{
m_value_s = { ch };
state = st_IDENT;
}
else if (ch == '-')
{
state = st_NEGATE;
}
else
result = ch;
break;
// A minus sign was read: it is either the sign of a number or a token by itself
case st_NEGATE:
if (isdigit(ch))
{
m_value_i = ch - '0';
state = st_NUM;
negative = true;
}
else
{
// put back the character following the minus
--m_p;
result = '-';
}
break;
// Digits are accumulated as a positive value, the sign is applied at the end
case st_NUM:
if (isdigit(ch))
m_value_i = 10 * m_value_i + (ch - '0');
else
{
if (negative)
m_value_i = -m_value_i;
result = bt_NUMBER;
--m_p;
}
break;
// An identifier starts with a letter and continues with letters and digits
case st_IDENT:
if (isalnum(ch))
m_value_s += ch;
else
{
--m_p;
result = bt_IDENT;
}
break;
}
}
return result;
}
// Returns the text used for a token code in error messages, including the
// value of the most recently read token for identifiers and numbers.
// Only the bt_* codes are supported; any other code is a programming error.
std::string TLSSelectionParserImplBuster::to_string(int token)
{
	switch (token)
	{
		case bt_IDENT: return "identifier (" + m_value_s + ')';
		// Must be std::to_string: an unqualified to_string is this very function
		// and would be called with the number as if it were a token code.
		case bt_NUMBER: return "number (" + std::to_string(m_value_i) + ')';
		case bt_EOLN: return "end of line";
		default:
			assert(false);
			return "unknown token";
	}
}
std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::ParseGroup()
{
std::unique_ptr<tls_selection> result;
auto add = [&result](const std::string &chainID, int from, int to)
{
std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(from, to));
std::unique_ptr<tls_selection> s(new tls_selection_intersection(sc, sr));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new tls_selection_union{ result, s });
};
match('{');
do
{
std::string chain1;
int seqNr1;
std::tie(chain1, seqNr1) = ParseAtom();
if (m_lookahead == '-')
{
std::string chain2;
int seqNr2 = seqNr1;
match('-');
if (m_lookahead == bt_NUMBER)
{
seqNr2 = m_value_i;
match(bt_NUMBER);
}
else
{
std::tie(chain2, seqNr2) = ParseAtom();
if (chain1 != chain2)
{
std::cerr << "Warning, ranges over multiple chains detected" << std::endl;
std::unique_ptr<tls_selection> sc1(new tls_selection_chain(chain1));
std::unique_ptr<tls_selection> sr1(new tls_selection_range_seq(seqNr1, kResidueNrWildcard));
std::unique_ptr<tls_selection> s1(new tls_selection_intersection(sc1, sr1));
std::unique_ptr<tls_selection> sc2(new tls_selection_chain(chain2));
std::unique_ptr<tls_selection> sr2(new tls_selection_range_seq(kResidueNrWildcard, seqNr2));
std::unique_ptr<tls_selection> s2(new tls_selection_intersection(sc2, sr2));
std::unique_ptr<tls_selection> s(new tls_selection_union(s1, s2));
if (result == nullptr)
result.reset(s.release());
else
result.reset(new tls_selection_union{ result, s });
chain1.clear();
}
}
if (not chain1.empty())
add(chain1, seqNr1, seqNr2);
}
else
add(chain1, seqNr1, seqNr1);
} while (m_lookahead != '}');
match('}');
return result;
}
// atom := ( '*' | ident ) '|' ( '*' | number [ ':' ident ] )
//
// Returns the chain ID and the residue number. A '*' for the chain is returned
// as the chain ID "*" and a '*' for the residue as kResidueNrWildcard. An atom
// name following the residue number is accepted but ignored (with a warning on
// std::cerr when cif::VERBOSE > 0). Throws std::runtime_error through match()
// on malformed input.
std::tuple<std::string, int> TLSSelectionParserImplBuster::ParseAtom()
{
	std::string chain = m_value_s;
	int seqNr = kResidueNrWildcard;

	if (m_lookahead == '*')
	{
		// The tokenizer leaves m_value_s empty for punctuation, so the wildcard
		// has to be recorded explicitly; "*" is the chain ID that
		// tls_selection_chain treats as "all chains".
		chain = "*";
		match('*');
	}
	else
		match(bt_IDENT);

	match('|');

	if (m_lookahead == '*')
		match('*');
	else
	{
		seqNr = m_value_i;
		match(bt_NUMBER);

		if (m_lookahead == ':')
		{
			match(':');
			std::string atom = m_value_s;

			if (cif::VERBOSE > 0)
				std::cerr << "Warning: ignoring atom ID '" << atom << "' in TLS selection" << std::endl;

			match(bt_IDENT);
		}
	}

	return std::make_tuple(chain, seqNr);
}
// Entry point: the statement must consist of exactly one group followed by
// the end of the input.
std::unique_ptr<tls_selection> TLSSelectionParserImplBuster::Parse()
{
	auto group = ParseGroup();

	// nothing may follow the closing brace
	match(bt_EOLN);

	return group;
}
// --------------------------------------------------------------------
class TLSSelectionParserImplBusterOld : public tls_selection_parser_impl
{
public:
TLSSelectionParserImplBusterOld(const std::string &selection)
: tls_selection_parser_impl(selection)
{
m_lookahead = get_next_token();
}
virtual std::unique_ptr<tls_selection> Parse();
private:
std::unique_ptr<tls_selection> ParseAtomSelection();
std::unique_ptr<tls_selection> ParseTerm();
std::unique_ptr<tls_selection> ParseFactor();
std::unique_ptr<tls_selection> ParseResid();
std::unique_ptr<tls_selection> ParseChainResid();
enum TOKEN
{
pt_NONE = 0,
pt_IDENT = 256,
pt_CHAINRESID,
pt_STRING,
pt_NUMBER,
pt_RANGE,
pt_EOLN,
pt_KW_ALL,
pt_KW_CHAIN,
pt_KW_RESSEQ,
pt_KW_RESID,
pt_KW_RESNAME,
pt_KW_ELEMENT,
pt_KW_AND,
pt_KW_OR,
pt_KW_NOT,
pt_KW_PDB,
pt_KW_ENTRY,
pt_KW_THROUGH
};
virtual int get_next_token();
virtual std::string to_string(int token);
int m_value_i;
std::string m_value_s;
int m_value_r[2];
};
// Tokenizer for the old BUSTER selection syntax.
//
// Skips whitespace, reads one token starting at m_p and returns its code:
//  - pt_NUMBER      optionally negative integer, value in m_value_i
//  - pt_RANGE       number, '-' or ':', number; values in m_value_r[0..1]
//  - pt_CHAINRESID  one letter directly followed by digits; letter in
//                   m_value_s, number in m_value_i
//  - pt_IDENT       or one of the pt_KW_* codes when the text is a keyword
//  - pt_STRING      text between single quotes, in m_value_s
//  - pt_EOLN        end of the input
//  - any other character is returned as itself
//
// Throws std::runtime_error when a quoted string is not closed.
//
// NOTE(review): the iterator is dereferenced and advanced before the end check,
// so at end of input it is read at, and moved past, m_end. Left as is because
// the --m_p bookkeeping below depends on it.
int TLSSelectionParserImplBusterOld::get_next_token()
{
	int result = pt_NONE;

	enum STATE
	{
		st_START,
		st_NEGATE,
		st_NUM,
		st_RANGE,
		st_IDENT_1,
		st_IDENT,
		st_CHAINRESID,
		st_QUOTED_1,
		st_QUOTED_2
	} state = st_START;

	m_value_i = 0;
	m_value_s.clear();
	bool negative = false;

	while (result == pt_NONE)
	{
		// the end of input is presented to the states below as ch == 0
		char ch = *m_p++;
		if (m_p > m_end)
			ch = 0;

		switch (state)
		{
			case st_START:
				if (ch == 0)
					result = pt_EOLN;
				else if (isspace(ch))
					continue; // continues the while loop: read the next character
				else if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
				}
				else if (isalpha(ch))
				{
					m_value_s = { ch };
					state = st_IDENT_1;
				}
				else if (ch == '-')
				{
					state = st_NEGATE;
				}
				else if (ch == '\'')
				{
					state = st_QUOTED_1;
				}
				else
					result = ch;
				break;

			// A minus sign was read: either the sign of a number or a token by itself
			case st_NEGATE:
				if (isdigit(ch))
				{
					m_value_i = ch - '0';
					state = st_NUM;
					negative = true;
				}
				else
				{
					--m_p;
					result = '-';
				}
				break;

			// Digits are accumulated as a positive value, the sign is applied at
			// the end. A '-' or ':' may turn the number into a range.
			case st_NUM:
				if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else if (ch == '-' or ch == ':')
				{
					if (negative)
						m_value_i = -m_value_i;

					m_value_r[0] = m_value_i;
					m_value_r[1] = 0;
					state = st_RANGE;
				}
				else
				{
					if (negative)
						m_value_i = -m_value_i;
					result = pt_NUMBER;
					--m_p;
				}
				break;

			// A non-zero end value is what tells "digits were seen" apart from
			// "the separator was not followed by a number".
			case st_RANGE: // TODO: question, is "-2--1" a valid range? We do not support that, yet
				if (isdigit(ch))
					m_value_r[1] = 10 * m_value_r[1] + (ch - '0');
				else if (m_value_r[1] != 0)
				{
					result = pt_RANGE;
					--m_p;
				}
				else
				{
					// not a range: give back this character and the separator
					--m_p;
					--m_p;
					result = pt_NUMBER;
				}
				break;

			// One letter was read; what follows decides between an identifier
			// and a combined chain/residue token
			case st_IDENT_1:
				if (isalpha(ch))
				{
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
				{
					m_value_i = (ch - '0');
					state = st_CHAINRESID;
				}
				else
				{
					--m_p;
					result = pt_IDENT;
				}
				break;

			case st_CHAINRESID:
				if (isalpha(ch))
				{
					// A letter after the digits: this is an ordinary identifier, so
					// the digits read so far go back into its text. Must be
					// std::to_string, an unqualified to_string resolves to the member
					// to_string(int token) and would append a token description.
					m_value_s += std::to_string(m_value_i);
					m_value_s += ch;
					state = st_IDENT;
				}
				else if (isdigit(ch))
					m_value_i = 10 * m_value_i + (ch - '0');
				else
				{
					--m_p;
					result = pt_CHAINRESID;
				}
				break;

			case st_IDENT:
				if (isalnum(ch))
					m_value_s += ch;
				else
				{
					--m_p;
					result = pt_IDENT;
				}
				break;

			// Directly after an opening quote: a second quote means the first one
			// is returned as a character token
			case st_QUOTED_1:
				if (ch == '\'')
				{
					--m_p;
					result = '\'';
				}
				else
				{
					m_value_s = { ch };
					state = st_QUOTED_2;
				}
				break;

			case st_QUOTED_2:
				if (ch == '\'')
					result = pt_STRING;
				else if (ch == 0)
					throw std::runtime_error("Unexpected end of selection, missing quote character?");
				else
					m_value_s += ch;
				break;
		}
	}

	// Promote identifiers that are keywords; the comparison ignores case
	if (result == pt_IDENT)
	{
		if (iequals(m_value_s, "CHAIN"))
			result = pt_KW_CHAIN;
		else if (iequals(m_value_s, "ALL"))
			result = pt_KW_ALL;
		else if (iequals(m_value_s, "AND"))
			result = pt_KW_AND;
		else if (iequals(m_value_s, "OR"))
			result = pt_KW_OR;
		else if (iequals(m_value_s, "NOT"))
			result = pt_KW_NOT;
		else if (iequals(m_value_s, "RESSEQ"))
			result = pt_KW_RESSEQ;
		else if (iequals(m_value_s, "RESID") or iequals(m_value_s, "RESI") or iequals(m_value_s, "RESIDUES"))
			result = pt_KW_RESID;
		else if (iequals(m_value_s, "RESNAME"))
			result = pt_KW_RESNAME;
		else if (iequals(m_value_s, "PDB"))
			result = pt_KW_PDB;
		else if (iequals(m_value_s, "ENTRY"))
			result = pt_KW_ENTRY;
		else if (iequals(m_value_s, "THROUGH"))
			result = pt_KW_THROUGH;
	}

	return result;
}
std::string TLSSelectionParserImplBusterOld::to_string(int token)
{
switch (token)
{
case pt_IDENT: return "identifier (" + m_value_s + ')';
case pt_STRING: return "std::string (" + m_value_s + ')';
case pt_NUMBER: return "number (" + to_string(m_value_i) + ')';
case pt_RANGE: return "range (" + to_string(m_value_r[0]) + ':' + to_string(m_value_r[1]) + ')';
case pt_EOLN: return "end of line";
case pt_KW_ALL: return "ALL";
case pt_KW_CHAIN: return "CHAIN";
case pt_KW_RESSEQ: return "RESSEQ";
case pt_KW_RESID: return "RESID";
case pt_KW_RESNAME: return "RESNAME";
case pt_KW_ELEMENT: return "ELEMENT";
case pt_KW_AND: return "AND";
case pt_KW_OR: return "OR";
case pt_KW_NOT: return "NOT";
case pt_KW_PDB: return "PDB";
case pt_KW_ENTRY: return "ENTRY";
case pt_KW_THROUGH: return "THROUGH";
default:
assert(false);
return "unknown token";
}
}
// Entry point: parses the whole selection statement, which must be a single
// atom selection followed by the end of the input. A statement starting with
// the PDB keyword is rejected with a std::runtime_error since that form is
// not implemented.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::Parse()
{
	const bool pdbEntry = (m_lookahead == pt_KW_PDB);
	if (pdbEntry)
	{
		match(pt_KW_PDB);
		// Match(pt_KW_ENTRY);
		throw std::runtime_error("Unimplemented PDB ENTRY specification");
	}

	auto selection = ParseAtomSelection();

	// nothing may follow the selection
	match(pt_EOLN);

	return selection;
}
// atom_selection := term { OR term }
// Terms are combined left to right into tls_selection_union nodes.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseAtomSelection()
{
	auto selection = ParseTerm();

	for (;;)
	{
		if (m_lookahead != pt_KW_OR)
			break;

		match(pt_KW_OR);
		selection.reset(new tls_selection_union(selection, ParseTerm()));
	}

	return selection;
}
// term := factor { AND factor }
// Factors are combined left to right into tls_selection_intersection nodes.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseTerm()
{
	auto term = ParseFactor();

	for (;;)
	{
		if (m_lookahead != pt_KW_AND)
			break;

		match(pt_KW_AND);
		term.reset(new tls_selection_intersection(term, ParseFactor()));
	}

	return term;
}
// factor := '(' atom_selection ')'
//         | NOT atom_selection
//         | CHAIN (ident | string | number)
//         | RESNAME ident
//         | RESSEQ resid_list
//         | RESID resid_list
//         | ALL
//         | chain_resid_list
//
// Returns the selection node for the factor. Throws std::runtime_error (directly
// or through match()) when the input does not fit any of the alternatives.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseFactor()
{
	std::unique_ptr<tls_selection> result;

	switch (m_lookahead)
	{
		case '(':
			match('(');
			result = ParseAtomSelection();
			match(')');
			break;

		case pt_KW_NOT:
			match(pt_KW_NOT);
			result.reset(new tls_selection_not(ParseAtomSelection()));
			break;

		case pt_KW_CHAIN:
		{
			match(pt_KW_CHAIN);

			std::string chainID = m_value_s;
			if (m_lookahead == pt_NUMBER) // sigh
			{
				// Must be std::to_string: an unqualified to_string resolves to the
				// member to_string(int token) and would yield a token description
				// instead of the digits of the chain ID.
				chainID = std::to_string(m_value_i);
				match(pt_NUMBER);
			}
			else
				match(m_lookahead == pt_STRING ? pt_STRING : pt_IDENT);

			result.reset(new tls_selection_chain(chainID));
			break;
		}

		case pt_KW_RESNAME:
		{
			match(pt_KW_RESNAME);
			std::string name = m_value_s;
			match(pt_IDENT);
			result.reset(new tls_selection_by_name(name));
			break;
		}

		// RESSEQ and RESID are treated alike in this syntax
		case pt_KW_RESSEQ:
			match(pt_KW_RESSEQ);
			result = ParseResid();
			break;

		case pt_KW_RESID:
			match(pt_KW_RESID);
			result = ParseResid();
			break;

		case pt_KW_ALL:
			match(pt_KW_ALL);
			result.reset(new tls_selection_all());
			break;

		// the token is consumed by ParseChainResid itself
		case pt_CHAINRESID:
			result = ParseChainResid();
			break;

		default:
			throw std::runtime_error("Unexpected token " + to_string(m_lookahead));
	}

	return result;
}
// resid_list := resid_item { ',' resid_item }
// resid_item := range | number [ (':' | '-' | THROUGH) number ]
//
// Every item becomes a tls_selection_range_seq (a single number is a range of
// one) and the items are OR-ed together from left to right.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseResid()
{
	std::unique_ptr<tls_selection> result;

	bool moreItems = true;
	while (moreItems)
	{
		int from, to;

		if (m_lookahead == pt_RANGE)
		{
			// the tokenizer already recognised "n-m" or "n:m" as one token
			from = m_value_r[0];
			to = m_value_r[1];
			match(pt_RANGE);
		}
		else
		{
			from = m_value_i;
			match(pt_NUMBER);

			to = from;

			const bool hasUpperBound =
				m_lookahead == ':' or m_lookahead == '-' or m_lookahead == pt_KW_THROUGH;

			if (hasUpperBound)
			{
				match(m_lookahead);
				to = m_value_i;
				match(pt_NUMBER);
			}
		}

		std::unique_ptr<tls_selection> range(new tls_selection_range_seq(from, to));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new tls_selection_union(result, range));

		// a comma announces another item
		moreItems = (m_lookahead == ',');
		if (moreItems)
			match(',');
	}

	return result;
}
// chain_resid_list := chain_resid_item { ',' chain_resid_item }
// chain_resid_item := chainresid [ '-' chainresid ]
//
// Every item becomes "chain AND residue range" and the items are OR-ed together
// from left to right. Both ends of a range have to name the same chain,
// otherwise a std::runtime_error is thrown.
std::unique_ptr<tls_selection> TLSSelectionParserImplBusterOld::ParseChainResid()
{
	std::unique_ptr<tls_selection> result;

	bool moreItems = true;
	while (moreItems)
	{
		// the token values have to be read before match() replaces them
		const std::string chainID = m_value_s;
		int from = m_value_i;
		int to = from;

		match(pt_CHAINRESID);

		if (m_lookahead == '-')
		{
			match(m_lookahead);

			to = m_value_i;

			// the chain is checked before the token type
			if (m_value_s != chainID)
				throw std::runtime_error("Cannot have two different chainIDs in a range selection");

			match(pt_CHAINRESID);
		}

		std::unique_ptr<tls_selection> sc(new tls_selection_chain(chainID));
		std::unique_ptr<tls_selection> sr(new tls_selection_range_seq(from, to));
		std::unique_ptr<tls_selection> range(new tls_selection_intersection(sc, sr));

		if (not result)
			result.reset(range.release());
		else
			result.reset(new tls_selection_union(result, range));

		// a comma announces another item
		moreItems = (m_lookahead == ',');
		if (moreItems)
			match(',');
	}

	return result;
}
// --------------------------------------------------------------------
// Interface of a TLS selection parser: turns a selection statement into a
// selection tree.
class TLSSelectionParserBase
{
  public:
	virtual ~TLSSelectionParserBase() = default;

	// Parses selection and returns the resulting selection tree.
	virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const = 0;
};
template <typename IMPL>
class TLSSelectionParser
{
public:
virtual std::unique_ptr<tls_selection> Parse(const std::string &selection) const
{
std::unique_ptr<tls_selection> result;
try
{
IMPL p(selection);
result = p.Parse();
}
catch (const std::exception &ex)
{
std::cerr << "ParseError: " << ex.what() << std::endl;
}
return result;
}
};
// --------------------------------------------------------------------
// Parses a TLS selection statement, choosing the syntax by the name of the
// program that wrote it.
//
// program   : name of the refinement program; matched case-insensitively as a
//             substring against "buster" and then "phenix"
// selection : the selection statement
//
// The syntaxes are tried in this order, stopping at the first that parses:
//   buster  : BUSTER, old BUSTER, PHENIX
//   phenix  : PHENIX, BUSTER, old BUSTER
//   other   : same as phenix
// Each fallback is announced on std::cerr when cif::VERBOSE > 0. The result is
// empty when no syntax accepted the statement.
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection)
{
	TLSSelectionParser<TLSSelectionParserImplPhenix> phenix;
	TLSSelectionParser<TLSSelectionParserImplBuster> buster;
	TLSSelectionParser<TLSSelectionParserImplBusterOld> busterOld;

	// progress messages are only shown in verbose mode
	auto announce = [](const char *message)
	{
		if (cif::VERBOSE > 0)
			std::cerr << message << std::endl;
	};

	std::unique_ptr<tls_selection> result;

	if (cif::icontains(program, "buster"))
	{
		result = buster.Parse(selection);

		if (not result)
		{
			announce("Falling back to old BUSTER");
			result = busterOld.Parse(selection);
		}

		if (not result)
		{
			announce("Falling back to PHENIX");
			result = phenix.Parse(selection);
		}

		return result;
	}

	// PHENIX first, both for PHENIX itself and for unknown programs
	if (not cif::icontains(program, "phenix"))
		announce("No known program specified, trying PHENIX");

	result = phenix.Parse(selection);

	if (not result)
	{
		announce("Falling back to BUSTER");
		result = buster.Parse(selection);
	}

	if (not result)
	{
		announce("Falling back to old BUSTER");
		result = busterOld.Parse(selection);
	}

	return result;
}
} // namespace cif
......@@ -86,23 +86,6 @@ uint32_t get_terminal_width()
return csbi.srWindow.Right - csbi.srWindow.Left + 1;
}
std::string GetExecutablePath()
{
WCHAR buffer[4096];
DWORD n = ::GetModuleFileNameW(nullptr, buffer, sizeof(buffer) / sizeof(WCHAR));
if (n == 0)
throw std::runtime_error("could not get exe path");
std::wstring ws(buffer);
// convert from utf16 to utf8
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
std::string u8str = conv1.to_bytes(ws);
return u8str;
}
#else
#include <limits.h>
......@@ -120,17 +103,6 @@ uint32_t get_terminal_width()
return result;
}
// Returns the path of the running executable, read from the /proc/self/exe
// symbolic link.
// Throws std::runtime_error, with the system error text appended, when the
// link cannot be read.
std::string get_executable_path()
{
	using namespace std::literals;

	// readlink() does not terminate the result and silently truncates it to the
	// buffer size, so use the length it reports and retry with a larger buffer
	// for as long as the buffer comes back completely filled.
	std::string path(1024, '\0');

	for (;;)
	{
		const ssize_t length = readlink("/proc/self/exe", &path[0], path.size());
		if (length == -1)
			throw std::runtime_error("could not get exe path "s + strerror(errno));

		if (static_cast<std::size_t>(length) < path.size())
		{
			path.resize(static_cast<std::size_t>(length));
			return path;
		}

		path.resize(path.size() * 2);
	}
}
#endif
// --------------------------------------------------------------------
......
......@@ -90,11 +90,11 @@ BOOST_AUTO_TEST_CASE(clr_1)
{
using namespace cif::colour;
std::cout << "Hello, " << cif::coloured(white, red, regular) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(white, red, bold) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(black, red) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(white, green) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(white, blue) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(blue, white) << "world!" << reset << std::endl
<< "Hello, " << cif::coloured(red, white, bold) << "world!" << reset << std::endl;
std::cout << "Hello, " << cif::coloured("world!", white, red, regular) << std::endl
<< "Hello, " << cif::coloured("world!", white, red, bold) << std::endl
<< "Hello, " << cif::coloured("world!", black, red) << std::endl
<< "Hello, " << cif::coloured("world!", white, green) << std::endl
<< "Hello, " << cif::coloured("world!", white, blue) << std::endl
<< "Hello, " << cif::coloured("world!", blue, white) << std::endl
<< "Hello, " << cif::coloured("world!", red, white, bold) << std::endl;
}
\ No newline at end of file
#include <cif++/utilities.hpp>
#include "cif++/utilities.hpp"
#include <random>
#include <thread>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment