Commit 6d0ea5c6 by Maarten L. Hekkelman

Drop using CCP4 monomers library

version bump
parent dbe40e3a
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
cmake_minimum_required(VERSION 3.16) cmake_minimum_required(VERSION 3.16)
# set the project name # set the project name
project(libcifpp VERSION 5.2.5 LANGUAGES CXX) project(libcifpp VERSION 6.0.0 LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
......
Version 6.0.0
- Drop the use of CCP4's monomer library for compound information
Version 5.2.5 Version 5.2.5
- Correctly import the Eigen3 library - Correctly import the Eigen3 library
......
...@@ -44,11 +44,7 @@ ...@@ -44,11 +44,7 @@
/// The data is loaded by default from a file called `components.cif`. This file /// The data is loaded by default from a file called `components.cif`. This file
/// is located using load_resource. (See documentation on cif::load_resource for more information) /// is located using load_resource. (See documentation on cif::load_resource for more information)
/// ///
/// But if the CCP4 environment is available at runtime, the compound information /// Note that since version 6 the CCP4 monomer library is no longer used.
/// may also be generated from the CCP4 monomer library.
///
/// Note that the information in CCP4 and CCD is not equal.
///
/// See also :doc:`/compound` for more information. /// See also :doc:`/compound` for more information.
...@@ -157,10 +153,6 @@ class compound ...@@ -157,10 +153,6 @@ class compound
float formula_weight() const { return m_formula_weight; } ///< Return the formula mass of the chemical component in Daltons. float formula_weight() const { return m_formula_weight; } ///< Return the formula mass of the chemical component in Daltons.
int formal_charge() const { return m_formal_charge; } ///< Return the formal charge on the chemical component. int formal_charge() const { return m_formal_charge; } ///< Return the formal charge on the chemical component.
/// The group record is only available in CCP4 monomer library files.
/// For CCD entries this value will always contain 'non-polymer'
std::string group() const { return m_group; }
const std::vector<compound_atom> &atoms() const { return m_atoms; } ///< Return the list of atoms for this compound const std::vector<compound_atom> &atoms() const { return m_atoms; } ///< Return the list of atoms for this compound
const std::vector<compound_bond> &bonds() const { return m_bonds; } ///< Return the list of bonds for this compound const std::vector<compound_bond> &bonds() const { return m_bonds; } ///< Return the list of bonds for this compound
...@@ -176,8 +168,6 @@ class compound ...@@ -176,8 +168,6 @@ class compound
private: private:
friend class compound_factory_impl; friend class compound_factory_impl;
friend class CCD_compound_factory_impl;
friend class CCP4_compound_factory_impl;
compound(cif::datablock &db); compound(cif::datablock &db);
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group); compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
...@@ -246,6 +236,8 @@ class compound_factory ...@@ -246,6 +236,8 @@ class compound_factory
CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids
kBaseMap; ///< Globally accessible static list of the default bases kBaseMap; ///< Globally accessible static list of the default bases
void report_missing_compound(const std::string &compound_id);
private: private:
compound_factory(); compound_factory();
......
...@@ -366,6 +366,14 @@ std::unique_ptr<std::istream> load_resource(std::filesystem::path name); ...@@ -366,6 +366,14 @@ std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
void add_file_resource(const std::string &name, std::filesystem::path dataFile); void add_file_resource(const std::string &name, std::filesystem::path dataFile);
/** /**
* @brief List all the file resources added with cif::add_file_resource.
*
 * @param os The std::ostream to write the file resources to
*/
void list_file_resources(std::ostream &os);
/**
* @brief Add a directory to the list of search directories. This list is * @brief Add a directory to the list of search directories. This list is
* searched in a last-in-first-out order. * searched in a last-in-first-out order.
* *
...@@ -379,4 +387,12 @@ void add_file_resource(const std::string &name, std::filesystem::path dataFile); ...@@ -379,4 +387,12 @@ void add_file_resource(const std::string &name, std::filesystem::path dataFile);
void add_data_directory(std::filesystem::path dataDir); void add_data_directory(std::filesystem::path dataDir);
/**
 * @brief List all the data directories, for error reporting on missing resources.
 *
 * Each directory is written on a line of its own.
 *
 * @param os The std::ostream to write the directories to
 */
void list_data_directories(std::ostream &os);
} // namespace cif } // namespace cif
...@@ -313,11 +313,10 @@ const std::map<std::string, char> compound_factory::kBaseMap{ ...@@ -313,11 +313,10 @@ const std::map<std::string, char> compound_factory::kBaseMap{
class compound_factory_impl : public std::enable_shared_from_this<compound_factory_impl> class compound_factory_impl : public std::enable_shared_from_this<compound_factory_impl>
{ {
public: public:
compound_factory_impl(std::shared_ptr<compound_factory_impl> next); compound_factory_impl();
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next); compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
virtual ~compound_factory_impl() ~compound_factory_impl()
{ {
for (auto c : m_compounds) for (auto c : m_compounds)
delete c; delete c;
...@@ -331,7 +330,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto ...@@ -331,7 +330,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
compound *result = nullptr; compound *result = nullptr;
// walk the list, see if any of us has the compound already // walk the list, see if any of the implementations has the compound already
for (auto impl = shared_from_this(); impl; impl = impl->m_next) for (auto impl = shared_from_this(); impl; impl = impl->m_next)
{ {
for (auto cmp : impl->m_compounds) for (auto cmp : impl->m_compounds)
...@@ -363,155 +362,52 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto ...@@ -363,155 +362,52 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
return result; return result;
} }
std::shared_ptr<compound_factory_impl> next() const std::shared_ptr<compound_factory_impl> next()
{ {
return m_next; return m_next;
} }
bool is_known_peptide(const std::string &resName) void describe(std::ostream &os)
{
return m_known_peptides.count(resName) or
(m_next and m_next->is_known_peptide(resName));
}
bool is_known_base(const std::string &resName)
{ {
return m_known_bases.count(resName) or if (m_file.empty())
(m_next and m_next->is_known_base(resName)); os << "CCD components.cif resource\n";
else
os << "CCD components file: " << std::quoted(m_file.string()) << '\n';
if (m_next)
m_next->describe(os);
} }
protected: private:
virtual compound *create(const std::string &id) compound *create(const std::string &id);
{
// For the base class we assume every compound is preloaded
return nullptr;
}
std::shared_timed_mutex mMutex; std::shared_timed_mutex mMutex;
fs::path m_file;
cif::parser::datablock_index m_index;
std::vector<compound *> m_compounds; std::vector<compound *> m_compounds;
std::set<std::string> m_known_peptides;
std::set<std::string> m_known_bases;
std::set<std::string> m_missing; std::set<std::string> m_missing;
std::shared_ptr<compound_factory_impl> m_next; std::shared_ptr<compound_factory_impl> m_next;
}; };
// -------------------------------------------------------------------- compound_factory_impl::compound_factory_impl()
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
: m_next(next)
{ {
for (const auto &[key, value] : compound_factory::kAAMap)
m_known_peptides.insert(key);
for (const auto &[key, value] : compound_factory::kBaseMap)
m_known_bases.insert(key);
} }
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next) compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
: m_next(next) : m_file(file)
, m_next(next)
{ {
cif::file cifFile(file);
if (cifFile.contains("comp_list")) // So this is a CCP4 restraints file, special handling
{
auto &compList = cifFile["comp_list"];
auto &chemComp = compList["chem_comp"];
for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
{
std::string type;
// known groups are (counted from ccp4 monomer dictionary)
// D-pyranose
// DNA
// L-PEPTIDE LINKING
// L-SACCHARIDE
// L-peptide
// L-pyranose
// M-peptide
// NON-POLYMER
// P-peptide
// RNA
// furanose
// non-polymer
// non_polymer
// peptide
// pyranose
// saccharide
if (cif::iequals(id, "gly"))
type = "peptide linking";
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
type = "L-peptide linking";
else if (cif::iequals(group, "DNA"))
type = "DNA linking";
else if (cif::iequals(group, "RNA"))
type = "RNA linking";
else
type = "non-polymer";
auto &db = cifFile["comp_" + id];
m_compounds.push_back(new compound(db, id, name, type, group));
}
}
else
{
// A CCD components file, validate it first
try
{
cifFile.load_dictionary("mmcif_pdbx.dic");
if (not cifFile.is_valid())
{
std::cerr << "The components file " << file << " is not valid\n";
if (cif::VERBOSE < 1)
std::cerr << "(use --verbose to see why)\n";
}
}
catch (const std::exception &e)
{
std::cerr << "When trying to load the components file " << file << " there was an exception:\n"
<< e.what() << '\n';
}
for (auto &db : cifFile)
m_compounds.push_back(new compound(db));
}
} }
// -------------------------------------------------------------------- compound *compound_factory_impl::create(const std::string &id)
// Version for the default compounds, based on the cached components.cif file from CCD
class CCD_compound_factory_impl : public compound_factory_impl
{
public:
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path &file)
: compound_factory_impl(next)
, mCompoundsFile(file)
{
}
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
{
}
compound *create(const std::string &id) override;
cif::parser::datablock_index mIndex;
fs::path mCompoundsFile;
};
compound *CCD_compound_factory_impl::create(const std::string &id)
{ {
compound *result = nullptr; compound *result = nullptr;
std::unique_ptr<std::istream> ccd; std::unique_ptr<std::istream> ccd;
if (mCompoundsFile.empty()) if (m_file.empty())
{ {
ccd = cif::load_resource("components.cif"); ccd = cif::load_resource("components.cif");
if (not ccd) if (not ccd)
...@@ -521,11 +417,11 @@ compound *CCD_compound_factory_impl::create(const std::string &id) ...@@ -521,11 +417,11 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
} }
} }
else else
ccd.reset(new std::ifstream(mCompoundsFile)); ccd.reset(new std::ifstream(m_file));
cif::file file; cif::file file;
if (mIndex.empty()) if (m_index.empty())
{ {
if (cif::VERBOSE > 1) if (cif::VERBOSE > 1)
{ {
...@@ -535,20 +431,20 @@ compound *CCD_compound_factory_impl::create(const std::string &id) ...@@ -535,20 +431,20 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
} }
cif::parser parser(*ccd, file); cif::parser parser(*ccd, file);
mIndex = parser.index_datablocks(); m_index = parser.index_datablocks();
if (cif::VERBOSE > 1) if (cif::VERBOSE > 1)
std::cout << " done" << std::endl; std::cout << " done" << std::endl;
// reload the resource, perhaps this should be improved... // reload the resource, perhaps this should be improved...
if (mCompoundsFile.empty()) if (m_file.empty())
{ {
ccd = cif::load_resource("components.cif"); ccd = cif::load_resource("components.cif");
if (not ccd) if (not ccd)
throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data."); throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
} }
else else
ccd.reset(new std::ifstream(mCompoundsFile)); ccd.reset(new std::ifstream(m_file));
} }
if (cif::VERBOSE > 1) if (cif::VERBOSE > 1)
...@@ -558,7 +454,7 @@ compound *CCD_compound_factory_impl::create(const std::string &id) ...@@ -558,7 +454,7 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
} }
cif::parser parser(*ccd, file); cif::parser parser(*ccd, file);
parser.parse_single_datablock(id, mIndex); parser.parse_single_datablock(id, m_index);
if (cif::VERBOSE > 1) if (cif::VERBOSE > 1)
std::cout << " done" << std::endl; std::cout << " done" << std::endl;
...@@ -575,107 +471,6 @@ compound *CCD_compound_factory_impl::create(const std::string &id) ...@@ -575,107 +471,6 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
} }
} }
if (result == nullptr and cif::VERBOSE > 0)
std::cerr << "Could not locate compound " << id << " in the CCD components file\n";
return result;
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the data found in CCP4's monomers lib
class CCP4_compound_factory_impl : public compound_factory_impl
{
public:
CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next = nullptr);
compound *create(const std::string &id) override;
private:
fs::path m_CLIBD_MON;
};
CCP4_compound_factory_impl::CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
, m_CLIBD_MON(clibd_mon)
{
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
cif::file file(m_CLIBD_MON / "list" / "mon_lib_list.cif");
auto &chemComps = file["comp_list"]["chem_comp"];
for (const auto &[group, comp_id] : chemComps.rows<std::string, std::string>("group", "id"))
{
if (std::regex_match(group, peptideRx))
m_known_peptides.insert(comp_id);
else if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
m_known_bases.insert(comp_id);
}
}
compound *CCP4_compound_factory_impl::create(const std::string &id)
{
compound *result = nullptr;
fs::path resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
if (not fs::exists(resFile) and (id == "COM" or id == "CON" or "PRN")) // seriously...
resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
if (fs::exists(resFile))
{
cif::file cf(resFile.string());
auto &db_list = cf["comp_list"];
auto list = db_list["chem_comp"];
if (list.size() == 1)
{
std::string name, group;
uint32_t numberAtomsAll, numberAtomsNh;
cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
list.front().get("name", "group", "number_atoms_all", "number_atoms_nh");
// locate the datablock
auto &db = cf["comp_" + id];
std::string type;
// known groups are (counted from ccp4 monomer dictionary)
// D-pyranose
// DNA
// L-PEPTIDE LINKING
// L-SACCHARIDE
// L-peptide
// L-pyranose
// M-peptide
// NON-POLYMER
// P-peptide
// RNA
// furanose
// non-polymer
// non_polymer
// peptide
// pyranose
// saccharide
if (cif::iequals(id, "gly"))
type = "peptide linking";
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
type = "L-peptide linking";
else if (cif::iequals(group, "DNA"))
type = "DNA linking";
else if (cif::iequals(group, "RNA"))
type = "RNA linking";
else
type = "non-polymer";
m_compounds.push_back(new compound(db, id, name, type, group));
result = m_compounds.back();
}
}
return result; return result;
} }
...@@ -695,15 +490,9 @@ compound_factory::compound_factory() ...@@ -695,15 +490,9 @@ compound_factory::compound_factory()
{ {
auto ccd = cif::load_resource("components.cif"); auto ccd = cif::load_resource("components.cif");
if (ccd) if (ccd)
m_impl = std::make_shared<CCD_compound_factory_impl>(m_impl); m_impl = std::make_shared<compound_factory_impl>();
else if (cif::VERBOSE > 0)
std::cerr << "CCD components.cif file was not found\n";
const char *clibd_mon = getenv("CLIBD_MON");
if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
m_impl = std::make_shared<CCP4_compound_factory_impl>(clibd_mon, m_impl);
else if (cif::VERBOSE > 0) else if (cif::VERBOSE > 0)
std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined\n"; std::cerr << "CCD components.cif resource was not found\n";
} }
compound_factory::~compound_factory() compound_factory::~compound_factory()
...@@ -741,7 +530,7 @@ void compound_factory::set_default_dictionary(const fs::path &inDictFile) ...@@ -741,7 +530,7 @@ void compound_factory::set_default_dictionary(const fs::path &inDictFile)
try try
{ {
m_impl.reset(new CCD_compound_factory_impl(m_impl, inDictFile)); m_impl.reset(new compound_factory_impl(inDictFile, m_impl));
} }
catch (const std::exception &) catch (const std::exception &)
{ {
...@@ -772,17 +561,50 @@ void compound_factory::pop_dictionary() ...@@ -772,17 +561,50 @@ void compound_factory::pop_dictionary()
const compound *compound_factory::create(std::string id) const compound *compound_factory::create(std::string id)
{ {
return m_impl ? m_impl->get(id) : nullptr; auto result = m_impl ? m_impl->get(id) : nullptr;
if (not result)
report_missing_compound(id);
return result;
} }
bool compound_factory::is_known_peptide(const std::string &resName) const bool compound_factory::is_known_peptide(const std::string &resName) const
{ {
return m_impl ? m_impl->is_known_peptide(resName) : kAAMap.count(resName) > 0; return kAAMap.count(resName) > 0;
} }
bool compound_factory::is_known_base(const std::string &resName) const bool compound_factory::is_known_base(const std::string &resName) const
{ {
return m_impl ? m_impl->is_known_base(resName) : kBaseMap.count(resName) > 0; return kBaseMap.count(resName) > 0;
}
void compound_factory::report_missing_compound(const std::string &compound_id)
{
static bool s_reported = false;
if (std::exchange(s_reported, true) == false)
{
using namespace cif::colour;
std::clog << "\n" << cif::coloured("Configuration error:", white, red) << "\n\n"
<< "The attempt to retrieve compound information for " << std::quoted(compound_id) << " failed.\n\n"
<< "This information is searched for in a CCD file called components.cif or components.cif.gz\n"
<< "which should be located in one of the following directories:\n\n";
cif::list_data_directories(std::clog);
std::clog << "\n(Note that you can add a directory to the search paths by setting the LIBCIFPP_DATA_DIR environmental variable)\n\n";
if (m_impl)
{
std::clog << "The current order of compound factory objects is:\n\n";
m_impl->describe(std::clog);
}
else
std::clog << "No compound factory objects are created since none of the data sources is found.\n";
cif::list_file_resources(std::clog);
std::clog.flush();
}
} }
} // namespace cif } // namespace cif
...@@ -608,6 +608,9 @@ sac_parser::datablock_index sac_parser::index_datablocks() ...@@ -608,6 +608,9 @@ sac_parser::datablock_index sac_parser::index_datablocks()
std::string::size_type si = 0; std::string::size_type si = 0;
std::string datablock; std::string datablock;
// Seek to beginning of file
m_source.pubseekpos(0);
for (auto ch = m_source.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = m_source.sbumpc()) for (auto ch = m_source.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = m_source.sbumpc())
{ {
switch (state) switch (state)
...@@ -667,7 +670,7 @@ sac_parser::datablock_index sac_parser::index_datablocks() ...@@ -667,7 +670,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
case data_name: case data_name:
if (is_non_blank(ch)) if (is_non_blank(ch))
datablock.insert(datablock.end(), char(ch)); datablock.insert(datablock.end(), (char)std::toupper(ch));
else if (is_space(ch)) else if (is_space(ch))
{ {
if (not datablock.empty()) if (not datablock.empty())
......
...@@ -5146,7 +5146,7 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5146,7 +5146,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
getCategory("struct_conn")->emplace({ getCategory("struct_conn")->emplace({
{ "id", type + std::to_string(linkNr) }, { "id", type + std::to_string(linkNr) },
{ "conn_type_id", type }, { "conn_type_id", type },
// { "ccp4_link_id", ccp4LinkID }, // { "ccp4_link_id", ccp4LinkID },
......
...@@ -845,6 +845,9 @@ class resource_pool ...@@ -845,6 +845,9 @@ class resource_pool
std::unique_ptr<std::istream> load(fs::path name); std::unique_ptr<std::istream> load(fs::path name);
/// Read-only access to the data directories held by this pool (mDirs).
/// Returned by const reference so that callers iterating over the list do
/// not pay for a copy of the whole container on every call.
/// NOTE(review): the reference aliases the live member; callers should not
/// hold on to it while directories are being added — confirm against callers.
const auto &data_directories() { return mDirs; }
/// Read-only access to the named file resources held by this pool
/// (mLocalResources). Returned by const reference so that callers do not
/// pay for a copy of the whole container on every call.
/// NOTE(review): the reference aliases the live member; callers should not
/// hold on to it while resources are being added — confirm against callers.
const auto &file_resources() { return mLocalResources; }
private: private:
resource_pool(); resource_pool();
...@@ -937,4 +940,22 @@ std::unique_ptr<std::istream> load_resource(std::filesystem::path name) ...@@ -937,4 +940,22 @@ std::unique_ptr<std::istream> load_resource(std::filesystem::path name)
return resource_pool::instance().load(name); return resource_pool::instance().load(name);
} }
/// Write every named file resource known to the resource pool to @a os,
/// one `name -> "path"` entry per line, preceded by a heading line.
/// Nothing at all is written when no file resources are registered.
///
/// @param os The std::ostream to write the file resources to
void list_file_resources(std::ostream &os)
{
	const auto &resources = resource_pool::instance().file_resources();

	// Stay completely silent when there is nothing to report.
	if (resources.empty())
		return;

	os << "\nThe following named resources were loaded:\n";

	for (const auto &[resource_name, resource_path] : resources)
	{
		os << resource_name;
		os << " -> ";
		os << std::quoted(resource_path.string());
		os << '\n';
	}
}
/// Write each data directory known to the resource pool to @a os,
/// one directory per line.
///
/// @param os The std::ostream to write the directories to
void list_data_directories(std::ostream &os)
{
	const auto &directories = resource_pool::instance().data_directories();

	for (const auto &directory : directories)
	{
		os << directory;
		os << '\n';
	}
}
} // namespace cif } // namespace cif
...@@ -3483,3 +3483,11 @@ ATOM 7 CD PRO A 1 15.762 13.216 43.724 1.00 30.71 C)" ...@@ -3483,3 +3483,11 @@ ATOM 7 CD PRO A 1 15.762 13.216 43.724 1.00 30.71 C)"
auto f = cif::pdb::read(is); auto f = cif::pdb::read(is);
} }
// --------------------------------------------------------------------
// Asking the compound factory for an identifier that is not expected to match
// any compound ("&&&") should hand back a null pointer.
BOOST_AUTO_TEST_CASE(compound_not_found_test_1)
{
// Look the bogus identifier up through the global factory instance.
auto cmp = cif::compound_factory::instance().create("&&&");
// The lookup is expected to fail, which create() signals with nullptr.
BOOST_CHECK(cmp == nullptr);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment