Commit 1ae3cf7b by Maarten L. Hekkelman

moving towards using CCD, parsing single datablock

parent 915d6504
...@@ -218,7 +218,7 @@ $(1)-test: test/$(1)-test ...@@ -218,7 +218,7 @@ $(1)-test: test/$(1)-test
endef endef
TESTS = unit # pdb2cif TESTS = unit pdb2cif
$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) $(foreach part,$(TESTS),$(eval $(call TEST_template,$(part))))
......
...@@ -1474,7 +1474,8 @@ Some influential environment variables: ...@@ -1474,7 +1474,8 @@ Some influential environment variables:
DEBUG Build a debug version of the application DEBUG Build a debug version of the application
LT_SYS_LIBRARY_PATH LT_SYS_LIBRARY_PATH
User-defined run-time library search path. User-defined run-time library search path.
CCP4 The location where CCP4 is installed CCP4 The location where CCP4 is installed. This is only required if
you want to rebuild the symmetry operations table.
MRC Specify a location for the mrc executable MRC Specify a location for the mrc executable
DATA_LIB_DIR DATA_LIB_DIR
The location where to store packaged dictionary files The location where to store packaged dictionary files
......
...@@ -53,7 +53,7 @@ dnl and now for the semantic version ...@@ -53,7 +53,7 @@ dnl and now for the semantic version
LIBCIF_SEMANTIC_VERSION=libcifpp_version LIBCIF_SEMANTIC_VERSION=libcifpp_version
AC_SUBST(LIBCIF_SEMANTIC_VERSION) AC_SUBST(LIBCIF_SEMANTIC_VERSION)
AC_ARG_VAR([CCP4], [The location where CCP4 is installed]) AC_ARG_VAR([CCP4], [The location where CCP4 is installed. CCP4 is only required if you want to rebuild the symmetry operations table.])
AX_MRC AX_MRC
...@@ -88,11 +88,11 @@ AX_IOSTREAMS_BZ2 ...@@ -88,11 +88,11 @@ AX_IOSTREAMS_BZ2
AC_CHECK_LIB([atomic], [atomic_flag_clear]) AC_CHECK_LIB([atomic], [atomic_flag_clear])
dnl Set output variables for the various directories dnl Set output variables for the various directories
AC_ARG_VAR([DATA_LIB_DIR], [The location where to store packaged dictionary files]) AC_ARG_VAR([DATA_LIB_DIR], [The location where to store packaged dictionary and CCD files])
AC_SUBST([DATA_LIB_DIR]) AC_SUBST([DATA_LIB_DIR])
DATA_LIB_DIR=$datadir/libcifpp DATA_LIB_DIR=$datadir/libcifpp
AC_ARG_VAR([DATA_CACHE_DIR], [The location where to store cached dictionary files]) AC_ARG_VAR([DATA_CACHE_DIR], [The location where to store cached dictionary and CCD files])
AC_SUBST([DATA_CACHE_DIR]) AC_SUBST([DATA_CACHE_DIR])
DATA_CACHE_DIR=/var/cache/libcifpp DATA_CACHE_DIR=/var/cache/libcifpp
......
...@@ -1922,6 +1922,9 @@ class File ...@@ -1922,6 +1922,9 @@ class File
void load(std::istream& is); void load(std::istream& is);
void save(std::ostream& os); void save(std::ostream& os);
/// \brief Load only the data block \a datablock from the mmCIF file
void load(std::istream& is, const std::string& datablock);
void save(std::ostream& os, const std::vector<std::string>& order) { write(os, order); } void save(std::ostream& os, const std::vector<std::string>& order) { write(os, order); }
void write(std::ostream& os, const std::vector<std::string>& order); void write(std::ostream& os, const std::vector<std::string>& order);
......
...@@ -101,7 +101,7 @@ std::tuple<std::string,std::string> splitTagName(const std::string& tag); ...@@ -101,7 +101,7 @@ std::tuple<std::string,std::string> splitTagName(const std::string& tag);
class SacParser class SacParser
{ {
public: public:
SacParser(std::istream& is); SacParser(std::istream& is, bool init = true);
virtual ~SacParser() {} virtual ~SacParser() {}
enum CIFToken enum CIFToken
...@@ -142,6 +142,8 @@ class SacParser ...@@ -142,6 +142,8 @@ class SacParser
CIFToken getNextToken(); CIFToken getNextToken();
void match(CIFToken token); void match(CIFToken token);
bool parseFile(const std::string& datablock);
void parseFile(); void parseFile();
void parseGlobal(); void parseGlobal();
void parseDataBlock(); void parseDataBlock();
......
...@@ -98,7 +98,6 @@ class Compound ...@@ -98,7 +98,6 @@ class Compound
{ {
public: public:
Compound(cif::Datablock &db); Compound(cif::Datablock &db);
~Compound();
/// \brief factory method, create a Compound based on the three letter code /// \brief factory method, create a Compound based on the three letter code
/// (for amino acids) or the one-letter code (for bases) or the /// (for amino acids) or the one-letter code (for bases) or the
...@@ -106,21 +105,6 @@ class Compound ...@@ -106,21 +105,6 @@ class Compound
static const Compound *create(const std::string &id); static const Compound *create(const std::string &id);
// /// this second factory method can create a Compound even if it is not
// /// recorded in the library. It will take the values from the CCP4 lib
// /// unless the value passed to this function is not empty.
// static const Compound* create(const std::string& id, const std::string& name,
// const std::string& type, const std::string& formula);
/// \brief Create compounds based on the data in the file \a components
///
/// It is often required to add information about unknown components.
/// This file parses either a CCP4 or a CCD formatted components file
///
/// \param components The mmCIF file containing the components
/// \result An array containing the ID's of the added components
static std::vector<std::string> addExtraComponents(const std::filesystem::path &components);
// accessors // accessors
std::string id() const { return mID; } std::string id() const { return mID; }
......
...@@ -3067,6 +3067,21 @@ void File::load(std::istream& is) ...@@ -3067,6 +3067,21 @@ void File::load(std::istream& is)
} }
} }
void File::load(std::istream& is, const std::string& datablock)
{
Validator* saved = mValidator;
setValidator(nullptr);
Parser p(is, *this);
p.parseFile(datablock);
if (saved != nullptr)
{
setValidator(saved);
(void)isValid();
}
}
void File::save(std::ostream& os) void File::save(std::ostream& os)
{ {
Datablock* e = mHead; Datablock* e = mHead;
......
...@@ -84,13 +84,15 @@ const char* SacParser::kValueName[] = { ...@@ -84,13 +84,15 @@ const char* SacParser::kValueName[] = {
// -------------------------------------------------------------------- // --------------------------------------------------------------------
SacParser::SacParser(std::istream& is) SacParser::SacParser(std::istream& is, bool init)
: mData(is) : mData(is)
{ {
mValidate = true; mValidate = true;
mLineNr = 1; mLineNr = 1;
mBol = true; mBol = true;
mLookahead = getNextToken();
if (init)
mLookahead = getNextToken();
} }
void SacParser::error(const std::string& msg) void SacParser::error(const std::string& msg)
...@@ -521,6 +523,90 @@ SacParser::CIFToken SacParser::getNextToken() ...@@ -521,6 +523,90 @@ SacParser::CIFToken SacParser::getNextToken()
return result; return result;
} }
// Locate the data block called `datablock` in the input and parse only that
// block.
//
// The stream buffer is scanned character by character with a small state
// machine that skips comments, quoted strings and text fields, looking for a
// `data_<datablock>` heading that is followed by white space. When the
// heading is found, the data block is produced, the lookahead token is
// primed and parseDataBlock() is called.
//
// \param datablock  The name of the data block, without the `data_` prefix.
//                   The `data_` keyword itself is matched case-insensitively,
//                   the name is matched exactly.
// \return           true if the heading was found (and the block was parsed),
//                   false if the end of the input was reached first.
bool SacParser::parseFile(const std::string& datablock)
{
	using traits_type = std::streambuf::traits_type;

	enum class ScanState
	{
		Start,		// between or inside unquoted tokens
		Comment,	// after '#', up to the end of the line
		Quoted,		// inside a single or double quoted string
		QuotedEnd,	// just read a potential closing quote
		TextField,	// inside a ';' delimited text field
		Heading		// matching the characters of the data_ heading
	};

	auto &sb = *mData.rdbuf();

	const std::string heading = "data_" + datablock;
	const std::string::size_type kKeywordLength = 5;	// length of "data_"

	ScanState state = ScanState::Start;
	std::string::size_type matched = 0;	// number of heading characters matched so far
	int quote = 0;						// the quote character that opened the current string
	bool bol = true;					// previous character was a newline (or start of input)
	bool tokenStart = true;				// previous character was white space (or start of input)
	bool found = false;

	// Read until the heading is found or the buffer reports end of file.
	// in_avail() is deliberately not used as loop condition: it only tells how
	// many characters are known to be available right now and may be zero (or
	// -1) while there is still data to read, which would end the search early.
	while (not found)
	{
		const auto ch = sb.sbumpc();
		if (traits_type::eq_int_type(ch, traits_type::eof()))
			break;

		switch (state)
		{
			case ScanState::Start:
				// Headings, comments and quoted values can only begin at the
				// start of a token, a 'd' or quote inside an unquoted value
				// must not be mistaken for one.
				if (not tokenStart)
					break;

				switch (ch)
				{
					case '#':
						state = ScanState::Comment;
						break;

					case 'd':
					case 'D':
						state = ScanState::Heading;
						matched = 1;
						break;

					case '\'':
					case '"':
						state = ScanState::Quoted;
						quote = ch;
						break;

					case ';':
						// a text field starts with a semicolon in the first column
						if (bol)
							state = ScanState::TextField;
						break;
				}
				break;

			case ScanState::Comment:
				if (ch == '\n')
					state = ScanState::Start;
				break;

			case ScanState::Quoted:
				if (ch == quote)
					state = ScanState::QuotedEnd;
				break;

			case ScanState::QuotedEnd:
				// a quote only closes the string when followed by white space
				state = std::isspace(ch) ? ScanState::Start : ScanState::Quoted;
				break;

			case ScanState::TextField:
				if (ch == ';' and bol)
					state = ScanState::Start;
				break;

			case ScanState::Heading:
				if (matched == heading.length())
				{
					// the complete heading matched, it must end here
					if (std::isspace(ch))
						found = true;
					else
						state = ScanState::Start;
				}
				else
				{
					// the keyword part is case-insensitive, the name is not
					const int expected = static_cast<unsigned char>(heading[matched]);
					const int actual = matched < kKeywordLength ? std::tolower(ch) : ch;

					if (actual == expected)
						++matched;
					else
						state = ScanState::Start;
				}
				break;
		}

		bol = (ch == '\n');
		tokenStart = std::isspace(ch) != 0;
	}

	if (found)
	{
		produceDatablock(datablock);
		mLookahead = getNextToken();
		parseDataBlock();
	}

	return found;
}
void SacParser::parseFile() void SacParser::parseFile()
{ {
while (mLookahead != eCIFTokenEOF) while (mLookahead != eCIFTokenEOF)
......
...@@ -106,7 +106,6 @@ struct CompoundBondLess ...@@ -106,7 +106,6 @@ struct CompoundBondLess
} }
}; };
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Compound // Compound
...@@ -304,13 +303,13 @@ class CompoundFactoryImpl ...@@ -304,13 +303,13 @@ class CompoundFactoryImpl
CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next); CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next);
~CompoundFactoryImpl() virtual ~CompoundFactoryImpl()
{ {
delete mNext; delete mNext;
} }
Compound *get(std::string id); Compound *get(std::string id);
Compound *create(std::string id); virtual Compound *create(std::string id);
CompoundFactoryImpl *pop() CompoundFactoryImpl *pop()
{ {
...@@ -335,7 +334,6 @@ class CompoundFactoryImpl ...@@ -335,7 +334,6 @@ class CompoundFactoryImpl
private: private:
std::shared_timed_mutex mMutex; std::shared_timed_mutex mMutex;
std::string mPath;
std::vector<Compound *> mCompounds; std::vector<Compound *> mCompounds;
std::set<std::string> mKnownPeptides; std::set<std::string> mKnownPeptides;
std::set<std::string> mKnownBases; std::set<std::string> mKnownBases;
...@@ -355,8 +353,7 @@ CompoundFactoryImpl::CompoundFactoryImpl() ...@@ -355,8 +353,7 @@ CompoundFactoryImpl::CompoundFactoryImpl()
} }
CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next) CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next)
: mPath(file) : mNext(next)
, mNext(next)
{ {
cif::File cifFile(file); cif::File cifFile(file);
if (not cifFile.isValid()) if (not cifFile.isValid())
...@@ -450,6 +447,48 @@ Compound *CompoundFactoryImpl::create(std::string id) ...@@ -450,6 +447,48 @@ Compound *CompoundFactoryImpl::create(std::string id)
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Compound factory implementation used for the default compounds. It takes
// its data from the cached CCD components.cif file and only differs from
// CompoundFactoryImpl in the way a compound is created.
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
{
  public:
	CCDCompoundFactoryImpl() = default;

	// Create the compound with ID `id` using the CCD components.cif data
	Compound *create(std::string id) override;
};
// Create the compound with ID `id` based on the CCD components.cif file.
//
// The ID is converted to upper case first. A compound that is already known
// to this factory is returned as is; only for unknown compounds the
// components.cif resource is opened and the single data block named `id` is
// loaded from it.
//
// \param id  The ID of the compound
// \return    The compound, or nullptr if the loaded data block is not
//            called `id`
// \throws std::runtime_error if the components.cif resource cannot be located
Compound *CCDCompoundFactoryImpl::create(std::string id)
{
	ba::to_upper(id);

	// No need to open and scan the components file again for a compound
	// we already have.
	if (Compound *known = get(id))
		return known;

	auto ccd = cif::loadResource("components.cif");
	if (not ccd)
		throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-dictionary-script to fetch the data.");

	if (cif::VERBOSE)
	{
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::File file;
	file.load(*ccd, id);

	if (cif::VERBOSE)
		std::cout << " done" << std::endl;

	Compound *result = nullptr;

	// NOTE(review): when the data block was not found the file presumably
	// contains no data block at all; verify that firstDatablock() is safe to
	// call in that situation.
	auto &db = file.firstDatablock();
	if (db.getName() == id)
		result = new Compound(db);
	else if (cif::VERBOSE)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	return result;
}
// --------------------------------------------------------------------
CompoundFactory *CompoundFactory::sInstance; CompoundFactory *CompoundFactory::sInstance;
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance; thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
...@@ -461,7 +500,7 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly) ...@@ -461,7 +500,7 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
} }
CompoundFactory::CompoundFactory() CompoundFactory::CompoundFactory()
: mImpl(nullptr) : mImpl(new CCDCompoundFactoryImpl)
{ {
} }
...@@ -544,12 +583,4 @@ bool CompoundFactory::isKnownBase(const std::string &resName) const ...@@ -544,12 +583,4 @@ bool CompoundFactory::isKnownBase(const std::string &resName) const
return mImpl->isKnownBase(resName); return mImpl->isKnownBase(resName);
} }
// --------------------------------------------------------------------
std::vector<std::string> Compound::addExtraComponents(const std::filesystem::path &components)
{
}
} // namespace mmcif } // namespace mmcif
...@@ -21,22 +21,27 @@ if ! [ -d @DATA_CACHE_DIR@ ]; then ...@@ -21,22 +21,27 @@ if ! [ -d @DATA_CACHE_DIR@ ]; then
exit exit
fi fi
# fetch the dictionary fetch_dictionary () {
dict=$1
source=$2
dict=@DATA_CACHE_DIR@/mmcif_pdbx_v50.dic wget -O${dict}.gz ${source}
source=https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz
wget -O${dict}.gz ${source} # be careful not to nuke an existing dictionary file
# extract to a temporary file first
# be careful not to nuke an existing dictionary file gunzip -c ${dict}.gz > ${dict}-tmp
# extract to a temporary file first
gunzip -c ${dict}.gz > ${dict}-tmp # then move the extracted file to the final location
# then move the extracted file to the final location mv ${dict}-tmp ${dict}
mv ${dict}-tmp ${dict} # and clean up afterwards
# and clean up afterwards rm ${dict}.gz
}
rm ${dict}.gz # fetch the dictionaries
fetch_dictionary "@DATA_CACHE_DIR@/mmcif_pdbx_v50.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
fetch_dictionary "@DATA_CACHE_DIR@/components.cif" "ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment