Commit 2e2fc11f by Maarten L. Hekkelman

renaming, first steps

parent d44ed57c
...@@ -231,7 +231,7 @@ $(1)-test: test/$(1)-test ...@@ -231,7 +231,7 @@ $(1)-test: test/$(1)-test
endef endef
TESTS = unit pdb2cif TESTS = unit pdb2cif rename-compound
$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) $(foreach part,$(TESTS),$(eval $(call TEST_template,$(part))))
......
...@@ -38,7 +38,7 @@ LT_INIT([disable-shared pic-only]) ...@@ -38,7 +38,7 @@ LT_INIT([disable-shared pic-only])
AC_SUBST(LIBTOOL_DEPS) AC_SUBST(LIBTOOL_DEPS)
dnl versioning, first for libtool dnl versioning, for libtool
LIBCIF_CURRENT=1 LIBCIF_CURRENT=1
LIBCIF_REVISION=1 LIBCIF_REVISION=1
LIBCIF_AGE=1 LIBCIF_AGE=1
......
...@@ -1881,6 +1881,12 @@ class Category ...@@ -1881,6 +1881,12 @@ class Category
void reorderByIndex(); void reorderByIndex();
void sort(std::function<int(const Row&, const Row&)> comparator); void sort(std::function<int(const Row&, const Row&)> comparator);
// --------------------------------------------------------------------
// generate a new, unique ID. Pass it an ID generating function based on
// a sequence number. This function will be called until the result is
// unique in the context of this category
std::string getUniqueID(std::function<std::string(int)> generator = cif::cifIdForNumber);
private: private:
void write(std::ostream& os); void write(std::ostream& os);
......
...@@ -92,6 +92,11 @@ inline char tolower(char ch) ...@@ -92,6 +92,11 @@ inline char tolower(char ch)
std::tuple<std::string,std::string> splitTagName(const std::string& tag); std::tuple<std::string,std::string> splitTagName(const std::string& tag);
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// generate a cif name, mainly used to generate asym_id's
std::string cifIdForNumber(int number);
// --------------------------------------------------------------------
// custom wordwrapping routine // custom wordwrapping routine
std::vector<std::string> wordWrap(const std::string& text, unsigned int width); std::vector<std::string> wordWrap(const std::string& text, unsigned int width);
......
...@@ -445,7 +445,8 @@ class Structure ...@@ -445,7 +445,8 @@ class Structure
Atom getAtomByLabel(const std::string& atomID, const std::string& asymID, Atom getAtomByLabel(const std::string& atomID, const std::string& asymID,
const std::string& compID, int seqID, const std::string& altID = ""); const std::string& compID, int seqID, const std::string& altID = "");
const Residue& getResidue(const std::string& asymID, const std::string& compID, int seqID) const; /// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
const Residue& getResidue(const std::string& asymID, const std::string& compID, int seqID = 0) const;
// map between auth and label locations // map between auth and label locations
......
...@@ -1385,6 +1385,23 @@ void Category::sort(std::function<int(const Row&, const Row&)> comparator) ...@@ -1385,6 +1385,23 @@ void Category::sort(std::function<int(const Row&, const Row&)> comparator)
assert(size() == rows.size()); assert(size() == rows.size());
} }
// Produce an ID that does not yet occur in the "id" column of this category.
// The generator is called with successive sequence numbers, starting at the
// current row count plus one, until it returns a value that exists() does
// not find.
std::string Category::getUniqueID(std::function<std::string(int)> generator)
{
	using namespace cif::literals;

	int sequenceNr = size() + 1;

	std::string candidate;
	do
		candidate = generator(sequenceNr++);
	while (exists("id"_key == candidate));

	return candidate;
}
size_t Category::size() const size_t Category::size() const
{ {
size_t result = 0; size_t result = 0;
......
...@@ -215,6 +215,37 @@ std::tuple<std::string, std::string> splitTagName(const std::string &tag) ...@@ -215,6 +215,37 @@ std::tuple<std::string, std::string> splitTagName(const std::string &tag)
} }
// -------------------------------------------------------------------- // --------------------------------------------------------------------
// Generate a cif ID for a zero-based sequence number, in the way spreadsheet
// columns are labelled:
//
//   0 -> "A", 25 -> "Z", 26 -> "AA", 51 -> "AZ", 701 -> "ZZ", 702 -> "AAA"
//
// Numbers of 26 * 26 * 26 and above are written as 'L' followed by the
// decimal representation of the number.
//
// The letter encoding is bijective base 26: every position can hold any of
// the 26 letters, so two different numbers never produce the same ID.
// A plain "divide and take the remainder" scheme drops a letter whenever a
// remainder falls below 26, which maps e.g. both 26 and 676 onto "AA".
std::string cifIdForNumber(int number)
{
	// negative sequence numbers have no representation
	assert(number >= 0);

	std::string result;

	if (number >= 26 * 26 * 26)
		result = 'L' + std::to_string(number);
	else
	{
		// Emit the least significant letter first and prepend the others.
		// The '- 1' accounts for the shorter IDs that precede the current
		// length (there is no 'zero' letter in this numbering).
		do
		{
			const int r = number % 26;
			result.insert(result.begin(), static_cast<char>('A' + r));
			number = number / 26 - 1;
		}
		while (number >= 0);
	}

	assert(not result.empty());
	return result;
}
// --------------------------------------------------------------------
// Simplified line breaking code taken from a decent text editor. // Simplified line breaking code taken from a decent text editor.
// In this case, simplified means it only supports ASCII. // In this case, simplified means it only supports ASCII.
......
...@@ -566,21 +566,19 @@ class PDBFileParser ...@@ -566,21 +566,19 @@ class PDBFileParser
if (not result.empty() and result.back() != ']') if (not result.empty() and result.back() != ']')
result += '-'; result += '-';
if (sugar->c1.resName == "MAN") result += "alpha-D-mannopyranose"; auto compound = CompoundFactory::instance().create(sugar->c1.resName);
if (compound)
result += compound->name();
else if (sugar->c1.resName == "MAN") result += "alpha-D-mannopyranose";
else if (sugar->c1.resName == "BMA") result += "beta-D-mannopyranose"; else if (sugar->c1.resName == "BMA") result += "beta-D-mannopyranose";
else if (sugar->c1.resName == "NAG") result += "2-acetamido-2-deoxy-beta-D-glucopyranose"; else if (sugar->c1.resName == "NAG") result += "2-acetamido-2-deoxy-beta-D-glucopyranose";
else if (sugar->c1.resName == "NDG") result += "2-acetamido-2-deoxy-alpha-D-glucopyranose"; else if (sugar->c1.resName == "NDG") result += "2-acetamido-2-deoxy-alpha-D-glucopyranose";
else if (sugar->c1.resName == "FUC") result += "alpha-L-fucopyranose"; else if (sugar->c1.resName == "FUC") result += "alpha-L-fucopyranose";
else if (sugar->c1.resName == "FUL") result += "beta-L-fucopyranose"; else if (sugar->c1.resName == "FUL") result += "beta-L-fucopyranose";
else else
{ result += sugar->c1.resName;
auto compound = CompoundFactory::instance().create(sugar->c1.resName);
if (compound)
result += compound->name();
else
result += sugar->c1.resName;
}
return result; return result;
} }
...@@ -952,35 +950,6 @@ class PDBFileParser ...@@ -952,35 +950,6 @@ class PDBFileParser
return c; return c;
} }
// Generate a cif ID (used for asym_id's) for a zero-based sequence number,
// in the way spreadsheet columns are labelled:
//
//   0 -> "A", 25 -> "Z", 26 -> "AA", 701 -> "ZZ", 702 -> "AAA"
//
// Numbers of 26 * 26 * 26 and above are written as 'L' followed by the
// decimal representation of the number.
//
// The letter encoding is bijective base 26, so two different numbers never
// produce the same ID. A plain "divide and take the remainder" scheme drops
// a letter whenever a remainder falls below 26, which maps e.g. both 26 and
// 676 onto "AA".
std::string cifIdForInt(int nr) const
{
	// negative sequence numbers have no representation
	assert(nr >= 0);

	std::string result;

	if (nr >= 26 * 26 * 26)
		result = 'L' + std::to_string(nr);
	else
	{
		// Emit the least significant letter first and prepend the others.
		// The '- 1' accounts for the shorter IDs that precede the current
		// length (there is no 'zero' letter in this numbering).
		do
		{
			const int r = nr % 26;
			result.insert(result.begin(), static_cast<char>('A' + r));
			nr = nr / 26 - 1;
		}
		while (nr >= 0);
	}

	assert(not result.empty());
	return result;
}
std::vector<char> altLocsForAtom(char chainID, int seqNum, char iCode, std::string atomName); std::vector<char> altLocsForAtom(char chainID, int seqNum, char iCode, std::string atomName);
void MapChainID2AsymIDS(char chainID, std::vector<std::string>& asymIds); void MapChainID2AsymIDS(char chainID, std::vector<std::string>& asymIds);
...@@ -1484,7 +1453,7 @@ void PDBFileParser::ParseTitle() ...@@ -1484,7 +1453,7 @@ void PDBFileParser::ParseTitle()
// 37 - 40 ... // 37 - 40 ...
std::string old = vS(22, 25); std::string old = vS(22, 25);
std::string date = pdb2cifDate(vS(12, 20)); std::string date = pdb2cifDate(vS(12, 20));
cat = getCategory("pdbx_database_PDB_obs"); cat = getCategory("pdbx_database_PDB_obs");
...@@ -1506,7 +1475,7 @@ void PDBFileParser::ParseTitle() ...@@ -1506,7 +1475,7 @@ void PDBFileParser::ParseTitle()
Match("TITLE ", false); Match("TITLE ", false);
std::string title; std::string title;
if (mRec->is("TITLE ")) // 1 - 6 Record name "TITLE " if (mRec->is("TITLE ")) // 1 - 6 Record name "TITLE "
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. { // 9 - 10 Continuation continuation Allows concatenation of multiple records.
title = vS(11); // 11 - 80 String title Title of the experiment. title = vS(11); // 11 - 80 String title Title of the experiment.
GetNextRecord(); GetNextRecord();
} }
...@@ -3770,7 +3739,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3770,7 +3739,7 @@ void PDBFileParser::ConstructEntities()
int asymNr = 0; int asymNr = 0;
for (auto& chain: mChains) for (auto& chain: mChains)
{ {
std::string asymID = cifIdForInt(asymNr++); std::string asymID = cif::cifIdForNumber(asymNr++);
std::string entityID = mMolID2EntityID[chain.mMolID]; std::string entityID = mMolID2EntityID[chain.mMolID];
mAsymID2EntityID[asymID] = entityID; mAsymID2EntityID[asymID] = entityID;
...@@ -4182,7 +4151,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4182,7 +4151,7 @@ void PDBFileParser::ConstructEntities()
if (ih != chain.mSeqres.end()) if (ih != chain.mSeqres.end())
continue; continue;
heti.asymID = cifIdForInt(asymNr++); heti.asymID = cif::cifIdForNumber(asymNr++);
} }
std::set<std::string> writtenAsyms; std::set<std::string> writtenAsyms;
...@@ -4261,7 +4230,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4261,7 +4230,7 @@ void PDBFileParser::ConstructEntities()
{ {
if (waterChains.count(het.chainID) == 0) if (waterChains.count(het.chainID) == 0)
{ {
asymID = cifIdForInt(asymNr++); asymID = cif::cifIdForNumber(asymNr++);
waterChains[het.chainID] = asymID; waterChains[het.chainID] = asymID;
} }
else else
...@@ -4604,7 +4573,7 @@ void PDBFileParser::ConstructSugarTrees(int& asymNr) ...@@ -4604,7 +4573,7 @@ void PDBFileParser::ConstructSugarTrees(int& asymNr)
// create an asym for this sugar tree // create an asym for this sugar tree
std::string asymID = cifIdForInt(asymNr++); std::string asymID = cif::cifIdForNumber(asymNr++);
getCategory("struct_asym")->emplace({ getCategory("struct_asym")->emplace({
{ "id", asymID }, { "id", asymID },
......
...@@ -2237,13 +2237,51 @@ void Structure::moveAtom(Atom& a, Point p) ...@@ -2237,13 +2237,51 @@ void Structure::moveAtom(Atom& a, Point p)
void Structure::changeResidue(const Residue& res, const std::string& newCompound, void Structure::changeResidue(const Residue& res, const std::string& newCompound,
const std::vector<std::tuple<std::string,std::string>>& remappedAtoms) const std::vector<std::tuple<std::string,std::string>>& remappedAtoms)
{ {
using namespace cif::literals;
const auto compound = Compound::create(newCompound);
if (not compound)
throw std::runtime_error("Unknown compound " + newCompound);
cif::Datablock& db = *mFile.impl().mDb; cif::Datablock& db = *mFile.impl().mDb;
std::string asymID = res.asymID();
std::string entityID; std::string entityID;
std::tie(entityID) = db["struct_asym"].find1<std::string>("id"_key == asymID, { "entity_id" });
// First make sure the compound is already known or insert it. // First make sure the compound is already known or insert it.
// And if the residue is an entity, we must make sure it exists // And if the residue is an entity, we must make sure it exists
insertCompound(newCompound, res.isEntity()); insertCompound(newCompound, res.isEntity());
// Next, if it is a non-polymer, update the entityID
if (db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == res.compoundID()))
{
try
{
std::tie(entityID) = db["entity"].find1<std::string>("type"_key == "non-polymer" and "pdbx_description"_key == compound->name(), { "id" });
}
catch (const std::exception& ex)
{
entityID = db["entity"].getUniqueID([](int i) { return std::to_string(i); });
db["entity"].emplace({
{ "id", entityID },
{ "type", "non-polymer" },
{ "src_method", "man" },
{ "pdbx_description", compound->name() },
{ "formula_weight", compound->formulaWeight() }
});
}
if (not db["pdbx_entity_nonpoly"].exists("entity_id"_key == entityID and "comp_id"_key == newCompound))
{
db["pdbx_entity_nonpoly"].emplace({
{ "entity_id", entityID },
{ "name", compound->name() },
{ "comp_id", newCompound }
});
}
}
auto& atomSites = db["atom_site"]; auto& atomSites = db["atom_site"];
auto atoms = res.atoms(); auto atoms = res.atoms();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment