Commit c3930428 by Maarten L. Hekkelman

sugar tree work in pdb2cif

parent 5e5e5c21
...@@ -196,7 +196,7 @@ $(1)_OBJECTS = $$(OBJDIR)/$(1)-test.o ...@@ -196,7 +196,7 @@ $(1)_OBJECTS = $$(OBJDIR)/$(1)-test.o
test/$(1)-test: $(LIB_TARGET) $$($(1)_OBJECTS) test/$(1)-test: $(LIB_TARGET) $$($(1)_OBJECTS)
@ echo ">>> building $(1)-test" @ echo ">>> building $(1)-test"
$(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $$@ $$($(1)_OBJECTS) -L.libs -lcif++ $(LIBS) $(LIBTOOL) --silent --tag=CXX --mode=link $(CXX) $(CXXFLAGS) $(LDFLAGS) -o $$@ $$($(1)_OBJECTS) -L.libs -lcif++ -lboost_date_time $(LIBS)
.PHONY: $(1)-test .PHONY: $(1)-test
$(1)-test: test/$(1)-test $(1)-test: test/$(1)-test
...@@ -204,7 +204,7 @@ $(1)-test: test/$(1)-test ...@@ -204,7 +204,7 @@ $(1)-test: test/$(1)-test
endef endef
TESTS = unit TESTS = unit pdb2cif
$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part)))) $(foreach part,$(TESTS),$(eval $(call TEST_template,$(part))))
......
...@@ -455,7 +455,8 @@ class PDBFileParser ...@@ -455,7 +455,8 @@ class PDBFileParser
int numHetAtoms; int numHetAtoms;
std::string text; std::string text;
std::string asymID; std::string asymID;
std::set<int> atoms; std::vector<PDBRecord*> atoms;
bool processedSugar = false;
}; };
struct UNOBS struct UNOBS
...@@ -1275,7 +1276,7 @@ void PDBFileParser::ParseTitle() ...@@ -1275,7 +1276,7 @@ void PDBFileParser::ParseTitle()
// HEADER // HEADER
// 1 - 6 Record name "HEADER" // 1 - 6 Record name "HEADER"
// 11 - 50 std::String(40) classification Classifies the molecule(s). // 11 - 50 String(40) classification Classifies the molecule(s).
// 51 - 59 Date depDate Deposition date. This is the date the // 51 - 59 Date depDate Deposition date. This is the date the
// coordinates were received at the PDB. // coordinates were received at the PDB.
// 63 - 66 IDcode idCode This identifier is unique within the PDB. // 63 - 66 IDcode idCode This identifier is unique within the PDB.
...@@ -1339,7 +1340,7 @@ void PDBFileParser::ParseTitle() ...@@ -1339,7 +1340,7 @@ void PDBFileParser::ParseTitle()
std::string title; std::string title;
if (mRec->is("TITLE ")) // 1 - 6 Record name "TITLE " if (mRec->is("TITLE ")) // 1 - 6 Record name "TITLE "
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. { // 9 - 10 Continuation continuation Allows concatenation of multiple records.
title = vS(11); // 11 - 80 std::String title Title of the experiment. title = vS(11); // 11 - 80 String title Title of the experiment.
GetNextRecord(); GetNextRecord();
} }
...@@ -1359,7 +1360,7 @@ void PDBFileParser::ParseTitle() ...@@ -1359,7 +1360,7 @@ void PDBFileParser::ParseTitle()
{ {
getCategory("database_PDB_caveat")->emplace({ getCategory("database_PDB_caveat")->emplace({
{ "id", caveatID++ }, { "id", caveatID++ },
{ "text", std::string{mRec->vS(20) } } // 20 - 79 std::String comment Free text giving the reason for the CAVEAT. { "text", std::string{mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
}); });
GetNextRecord(); GetNextRecord();
...@@ -3255,7 +3256,7 @@ void PDBFileParser::ParsePrimaryStructure() ...@@ -3255,7 +3256,7 @@ void PDBFileParser::ParsePrimaryStructure()
// int seqNum = vI(19, 22); // 19 - 22 Integer seqNum Sequence number. // int seqNum = vI(19, 22); // 19 - 22 Integer seqNum Sequence number.
// char iCode = vC(23); // 23 AChar iCode Insertion code. // char iCode = vC(23); // 23 AChar iCode Insertion code.
std::string stdRes = vS(25, 27); // 25 - 27 Residue name stdRes Standard residue name. std::string stdRes = vS(25, 27); // 25 - 27 Residue name stdRes Standard residue name.
// std::string comment = vS(30, 70); // 30 - 70 std::String comment Description of the residue modification. // std::string comment = vS(30, 70); // 30 - 70 String comment Description of the residue modification.
mMod2parent[resName] = stdRes; mMod2parent[resName] = stdRes;
...@@ -3273,7 +3274,7 @@ void PDBFileParser::ParseHeterogen() ...@@ -3273,7 +3274,7 @@ void PDBFileParser::ParseHeterogen()
char iCode = vC(18); // 18 AChar iCode Insertion code. char iCode = vC(18); // 18 AChar iCode Insertion code.
int numHetAtoms = vI(21, 25); // 21 - 25 Integer numHetAtoms Number of HETATM records for the group int numHetAtoms = vI(21, 25); // 21 - 25 Integer numHetAtoms Number of HETATM records for the group
// present in the datablock. // present in the datablock.
std::string text = vS(31, 70); // 31 - 70 std::String text Text describing Het group. std::string text = vS(31, 70); // 31 - 70 String text Text describing Het group.
mHets.push_back({ hetID, chainID, seqNum, iCode, numHetAtoms, text }); mHets.push_back({ hetID, chainID, seqNum, iCode, numHetAtoms, text });
...@@ -3285,7 +3286,7 @@ void PDBFileParser::ParseHeterogen() ...@@ -3285,7 +3286,7 @@ void PDBFileParser::ParseHeterogen()
if (mRec->is("HETNAM")) // 1 - 6 Record name "HETNAM" if (mRec->is("HETNAM")) // 1 - 6 Record name "HETNAM"
{ // 9 - 10 Continuation continuation Allows concatenation of multiple records. { // 9 - 10 Continuation continuation Allows concatenation of multiple records.
std::string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified. std::string hetID = vS(12, 14); // 12 - 14 LString(3) hetID Het identifier, right-justified.
std::string text = vS(16); // 16 - 70 std::String text Chemical name. std::string text = vS(16); // 16 - 70 String text Chemical name.
mHetnams[hetID] = text; mHetnams[hetID] = text;
InsertChemComp(hetID); InsertChemComp(hetID);
...@@ -3313,7 +3314,7 @@ void PDBFileParser::ParseHeterogen() ...@@ -3313,7 +3314,7 @@ void PDBFileParser::ParseHeterogen()
std::string hetID = vS(13, 15); // 13 - 15 LString(3) hetID Het identifier. std::string hetID = vS(13, 15); // 13 - 15 LString(3) hetID Het identifier.
// 17 - 18 Integer continuation Continuation number. // 17 - 18 Integer continuation Continuation number.
char waterMark = vC(19); // 19 Character asterisk "*" for water. char waterMark = vC(19); // 19 Character asterisk "*" for water.
std::string formula = vS(20); // 20 - 70 std::String text Chemical formula. std::string formula = vS(20); // 20 - 70 String text Chemical formula.
mFormuls[hetID] = formula; mFormuls[hetID] = formula;
...@@ -3553,7 +3554,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3553,7 +3554,7 @@ void PDBFileParser::ConstructEntities()
h = prev(mHets.end()); h = prev(mHets.end());
} }
h->atoms.insert(serial); h->atoms.push_back(r);
} }
continue; continue;
...@@ -3988,6 +3989,46 @@ void PDBFileParser::ConstructEntities() ...@@ -3988,6 +3989,46 @@ void PDBFileParser::ConstructEntities()
}); });
} }
// build sugar trees first
// Visit every NAG/NDG HET group exactly once. Each pass picks the first
// group that has not been flagged yet, flags it, and collects candidate
// anchor-atom positions for it.
for (;;)
{
	// find a first NAG/NDG that has not been handled yet
	auto si = std::find_if(mHets.begin(), mHets.end(),
		[](const HET& h) { return (h.hetID == "NAG" or h.hetID == "NDG") and not h.processedSugar; });

	// No unprocessed sugar left (or none at all): leave the loop. Without
	// this exit the for (;;) would spin forever once find_if returns end().
	if (si == mHets.end())
		break;

	// Flag the group first so the next pass of the loop skips it.
	si->processedSugar = true;

	// take the location of the C1 atom(s?)
	// NOTE(review): the filter below keeps atoms named "ND2", not "C1" as the
	// comment above says — confirm which atom name is intended here.
	std::vector<std::tuple<std::string,float,float,float>> ci;

	for (auto a: si->atoms)
	{
		std::string name = a->vS(13, 16);	// 13 - 16 Atom name Atom name.

		if (name != "ND2")
			continue;

		// One entry per matching record: (altLoc, x, y, z).
		ci.emplace_back(
			std::string{ a->vC(17) },		// 17 Character altLoc Alternate location indicator.
			std::stof(a->vF(31, 38)),		// 31 - 38 Real(8.3) x Orthogonal coordinates for X in Angstroms.
			std::stof(a->vF(39, 46)),		// 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
			std::stof(a->vF(47, 54))		// 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
		);
	}

	// find a free ASN near by
	// TODO: not implemented yet; ci is collected but not used so far.
}
// done with the sugar, resume operation as before
std::map<char,std::string> waterChains; std::map<char,std::string> waterChains;
std::map<std::tuple<std::string,std::string>,int> ndbSeqNum; // for nonpoly scheme std::map<std::tuple<std::string,std::string>,int> ndbSeqNum; // for nonpoly scheme
...@@ -4154,7 +4195,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4154,7 +4195,7 @@ void PDBFileParser::ConstructEntities()
int seqNum = rec->vI(19, 22); // 19 - 22 Integer seqNum Sequence number. int seqNum = rec->vI(19, 22); // 19 - 22 Integer seqNum Sequence number.
char iCode = rec->vC(23); // 23 AChar iCode Insertion code. char iCode = rec->vC(23); // 23 AChar iCode Insertion code.
std::string stdRes = rec->vS(25, 27); // 25 - 27 Residue name stdRes Standard residue name. std::string stdRes = rec->vS(25, 27); // 25 - 27 Residue name stdRes Standard residue name.
std::string comment = rec->vS(30, 70); // 30 - 70 std::String comment Description of the residue modification. std::string comment = rec->vS(30, 70); // 30 - 70 String comment Description of the residue modification.
std::string asymID; std::string asymID;
int seq; int seq;
...@@ -4324,7 +4365,7 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4324,7 +4365,7 @@ void PDBFileParser::ParseSecondaryStructure()
// 34 - 37 Integer endSeqNum Sequence number of the terminal residue. // 34 - 37 Integer endSeqNum Sequence number of the terminal residue.
// 38 AChar endICode Insertion code of the terminal residue. // 38 AChar endICode Insertion code of the terminal residue.
// 39 - 40 Integer helixClass Helix class (see below). // 39 - 40 Integer helixClass Helix class (see below).
// 41 - 70 std::String comment Comment about this helix. // 41 - 70 String comment Comment about this helix.
// 72 - 76 Integer length Length of this helix. // 72 - 76 Integer length Length of this helix.
std::string begAsymID, endAsymID; std::string begAsymID, endAsymID;
...@@ -4858,7 +4899,7 @@ void PDBFileParser::ParseMiscellaneousFeatures() ...@@ -4858,7 +4899,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
while (mRec->is("SITE ")) while (mRec->is("SITE "))
{ // 1 - 6 Record name "SITE " { // 1 - 6 Record name "SITE "
// 8 - 10 Integer seqNum Sequence number. // 8 - 10 Integer seqNum Sequence number.
std::string siteID = vS(12, 14); // 12 - 14 LString(3) siteID Site name. std::string siteID = vS(12, 14);// 12 - 14 LString(3) siteID Site name.
int numRes = vI(16, 17); // 16 - 17 Integer numRes Number of residues that compose the site. int numRes = vI(16, 17); // 16 - 17 Integer numRes Number of residues that compose the site.
int o = 19; int o = 19;
...@@ -5177,7 +5218,7 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -5177,7 +5218,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
std::string y = vF(39, 46); // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms. std::string y = vF(39, 46); // 39 - 46 Real(8.3) y Orthogonal coordinates for Y in Angstroms.
std::string z = vF(47, 54); // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms. std::string z = vF(47, 54); // 47 - 54 Real(8.3) z Orthogonal coordinates for Z in Angstroms.
std::string occupancy = vF(55, 60); // 55 - 60 Real(6.2) occupancy Occupancy. std::string occupancy = vF(55, 60); // 55 - 60 Real(6.2) occupancy Occupancy.
std::string tempFactor = vF(61, 66); // 61 - 66 Real(6.2) tempFactor Temperature factor. std::string tempFactor = vF(61, 66);// 61 - 66 Real(6.2) tempFactor Temperature factor.
std::string element = vS(77, 78); // 77 - 78 LString(2) element Element symbol, right-justified. std::string element = vS(77, 78); // 77 - 78 LString(2) element Element symbol, right-justified.
std::string charge = vS(79, 80); // 79 - 80 LString(2) charge Charge on the atom. std::string charge = vS(79, 80); // 79 - 80 LString(2) charge Charge on the atom.
......
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <exception>
#include <iostream>
#include <stdexcept>

#include "cif++/Cif++.hpp"
#include <cif++/Structure.hpp>
// Test driver for the pdb2cif conversion: registers the resource search
// locations and then constructs an mmcif::File from a fixed PDB entry.
//
// Returns 0 when the file object could be constructed, 1 when any
// std::exception was thrown on the way (the message is written to stderr).
// The command line arguments are currently not used.
int main(int argc, char* const argv[])
{
	int result = 0;

	try
	{
		// Register the resource locations: the current directory, the
		// "rsrc" sub directory and, when built with USE_RSRC, the
		// resources compiled into the executable.
		cif::rsrc_loader::init({
			{ cif::rsrc_loader_type::file, "." },
			{ cif::rsrc_loader_type::file, "rsrc" },
#if USE_RSRC
			{ cif::rsrc_loader_type::mrsrc, "", { gResourceIndex, gResourceData, gResourceName } }
#endif
		});

		mmcif::File file("pdb2b8h.ent.gz");
	}
	catch (const std::exception& ex)
	{
		// Report the failure and exit with an error code instead of letting
		// the exception escape main, which would end in std::terminate.
		std::cerr << "pdb2cif-test failed: " << ex.what() << std::endl;
		result = 1;
	}

	return result;
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment