Commit 3315fae8 by Maarten L. Hekkelman

Merge branch 'cif2fasta-develop' into develop-cif2fasta

parents f1ca916d d8c3c3f7
...@@ -537,6 +537,11 @@ write_basic_package_version_file( ...@@ -537,6 +537,11 @@ write_basic_package_version_file(
VERSION ${PROJECT_VERSION} VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion) COMPATIBILITY AnyNewerVersion)
# In case we're included as a subdirectory:
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
endif()
if(BUILD_TESTING) if(BUILD_TESTING)
# We're using the older version 2 of Catch2 # We're using the older version 2 of Catch2
FetchContent_Declare( FetchContent_Declare(
......
...@@ -45,14 +45,14 @@ using cif::to_lower_copy; ...@@ -45,14 +45,14 @@ using cif::to_lower_copy;
namespace error namespace error
{ {
enum pdbErrors enum pdbErrors
{ {
residueNotFound = 1000, residueNotFound = 1000,
invalidDate invalidDate
}; };
namespace detail namespace detail
{ {
class pdbCategory : public std::error_category class pdbCategory : public std::error_category
{ {
public: public:
...@@ -76,28 +76,28 @@ namespace error ...@@ -76,28 +76,28 @@ namespace error
} }
} }
}; };
} // namespace detail } // namespace detail
std::error_category &pdbCategory() std::error_category &pdbCategory()
{ {
static detail::pdbCategory impl; static detail::pdbCategory impl;
return impl; return impl;
} }
inline std::error_code make_error_code(pdbErrors e) inline std::error_code make_error_code(pdbErrors e)
{ {
return std::error_code(static_cast<int>(e), pdbCategory()); return std::error_code(static_cast<int>(e), pdbCategory());
} }
} // namespace error } // namespace error
namespace std namespace std
{ {
template <> template <>
struct is_error_code_enum<error::pdbErrors> struct is_error_code_enum<error::pdbErrors>
{ {
static const bool value = true; static const bool value = true;
}; };
} // namespace std } // namespace std
...@@ -835,8 +835,6 @@ class PDBFileParser ...@@ -835,8 +835,6 @@ class PDBFileParser
return result; return result;
} }
std::tuple<std::string, int, bool> MapResidue(char chainID, int resSeq, char iCode, const std::string &resName);
// ---------------------------------------------------------------- // ----------------------------------------------------------------
void PreParseInput(std::istream &is); void PreParseInput(std::istream &is);
...@@ -1492,8 +1490,7 @@ void PDBFileParser::ParseTitle() ...@@ -1492,8 +1490,7 @@ void PDBFileParser::ParseTitle()
auto cat = getCategory("entry"); auto cat = getCategory("entry");
// cat->addColumn("id"); // cat->addColumn("id");
cat->emplace({ cat->emplace({ { "id", mStructureID } });
{ "id", mStructureID } });
// OBSLTE // OBSLTE
if (mRec->is("OBSLTE")) if (mRec->is("OBSLTE"))
...@@ -1512,8 +1509,7 @@ void PDBFileParser::ParseTitle() ...@@ -1512,8 +1509,7 @@ void PDBFileParser::ParseTitle()
std::string value = mRec->vS(32); std::string value = mRec->vS(32);
for (auto i : cif::split<std::string>(value, " ", true)) for (auto i : cif::split<std::string>(value, " ", true))
{ {
cat->emplace({ cat->emplace({ { "id", "OBSLTE" },
{ "id", "OBSLTE" },
{ "date", date }, { "date", date },
{ "replace_pdb_id", old }, { "replace_pdb_id", old },
{ "pdb_id", i } }); { "pdb_id", i } });
...@@ -1545,10 +1541,12 @@ void PDBFileParser::ParseTitle() ...@@ -1545,10 +1541,12 @@ void PDBFileParser::ParseTitle()
int caveatID = 1; int caveatID = 1;
while (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT" while (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT"
{ {
// clang-format off
getCategory("database_PDB_caveat")->emplace({ getCategory("database_PDB_caveat")->emplace({
{ "id", caveatID++ }, { "id", caveatID++ },
{ "text", std::string{ mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT. { "text", std::string{ mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
}); });
// clang-format on
GetNextRecord(); GetNextRecord();
} }
...@@ -1679,10 +1677,13 @@ void PDBFileParser::ParseTitle() ...@@ -1679,10 +1677,13 @@ void PDBFileParser::ParseTitle()
if (not(keywords.empty() and pdbxKeywords.empty())) if (not(keywords.empty() and pdbxKeywords.empty()))
{ {
// clang-format off
getCategory("struct_keywords")->emplace({ getCategory("struct_keywords")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "pdbx_keywords", keywords }, { "pdbx_keywords", keywords },
{ "text", pdbxKeywords } }); { "text", pdbxKeywords }
});
// clang-format on
} }
// EXPDTA // EXPDTA
...@@ -1705,10 +1706,13 @@ void PDBFileParser::ParseTitle() ...@@ -1705,10 +1706,13 @@ void PDBFileParser::ParseTitle()
if (expMethod.empty()) if (expMethod.empty())
continue; continue;
// clang-format off
cat->emplace({ cat->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "method", expMethod }, { "method", expMethod },
{ "crystals_number", ci != crystals.end() ? *ci : "" } }); { "crystals_number", ci != crystals.end() ? *ci : "" }
});
// clang-format on
} }
GetNextRecord(); GetNextRecord();
...@@ -1739,9 +1743,12 @@ void PDBFileParser::ParseTitle() ...@@ -1739,9 +1743,12 @@ void PDBFileParser::ParseTitle()
value = { mRec->vS(11) }; value = { mRec->vS(11) };
for (auto author : cif::split<std::string>(value, ",", true)) for (auto author : cif::split<std::string>(value, ",", true))
{ {
// clang-format off
cat->emplace({ cat->emplace({
{ "name", pdb2cifAuth(author) }, { "name", pdb2cifAuth(author) },
{ "pdbx_ordinal", n } }); { "pdbx_ordinal", n }
});
// clang-format on
++n; ++n;
} }
...@@ -1785,10 +1792,12 @@ void PDBFileParser::ParseTitle() ...@@ -1785,10 +1792,12 @@ void PDBFileParser::ParseTitle()
if (firstRevDat) if (firstRevDat)
{ {
cat = getCategory("database_2"); // clang-format off
cat->emplace({ getCategory("database_2")->emplace({
{ "database_id", "PDB" }, { "database_id", "PDB" },
{ "database_code", modID } }); { "database_code", modID }
});
// clang-format on
} }
GetNextRecord(); GetNextRecord();
...@@ -1801,20 +1810,27 @@ void PDBFileParser::ParseTitle() ...@@ -1801,20 +1810,27 @@ void PDBFileParser::ParseTitle()
sort(revdats.begin(), revdats.end()); sort(revdats.begin(), revdats.end());
for (auto &revdat : revdats) for (auto &revdat : revdats)
{ {
// clang-format off
getCategory("database_PDB_rev")->emplace({ getCategory("database_PDB_rev")->emplace({
{ "num", revdat.revNum }, { "num", revdat.revNum },
{ "date", revdat.date }, { "date", revdat.date },
{ "date_original", revdat.dateOriginal }, { "date_original", revdat.dateOriginal },
{ "replaces", revdat.replaces }, { "replaces", revdat.replaces },
{ "mod_type", revdat.modType } }); { "mod_type", revdat.modType }
});
// clang-format on
for (auto &type : revdat.types) for (auto &type : revdat.types)
{ {
if (type.empty()) if (type.empty())
continue; continue;
// clang-format off
getCategory("database_PDB_rev_record")->emplace({ getCategory("database_PDB_rev_record")->emplace({
{ "rev_num", revdat.revNum }, { "rev_num", revdat.revNum },
{ "type", type } }); { "type", type }
});
// clang-format on
} }
} }
//*/ //*/
...@@ -1889,6 +1905,7 @@ void PDBFileParser::ParseCitation(const std::string &id) ...@@ -1889,6 +1905,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
} }
auto cat = getCategory("citation"); auto cat = getCategory("citation");
// clang-format off
cat->emplace({ cat->emplace({
{ "id", id }, { "id", id },
{ "title", titl }, { "title", titl },
...@@ -1902,15 +1919,16 @@ void PDBFileParser::ParseCitation(const std::string &id) ...@@ -1902,15 +1919,16 @@ void PDBFileParser::ParseCitation(const std::string &id)
{ "journal_id_CSD", csd }, { "journal_id_CSD", csd },
{ "book_publisher", publ }, { "book_publisher", publ },
{ "pdbx_database_id_PubMed", pmid }, { "pdbx_database_id_PubMed", pmid },
{ "pdbx_database_id_DOI", doi } }); { "pdbx_database_id_DOI", doi }
});
// clang-format on
if (not auth.empty()) if (not auth.empty())
{ {
cat = getCategory("citation_author"); cat = getCategory("citation_author");
for (auto author : cif::split<std::string>(auth, ",", true)) for (auto author : cif::split<std::string>(auth, ",", true))
{ {
cat->emplace({ cat->emplace({ { "citation_id", id },
{ "citation_id", id },
{ "name", pdb2cifAuth(author) }, { "name", pdb2cifAuth(author) },
{ "ordinal", mCitationAuthorNr } }); { "ordinal", mCitationAuthorNr } });
...@@ -1923,8 +1941,7 @@ void PDBFileParser::ParseCitation(const std::string &id) ...@@ -1923,8 +1941,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
cat = getCategory("citation_editor"); cat = getCategory("citation_editor");
for (auto editor : cif::split<std::string>(edit, ",", true)) for (auto editor : cif::split<std::string>(edit, ",", true))
{ {
cat->emplace({ cat->emplace({ { "citation_id", id },
{ "citation_id", id },
{ "name", pdb2cifAuth(editor) }, { "name", pdb2cifAuth(editor) },
{ "ordinal", mCitationEditorNr } }); { "ordinal", mCitationEditorNr } });
...@@ -1981,8 +1998,7 @@ void PDBFileParser::ParseRemarks() ...@@ -1981,8 +1998,7 @@ void PDBFileParser::ParseRemarks()
if (std::regex_match(r, m, rx)) if (std::regex_match(r, m, rx))
{ {
auto cat = getCategory("database_2"); auto cat = getCategory("database_2");
cat->emplace({ cat->emplace({ { "database_id", m[1].str() },
{ "database_id", m[1].str() },
{ "database_code", m[2].str() } }); { "database_code", m[2].str() } });
} }
...@@ -2050,11 +2066,14 @@ void PDBFileParser::ParseRemarks() ...@@ -2050,11 +2066,14 @@ void PDBFileParser::ParseRemarks()
if (desc == "NULL") if (desc == "NULL")
desc.clear(); desc.clear();
// clang-format off
getCategory("exptl_crystal")->emplace({ getCategory("exptl_crystal")->emplace({
{ "id", 1 }, { "id", 1 },
{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews }, { "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews },
{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol }, { "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
{ "description", desc } }); { "description", desc }
});
// clang-format on
// now try to parse the conditions // now try to parse the conditions
const std::regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))") /*, rx5(R"(\b(\d+)C\b)")*/; const std::regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))") /*, rx5(R"(\b(\d+)C\b)")*/;
...@@ -2081,12 +2100,15 @@ void PDBFileParser::ParseRemarks() ...@@ -2081,12 +2100,15 @@ void PDBFileParser::ParseRemarks()
if (not(method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL"))) if (not(method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL")))
{ {
// clang-format off
getCategory("exptl_crystal_grow")->emplace({ getCategory("exptl_crystal_grow")->emplace({
{ "crystal_id", 1 }, { "crystal_id", 1 },
{ "method", method }, { "method", method },
{ "temp", temp }, { "temp", temp },
{ "pH", ph }, { "pH", ph },
{ "pdbx_details", conditions } }); { "pdbx_details", conditions }
});
// clang-format on
} }
break; break;
...@@ -2302,6 +2324,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2302,6 +2324,7 @@ void PDBFileParser::ParseRemarks()
std::string distance = vF(63, 71); std::string distance = vF(63, 71);
// clang-format off
getCategory("pdbx_validate_close_contact")->emplace({ getCategory("pdbx_validate_close_contact")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", 1 }, { "PDB_model_num", 1 },
...@@ -2317,7 +2340,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2317,7 +2340,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_seq_id_2", seq2 }, { "auth_seq_id_2", seq2 },
{ "PDB_ins_code_2", iCode2 }, { "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 }, { "label_alt_id_2", alt2 },
{ "dist", distance } }); { "dist", distance }
});
// clang-format on
} }
break; break;
} }
...@@ -2354,6 +2379,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2354,6 +2379,7 @@ void PDBFileParser::ParseRemarks()
std::string distance = vF(63, 71); std::string distance = vF(63, 71);
// clang-format off
getCategory("pdbx_validate_symm_contact")->emplace({ getCategory("pdbx_validate_symm_contact")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", 1 }, { "PDB_model_num", 1 },
...@@ -2361,17 +2387,19 @@ void PDBFileParser::ParseRemarks() ...@@ -2361,17 +2387,19 @@ void PDBFileParser::ParseRemarks()
{ "auth_asym_id_1", std::string{ chain1 } }, { "auth_asym_id_1", std::string{ chain1 } },
{ "auth_comp_id_1", res1 }, { "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 }, { "auth_seq_id_1", seq1 },
// { "PDB_ins_code_1", "" }, // { "PDB_ins_code_1", "" },
// { "label_alt_id_1", "" }, // { "label_alt_id_1", "" },
{ "site_symmetry_1", "1_555" }, { "site_symmetry_1", "1_555" },
{ "auth_atom_id_2", atom2 }, { "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", std::string{ chain2 } }, { "auth_asym_id_2", std::string{ chain2 } },
{ "auth_comp_id_2", res2 }, { "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 }, { "auth_seq_id_2", seq2 },
// { "PDB_ins_code_2", "" }, // { "PDB_ins_code_2", "" },
// { "label_alt_id_2", "" }, // { "label_alt_id_2", "" },
{ "site_symmetry_2", symop }, { "site_symmetry_2", symop },
{ "dist", distance } }); { "dist", distance }
});
// clang-format on
} }
break; break;
} }
...@@ -2411,6 +2439,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2411,6 +2439,7 @@ void PDBFileParser::ParseRemarks()
if (iCode2 == " ") if (iCode2 == " ")
iCode2.clear(); iCode2.clear();
// clang-format off
getCategory("pdbx_validate_rmsd_bond")->emplace({ getCategory("pdbx_validate_rmsd_bond")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 }, { "PDB_model_num", model ? model : 1 },
...@@ -2426,7 +2455,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2426,7 +2455,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_seq_id_2", seqNum2 }, { "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 }, { "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 }, { "label_alt_id_2", alt2 },
{ "bond_deviation", deviation } }); { "bond_deviation", deviation }
});
// clang-format on
} }
break; break;
...@@ -2458,6 +2489,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2458,6 +2489,7 @@ void PDBFileParser::ParseRemarks()
if (deviation == "*****") if (deviation == "*****")
deviation.clear(); deviation.clear();
// clang-format off
getCategory("pdbx_validate_rmsd_angle")->emplace({ getCategory("pdbx_validate_rmsd_angle")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 }, { "PDB_model_num", model ? model : 1 },
...@@ -2476,7 +2508,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2476,7 +2508,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_comp_id_3", resNam }, { "auth_comp_id_3", resNam },
{ "auth_seq_id_3", seqNum }, { "auth_seq_id_3", seqNum },
{ "PDB_ins_code_3", iCode }, { "PDB_ins_code_3", iCode },
{ "angle_deviation", deviation } }); { "angle_deviation", deviation }
});
// clang-format on
} }
break; break;
...@@ -2505,6 +2539,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2505,6 +2539,7 @@ void PDBFileParser::ParseRemarks()
std::string psi = vF(27, 35); std::string psi = vF(27, 35);
std::string phi = vF(37, 45); std::string phi = vF(37, 45);
// clang-format off
getCategory("pdbx_validate_torsion")->emplace({ getCategory("pdbx_validate_torsion")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 }, { "PDB_model_num", model ? model : 1 },
...@@ -2513,7 +2548,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2513,7 +2548,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_seq_id", seqNum }, { "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode }, { "PDB_ins_code", iCode },
{ "phi", phi }, { "phi", phi },
{ "psi", psi } }); { "psi", psi }
});
// clang-format on
} }
break; break;
...@@ -2544,6 +2581,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2544,6 +2581,7 @@ void PDBFileParser::ParseRemarks()
std::string omega = vF(54, 60); std::string omega = vF(54, 60);
// clang-format off
getCategory("pdbx_validate_peptide_omega")->emplace({ getCategory("pdbx_validate_peptide_omega")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 }, { "PDB_model_num", model ? model : 1 },
...@@ -2555,7 +2593,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2555,7 +2593,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_asym_id_2", chainID2 }, { "auth_asym_id_2", chainID2 },
{ "auth_seq_id_2", seqNum2 }, { "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 }, { "PDB_ins_code_2", iCode2 },
{ "omega", omega } }); { "omega", omega }
});
// clang-format on
} }
break; break;
...@@ -2578,6 +2618,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2578,6 +2618,7 @@ void PDBFileParser::ParseRemarks()
std::string rmsd = vF(32, 36); std::string rmsd = vF(32, 36);
std::string type = vS(41); std::string type = vS(41);
// clang-format off
getCategory("pdbx_validate_planes")->emplace({ getCategory("pdbx_validate_planes")->emplace({
{ "id", std::to_string(++id) }, { "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 }, { "PDB_model_num", model ? model : 1 },
...@@ -2586,7 +2627,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2586,7 +2627,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_seq_id", seqNum }, { "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode }, { "PDB_ins_code", iCode },
{ "rmsd", rmsd }, { "rmsd", rmsd },
{ "type", type } }); { "type", type }
});
// clang-format on
} }
break; break;
...@@ -2666,6 +2709,7 @@ void PDBFileParser::ParseRemarks() ...@@ -2666,6 +2709,7 @@ void PDBFileParser::ParseRemarks()
throw std::runtime_error("Invalid REMARK 800, no SITE record for id " + id); throw std::runtime_error("Invalid REMARK 800, no SITE record for id " + id);
// next record, store what we have // next record, store what we have
// clang-format off
getCategory("struct_site")->emplace({ getCategory("struct_site")->emplace({
{ "id", id }, { "id", id },
{ "details", desc }, { "details", desc },
...@@ -2673,7 +2717,9 @@ void PDBFileParser::ParseRemarks() ...@@ -2673,7 +2717,9 @@ void PDBFileParser::ParseRemarks()
{ "pdbx_auth_comp_id", pdbxAuthCompID }, { "pdbx_auth_comp_id", pdbxAuthCompID },
{ "pdbx_auth_seq_id", pdbxAuthSeqID }, { "pdbx_auth_seq_id", pdbxAuthSeqID },
{ "pdbx_num_residues", site->vI(16, 17) }, { "pdbx_num_residues", site->vI(16, 17) },
{ "pdbx_evidence_code", evidence } }); { "pdbx_evidence_code", evidence }
});
// clang-format on
}; };
for (; mRec->is("REMARK 800"); GetNextRecord()) for (; mRec->is("REMARK 800"); GetNextRecord())
...@@ -2787,9 +2833,12 @@ void PDBFileParser::ParseRemarks() ...@@ -2787,9 +2833,12 @@ void PDBFileParser::ParseRemarks()
GetNextRecord(); GetNextRecord();
} }
// clang-format off
getCategory("pdbx_database_remark")->emplace({ getCategory("pdbx_database_remark")->emplace({
{ "id", remarkNr }, { "id", remarkNr },
{ "text", s.str() } }); { "text", s.str() }
});
// clang-format on
break; break;
} }
...@@ -2803,11 +2852,14 @@ void PDBFileParser::ParseRemarks() ...@@ -2803,11 +2852,14 @@ void PDBFileParser::ParseRemarks()
if (not(compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty())) if (not(compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty()))
{ {
// clang-format off
getCategory("pdbx_entry_details")->emplace({ getCategory("pdbx_entry_details")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "compound_details", compoundDetails }, { "compound_details", compoundDetails },
{ "sequence_details", sequenceDetails }, { "sequence_details", sequenceDetails },
{ "source_details", sourceDetails } }); { "source_details", sourceDetails }
});
// clang-format on
} }
// store remark 200 info (special case) // store remark 200 info (special case)
...@@ -2884,11 +2936,14 @@ void PDBFileParser::ParseRemark200() ...@@ -2884,11 +2936,14 @@ void PDBFileParser::ParseRemark200()
if (mRemark200[sw.b].empty()) if (mRemark200[sw.b].empty())
continue; continue;
// clang-format off
getCategory("software")->emplace({ getCategory("software")->emplace({
{ "name", mRemark200[sw.b] }, { "name", mRemark200[sw.b] },
{ "classification", sw.a }, { "classification", sw.a },
{ "version", "." }, { "version", "." },
{ "pdbx_ordinal", mNextSoftwareOrd++ } }); { "pdbx_ordinal", mNextSoftwareOrd++ }
});
// clang-format on
} }
std::string scatteringType; std::string scatteringType;
...@@ -2908,11 +2963,13 @@ void PDBFileParser::ParseRemark200() ...@@ -2908,11 +2963,13 @@ void PDBFileParser::ParseRemark200()
if (cif::ends_with(ambientTemp, "K")) if (cif::ends_with(ambientTemp, "K"))
ambientTemp.erase(ambientTemp.length() - 1, 1); ambientTemp.erase(ambientTemp.length() - 1, 1);
// clang-format off
getCategory("diffrn")->emplace({ getCategory("diffrn")->emplace({
{ "id", diffrnNr }, { "id", diffrnNr },
{ "ambient_temp", ambientTemp }, { "ambient_temp", ambientTemp },
// { "ambient_temp_details", seqID }, // { "ambient_temp_details", seqID },
{ "crystal_id", 1 } }); { "crystal_id", 1 } });
// clang-format on
std::string collectionDate; std::string collectionDate;
std::error_code ec; std::error_code ec;
...@@ -2927,21 +2984,27 @@ void PDBFileParser::ParseRemark200() ...@@ -2927,21 +2984,27 @@ void PDBFileParser::ParseRemark200()
collectionDate.clear(); collectionDate.clear();
} }
// clang-format off
getCategory("diffrn_detector")->emplace({ getCategory("diffrn_detector")->emplace({
{ "diffrn_id", diffrnNr }, { "diffrn_id", diffrnNr },
{ "detector", rm200("DETECTOR TYPE", diffrnNr) }, { "detector", rm200("DETECTOR TYPE", diffrnNr) },
{ "type", rm200("DETECTOR MANUFACTURER", diffrnNr) }, { "type", rm200("DETECTOR MANUFACTURER", diffrnNr) },
{ "pdbx_collection_date", collectionDate }, { "pdbx_collection_date", collectionDate },
{ "details", rm200("OPTICS", diffrnNr) } }); { "details", rm200("OPTICS", diffrnNr) }
});
// clang-format on
if (inRM200({ "MONOCHROMATIC OR LAUE (M/L)", "MONOCHROMATOR", "DIFFRACTION PROTOCOL" }) or not scatteringType.empty()) if (inRM200({ "MONOCHROMATIC OR LAUE (M/L)", "MONOCHROMATOR", "DIFFRACTION PROTOCOL" }) or not scatteringType.empty())
// clang-format off
getCategory("diffrn_radiation")->emplace({ getCategory("diffrn_radiation")->emplace({
{ "diffrn_id", diffrnNr }, { "diffrn_id", diffrnNr },
{ "wavelength_id", 1 }, { "wavelength_id", 1 },
{ "pdbx_monochromatic_or_laue_m_l", rm200("MONOCHROMATIC OR LAUE (M/L)", diffrnNr) }, { "pdbx_monochromatic_or_laue_m_l", rm200("MONOCHROMATIC OR LAUE (M/L)", diffrnNr) },
{ "monochromator", rm200("MONOCHROMATOR", diffrnNr) }, { "monochromator", rm200("MONOCHROMATOR", diffrnNr) },
{ "pdbx_diffrn_protocol", rm200("DIFFRACTION PROTOCOL", diffrnNr) }, { "pdbx_diffrn_protocol", rm200("DIFFRACTION PROTOCOL", diffrnNr) },
{ "pdbx_scattering_type", scatteringType } }); { "pdbx_scattering_type", scatteringType }
});
// clang-format on
std::string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr); std::string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr);
auto wavelengths = cif::split<std::string>(wl, ", -", true); auto wavelengths = cif::split<std::string>(wl, ", -", true);
...@@ -2951,6 +3014,7 @@ void PDBFileParser::ParseRemark200() ...@@ -2951,6 +3014,7 @@ void PDBFileParser::ParseRemark200()
std::string source; std::string source;
if (rm200("SYNCHROTRON (Y/N)", diffrnNr) == "Y") if (rm200("SYNCHROTRON (Y/N)", diffrnNr) == "Y")
{ {
// clang-format off
getCategory("diffrn_source")->emplace({ getCategory("diffrn_source")->emplace({
{ "diffrn_id", diffrnNr }, { "diffrn_id", diffrnNr },
{ "source", "SYNCHROTRON" }, { "source", "SYNCHROTRON" },
...@@ -2961,9 +3025,11 @@ void PDBFileParser::ParseRemark200() ...@@ -2961,9 +3025,11 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" }, { "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" },
{ "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") }, { "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") },
}); });
// clang-format on
} }
else if (inRM200({ "X-RAY GENERATOR MODEL", "RADIATION SOURCE", "BEAMLINE", "WAVELENGTH OR RANGE (A)" })) else if (inRM200({ "X-RAY GENERATOR MODEL", "RADIATION SOURCE", "BEAMLINE", "WAVELENGTH OR RANGE (A)" }))
{ {
// clang-format off
getCategory("diffrn_source")->emplace({ getCategory("diffrn_source")->emplace({
{ "diffrn_id", diffrnNr }, { "diffrn_id", diffrnNr },
{ "source", rm200("RADIATION SOURCE", diffrnNr) }, { "source", rm200("RADIATION SOURCE", diffrnNr) },
...@@ -2972,6 +3038,7 @@ void PDBFileParser::ParseRemark200() ...@@ -2972,6 +3038,7 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" }, { "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" },
{ "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") }, { "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") },
}); });
// clang-format on
} }
} }
...@@ -2981,10 +3048,13 @@ void PDBFileParser::ParseRemark200() ...@@ -2981,10 +3048,13 @@ void PDBFileParser::ParseRemark200()
if (cif::ends_with(wl, "A")) if (cif::ends_with(wl, "A"))
wl.erase(wl.length() - 1, 1); wl.erase(wl.length() - 1, 1);
// clang-format off
getCategory("diffrn_radiation_wavelength")->emplace({ getCategory("diffrn_radiation_wavelength")->emplace({
{ "id", wavelengthNr++ }, { "id", wavelengthNr++ },
{ "wavelength", wl.empty() ? "." : wl }, { "wavelength", wl.empty() ? "." : wl },
{ "wt", "1.0" } }); { "wt", "1.0" }
});
// clang-format on
} }
if (inRM200({ "METHOD USED TO DETERMINE THE STRUCTURE", "STARTING MODEL" })) if (inRM200({ "METHOD USED TO DETERMINE THE STRUCTURE", "STARTING MODEL" }))
...@@ -2996,6 +3066,7 @@ void PDBFileParser::ParseRemark200() ...@@ -2996,6 +3066,7 @@ void PDBFileParser::ParseRemark200()
if (resolution.empty()) if (resolution.empty())
resolution = "."; resolution = ".";
// clang-format off
cat->emplace({ cat->emplace({
{ "pdbx_method_to_determine_struct", mRemark200["METHOD USED TO DETERMINE THE STRUCTURE"] }, { "pdbx_method_to_determine_struct", mRemark200["METHOD USED TO DETERMINE THE STRUCTURE"] },
{ "pdbx_starting_model", mRemark200["STARTING MODEL"] }, { "pdbx_starting_model", mRemark200["STARTING MODEL"] },
...@@ -3003,11 +3074,13 @@ void PDBFileParser::ParseRemark200() ...@@ -3003,11 +3074,13 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_diffrn_id", 1 }, { "pdbx_diffrn_id", 1 },
{ "pdbx_refine_id", mExpMethod }, { "pdbx_refine_id", mExpMethod },
{ "entry_id", mStructureID } }); { "entry_id", mStructureID } });
// clang-format on
} }
if (inRM200({ "REJECTION CRITERIA (SIGMA(I))", "RESOLUTION RANGE HIGH (A)", "RESOLUTION RANGE LOW (A)", "NUMBER OF UNIQUE REFLECTIONS", "COMPLETENESS FOR RANGE (%)", "<I/SIGMA(I)> FOR THE DATA SET", "R MERGE (I)", "R SYM (I)", "DATA REDUNDANCY" })) if (inRM200({ "REJECTION CRITERIA (SIGMA(I))", "RESOLUTION RANGE HIGH (A)", "RESOLUTION RANGE LOW (A)", "NUMBER OF UNIQUE REFLECTIONS", "COMPLETENESS FOR RANGE (%)", "<I/SIGMA(I)> FOR THE DATA SET", "R MERGE (I)", "R SYM (I)", "DATA REDUNDANCY" }))
{ {
auto cat = getCategory("reflns"); auto cat = getCategory("reflns");
// clang-format off
cat->emplace({ cat->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "observed_criterion_sigma_I", mRemark200["REJECTION CRITERIA (SIGMA(I))"] }, { "observed_criterion_sigma_I", mRemark200["REJECTION CRITERIA (SIGMA(I))"] },
...@@ -3022,10 +3095,12 @@ void PDBFileParser::ParseRemark200() ...@@ -3022,10 +3095,12 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_ordinal", 1 }, { "pdbx_ordinal", 1 },
{ "pdbx_diffrn_id", 1 } { "pdbx_diffrn_id", 1 }
}); });
// clang-format on
} }
if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)" })) // that one field is mandatory... if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)" })) // that one field is mandatory...
{ {
// clang-format off
getCategory("reflns_shell")->emplace({ getCategory("reflns_shell")->emplace({
{ "d_res_high", mRemark200["HIGHEST RESOLUTION SHELL, RANGE HIGH (A)"] }, { "d_res_high", mRemark200["HIGHEST RESOLUTION SHELL, RANGE HIGH (A)"] },
{ "d_res_low", mRemark200["HIGHEST RESOLUTION SHELL, RANGE LOW (A)"] }, { "d_res_low", mRemark200["HIGHEST RESOLUTION SHELL, RANGE LOW (A)"] },
...@@ -3035,7 +3110,9 @@ void PDBFileParser::ParseRemark200() ...@@ -3035,7 +3110,9 @@ void PDBFileParser::ParseRemark200()
{ "meanI_over_sigI_obs", mRemark200["<I/SIGMA(I)> FOR SHELL"] }, { "meanI_over_sigI_obs", mRemark200["<I/SIGMA(I)> FOR SHELL"] },
{ "pdbx_redundancy", mRemark200["DATA REDUNDANCY IN SHELL"] }, { "pdbx_redundancy", mRemark200["DATA REDUNDANCY IN SHELL"] },
{ "pdbx_ordinal", 1 }, { "pdbx_ordinal", 1 },
{ "pdbx_diffrn_id", 1 } }); { "pdbx_diffrn_id", 1 }
});
// clang-format on
} }
else if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE LOW (A)", "COMPLETENESS FOR SHELL (%)", else if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE LOW (A)", "COMPLETENESS FOR SHELL (%)",
"R MERGE FOR SHELL (I)", "R SYM FOR SHELL (I)", "<I/SIGMA(I)> FOR SHELL", "DATA REDUNDANCY IN SHELL" })) "R MERGE FOR SHELL (I)", "R SYM FOR SHELL (I)", "<I/SIGMA(I)> FOR SHELL", "DATA REDUNDANCY IN SHELL" }))
...@@ -3220,12 +3297,14 @@ void PDBFileParser::ParseRemark350() ...@@ -3220,12 +3297,14 @@ void PDBFileParser::ParseRemark350()
else else
details = "author_and_software_defined_assembly"; details = "author_and_software_defined_assembly";
// clang-format off
getCategory("pdbx_struct_assembly")->emplace({ getCategory("pdbx_struct_assembly")->emplace({
{ "id", biomolecule }, { "id", biomolecule },
{ "details", details }, { "details", details },
{ "method_details", values["SOFTWARE USED"] }, { "method_details", values["SOFTWARE USED"] },
{ "oligomeric_details", oligomer }, { "oligomeric_details", oligomer },
{ "oligomeric_count", count > 0 ? std::to_string(count) : "" } }); { "oligomeric_count", count > 0 ? std::to_string(count) : "" }
});
auto cat = getCategory("pdbx_struct_assembly_prop"); auto cat = getCategory("pdbx_struct_assembly_prop");
...@@ -3233,19 +3312,23 @@ void PDBFileParser::ParseRemark350() ...@@ -3233,19 +3312,23 @@ void PDBFileParser::ParseRemark350()
cat->emplace({ cat->emplace({
{ "biol_id", biomolecule }, { "biol_id", biomolecule },
{ "type", "ABSA (A^2)" }, { "type", "ABSA (A^2)" },
{ "value", values["TOTAL BURIED SURFACE AREA"] } }); { "value", values["TOTAL BURIED SURFACE AREA"] }
});
if (not values["CHANGE IN SOLVENT FREE ENERGY"].empty()) if (not values["CHANGE IN SOLVENT FREE ENERGY"].empty())
cat->emplace({ cat->emplace({
{ "biol_id", biomolecule }, { "biol_id", biomolecule },
{ "type", "MORE" }, { "type", "MORE" },
{ "value", values["CHANGE IN SOLVENT FREE ENERGY"] } }); { "value", values["CHANGE IN SOLVENT FREE ENERGY"] }
});
if (not values["SURFACE AREA OF THE COMPLEX"].empty()) if (not values["SURFACE AREA OF THE COMPLEX"].empty())
cat->emplace({ cat->emplace({
{ "biol_id", biomolecule }, { "biol_id", biomolecule },
{ "type", "SSA (A^2)" }, { "type", "SSA (A^2)" },
{ "value", values["SURFACE AREA OF THE COMPLEX"] } }); { "value", values["SURFACE AREA OF THE COMPLEX"] }
});
// clang-format on
values.clear(); values.clear();
} }
...@@ -3259,6 +3342,7 @@ void PDBFileParser::ParseRemark350() ...@@ -3259,6 +3342,7 @@ void PDBFileParser::ParseRemark350()
// else // else
try try
{ {
// clang-format off
getCategory("pdbx_struct_oper_list")->emplace({ getCategory("pdbx_struct_oper_list")->emplace({
{ "id", operID }, { "id", operID },
{ "type", type }, { "type", type },
...@@ -3275,7 +3359,9 @@ void PDBFileParser::ParseRemark350() ...@@ -3275,7 +3359,9 @@ void PDBFileParser::ParseRemark350()
{ "matrix[3][1]", cif::format("%12.10f", mat[6]).str() }, { "matrix[3][1]", cif::format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", cif::format("%12.10f", mat[7]).str() }, { "matrix[3][2]", cif::format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", cif::format("%12.10f", mat[8]).str() }, { "matrix[3][3]", cif::format("%12.10f", mat[8]).str() },
{ "vector[3]", cif::format("%12.10f", vec[2]).str() } }); { "vector[3]", cif::format("%12.10f", vec[2]).str() }
});
// clang-format on
} }
catch (duplicate_key_error &ex) catch (duplicate_key_error &ex)
{ {
...@@ -3291,10 +3377,13 @@ void PDBFileParser::ParseRemark350() ...@@ -3291,10 +3377,13 @@ void PDBFileParser::ParseRemark350()
if (not(vec.empty() and mat.empty())) if (not(vec.empty() and mat.empty()))
throw std::runtime_error("Invalid REMARK 350"); throw std::runtime_error("Invalid REMARK 350");
// clang-format off
getCategory("pdbx_struct_assembly_gen")->emplace({ getCategory("pdbx_struct_assembly_gen")->emplace({
{ "assembly_id", biomolecule }, { "assembly_id", biomolecule },
{ "oper_expression", cif::join(operExpression, ",") }, { "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") } }); { "asym_id_list", cif::join(asymIdList, ",") }
});
// clang-format on
biomolecule = stoi(m[1].str()); biomolecule = stoi(m[1].str());
asymIdList.clear(); asymIdList.clear();
...@@ -3308,10 +3397,13 @@ void PDBFileParser::ParseRemark350() ...@@ -3308,10 +3397,13 @@ void PDBFileParser::ParseRemark350()
if (not operExpression.empty()) if (not operExpression.empty())
{ {
// clang-format off
getCategory("pdbx_struct_assembly_gen")->emplace({ getCategory("pdbx_struct_assembly_gen")->emplace({
{ "assembly_id", biomolecule }, { "assembly_id", biomolecule },
{ "oper_expression", cif::join(operExpression, ",") }, { "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") } }); { "asym_id_list", cif::join(asymIdList, ",") }
});
// clang-format on
} }
mRec = saved; mRec = saved;
...@@ -3797,6 +3889,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3797,6 +3889,7 @@ void PDBFileParser::ConstructEntities()
mAsymID2EntityID[asymID] = entityID; mAsymID2EntityID[asymID] = entityID;
// clang-format off
getCategory("struct_asym")->emplace({ getCategory("struct_asym")->emplace({
{ "id", asymID }, { "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", chain.mDbref.chainID == ' ' ? "Y" : "N" }, { "pdbx_blank_PDB_chainid_flag", chain.mDbref.chainID == ' ' ? "Y" : "N" },
...@@ -3804,6 +3897,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3804,6 +3897,7 @@ void PDBFileParser::ConstructEntities()
{ "entity_id", entityID }, { "entity_id", entityID },
// details // details
}); });
// clang-format on
int seqNr = 1; int seqNr = 1;
for (auto &res : chain.mSeqres) for (auto &res : chain.mSeqres)
...@@ -3818,7 +3912,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3818,7 +3912,7 @@ void PDBFileParser::ConstructEntities()
for (std::string monID : monIds) for (std::string monID : monIds)
{ {
std::string authMonID, authSeqNum, authInsCode{'.'}; std::string authMonID, authSeqNum, authInsCode{ '.' };
if (res.mSeen) if (res.mSeen)
{ {
...@@ -3827,6 +3921,7 @@ void PDBFileParser::ConstructEntities() ...@@ -3827,6 +3921,7 @@ void PDBFileParser::ConstructEntities()
if (res.mIcode != ' ' and res.mIcode != 0) if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{ res.mIcode }; authInsCode = std::string{ res.mIcode };
// clang-format off
cat->emplace({ cat->emplace({
{ "asym_id", asymID }, { "asym_id", asymID },
{ "entity_id", mMolID2EntityID[chain.mMolID] }, { "entity_id", mMolID2EntityID[chain.mMolID] },
...@@ -3839,13 +3934,16 @@ void PDBFileParser::ConstructEntities() ...@@ -3839,13 +3934,16 @@ void PDBFileParser::ConstructEntities()
{ "auth_mon_id", authMonID }, { "auth_mon_id", authMonID },
{ "pdb_strand_id", std::string{ chain.mDbref.chainID } }, { "pdb_strand_id", std::string{ chain.mDbref.chainID } },
{ "pdb_ins_code", authInsCode }, { "pdb_ins_code", authInsCode },
{ "hetero", res.mAlts.empty() ? "n" : "y" } }); { "hetero", res.mAlts.empty() ? "n" : "y" }
});
// clang-format on
} }
else else
{ {
if (res.mIcode != ' ' and res.mIcode != 0) if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{ res.mIcode } + "A"; authInsCode = std::string{ res.mIcode } + "A";
// clang-format off
cat->emplace({ cat->emplace({
{ "asym_id", asymID }, { "asym_id", asymID },
{ "entity_id", mMolID2EntityID[chain.mMolID] }, { "entity_id", mMolID2EntityID[chain.mMolID] },
...@@ -3858,7 +3956,9 @@ void PDBFileParser::ConstructEntities() ...@@ -3858,7 +3956,9 @@ void PDBFileParser::ConstructEntities()
{ "auth_mon_id", "." }, { "auth_mon_id", "." },
{ "pdb_strand_id", std::string{ chain.mDbref.chainID } }, { "pdb_strand_id", std::string{ chain.mDbref.chainID } },
{ "pdb_ins_code", authInsCode }, { "pdb_ins_code", authInsCode },
{ "hetero", res.mAlts.empty() ? "n" : "y" } }); { "hetero", res.mAlts.empty() ? "n" : "y" }
});
// clang-format on
} }
} }
} }
...@@ -3877,18 +3977,21 @@ void PDBFileParser::ConstructEntities() ...@@ -3877,18 +3977,21 @@ void PDBFileParser::ConstructEntities()
{ {
srcMethod = "syn"; srcMethod = "syn";
// clang-format off
getCategory("pdbx_entity_src_syn")->emplace({ getCategory("pdbx_entity_src_syn")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID }, { "pdbx_src_id", structRefID },
{ "organism_scientific", cmp.mSource["ORGANISM_SCIENTIFIC"] }, { "organism_scientific", cmp.mSource["ORGANISM_SCIENTIFIC"] },
{ "ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] }, { "ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
}); });
// clang-format on
} }
else if (cmp.mInfo["ENGINEERED"] == "YES" or else if (cmp.mInfo["ENGINEERED"] == "YES" or
not cmp.mSource["EXPRESSION_SYSTEM"].empty()) not cmp.mSource["EXPRESSION_SYSTEM"].empty())
{ {
srcMethod = "man"; srcMethod = "man";
// clang-format off
getCategory("entity_src_gen")->emplace({ getCategory("entity_src_gen")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID }, { "pdbx_src_id", structRefID },
...@@ -3914,12 +4017,15 @@ void PDBFileParser::ConstructEntities() ...@@ -3914,12 +4017,15 @@ void PDBFileParser::ConstructEntities()
{ "pdbx_host_org_vector", cmp.mSource["EXPRESSION_SYSTEM_VECTOR"] }, { "pdbx_host_org_vector", cmp.mSource["EXPRESSION_SYSTEM_VECTOR"] },
{ "pdbx_host_org_gene", cmp.mSource["EXPRESSION_SYSTEM_GENE"] }, { "pdbx_host_org_gene", cmp.mSource["EXPRESSION_SYSTEM_GENE"] },
{ "plasmid_name", cmp.mSource["EXPRESSION_SYSTEM_PLASMID"] }, { "plasmid_name", cmp.mSource["EXPRESSION_SYSTEM_PLASMID"] },
{ "pdbx_description", cmp.mSource["OTHER_DETAILS"] } }); { "pdbx_description", cmp.mSource["OTHER_DETAILS"] }
});
// clang-format on
} }
else if (not cmp.mSource["ORGANISM_SCIENTIFIC"].empty()) else if (not cmp.mSource["ORGANISM_SCIENTIFIC"].empty())
{ {
srcMethod = "nat"; srcMethod = "nat";
// clang-format off
getCategory("entity_src_nat")->emplace({ getCategory("entity_src_nat")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID }, { "pdbx_src_id", structRefID },
...@@ -3932,25 +4038,32 @@ void PDBFileParser::ConstructEntities() ...@@ -3932,25 +4038,32 @@ void PDBFileParser::ConstructEntities()
{ "pdbx_plasmid_name", cmp.mSource["PLASMID"] }, { "pdbx_plasmid_name", cmp.mSource["PLASMID"] },
{ "pdbx_organ", cmp.mSource["ORGAN"] }, { "pdbx_organ", cmp.mSource["ORGAN"] },
}); });
// clang-format on
} }
// clang-format off
getCategory("entity")->emplace({ getCategory("entity")->emplace({
{ "id", mMolID2EntityID[cmp.mMolID] }, { "id", mMolID2EntityID[cmp.mMolID] },
{ "type", "polymer" }, { "type", "polymer" },
{ "src_method", srcMethod }, { "src_method", srcMethod },
{ "pdbx_description", cmp.mInfo["MOLECULE"] }, { "pdbx_description", cmp.mInfo["MOLECULE"] },
// { "pdbx_formula_weight", }, // { "pdbx_formula_weight", },
{ "pdbx_number_of_molecules", cmp.mChains.size() }, { "pdbx_number_of_molecules", cmp.mChains.size() },
{ "details", cmp.mInfo["OTHER_DETAILS"] }, { "details", cmp.mInfo["OTHER_DETAILS"] },
{ "pdbx_mutation", cmp.mInfo["MUTATION"] }, { "pdbx_mutation", cmp.mInfo["MUTATION"] },
{ "pdbx_fragment", cmp.mInfo["FRAGMENT"] }, { "pdbx_fragment", cmp.mInfo["FRAGMENT"] },
{ "pdbx_ec", cmp.mInfo["EC"] } }); { "pdbx_ec", cmp.mInfo["EC"] }
});
// clang-format on
if (not cmp.mInfo["SYNONYM"].empty()) if (not cmp.mInfo["SYNONYM"].empty())
{ {
// clang-format off
getCategory("entity_name_com")->emplace({ getCategory("entity_name_com")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "name", cmp.mInfo["SYNONYM"] } }); { "name", cmp.mInfo["SYNONYM"] }
});
// clang-format on
} }
std::string desc = cmp.mInfo["MOLECULE"]; std::string desc = cmp.mInfo["MOLECULE"];
...@@ -3969,14 +4082,16 @@ void PDBFileParser::ConstructEntities() ...@@ -3969,14 +4082,16 @@ void PDBFileParser::ConstructEntities()
if (ci != mChains.end() and not ci->mDbref.dbIdCode.empty()) if (ci != mChains.end() and not ci->mDbref.dbIdCode.empty())
{ {
// clang-format off
getCategory("struct_ref")->emplace({ getCategory("struct_ref")->emplace({
{ "id", structRefID }, { "id", structRefID },
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "db_name", ci->mDbref.database }, { "db_name", ci->mDbref.database },
{ "db_code", ci->mDbref.dbIdCode }, { "db_code", ci->mDbref.dbIdCode },
{ "pdbx_db_accession", ci->mDbref.dbAccession }, { "pdbx_db_accession", ci->mDbref.dbAccession },
// { "pdbx_align_begin", ci->mDbref.dbSeqBegin } // { "pdbx_align_begin", ci->mDbref.dbSeqBegin }
}); });
// clang-format on
} }
bool nstdMonomer = false, nonstandardLinkage = false; bool nstdMonomer = false, nonstandardLinkage = false;
...@@ -3999,7 +4114,9 @@ void PDBFileParser::ConstructEntities() ...@@ -3999,7 +4114,9 @@ void PDBFileParser::ConstructEntities()
if (not dbref.database.empty()) if (not dbref.database.empty())
{ {
auto insToStr = [](char i) -> std::string auto insToStr = [](char i) -> std::string
{ return i == ' ' or not isprint(i) ? "" : std::string{ i }; }; {
return i == ' ' or not isprint(i) ? "" : std::string{ i };
};
auto &pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme"); auto &pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme");
...@@ -4007,12 +4124,12 @@ void PDBFileParser::ConstructEntities() ...@@ -4007,12 +4124,12 @@ void PDBFileParser::ConstructEntities()
try try
{ {
seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string{ dbref.chainID } and
key("pdb_seq_num") == dbref.seqBegin and key("pdb_seq_num") == dbref.seqBegin and
(key("pdb_ins_code") == insToStr(dbref.insertBegin) or key("pdb_ins_code") == cif::null), (key("pdb_ins_code") == insToStr(dbref.insertBegin) or key("pdb_ins_code") == cif::null),
"seq_id"); "seq_id");
seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string{ dbref.chainID } and
key("pdb_seq_num") == dbref.seqEnd and key("pdb_seq_num") == dbref.seqEnd and
(key("pdb_ins_code") == insToStr(dbref.insertEnd) or key("pdb_ins_code") == cif::null), (key("pdb_ins_code") == insToStr(dbref.insertEnd) or key("pdb_ins_code") == cif::null),
"seq_id"); "seq_id");
...@@ -4021,6 +4138,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4021,6 +4138,7 @@ void PDBFileParser::ConstructEntities()
{ {
} }
// clang-format off
getCategory("struct_ref_seq")->emplace({ getCategory("struct_ref_seq")->emplace({
{ "align_id", structRefSeqAlignID }, { "align_id", structRefSeqAlignID },
{ "ref_id", structRefID }, { "ref_id", structRefID },
...@@ -4036,7 +4154,9 @@ void PDBFileParser::ConstructEntities() ...@@ -4036,7 +4154,9 @@ void PDBFileParser::ConstructEntities()
{ "db_align_end", dbref.dbSeqEnd }, { "db_align_end", dbref.dbSeqEnd },
{ "pdbx_db_align_end_ins_code", insToStr(dbref.dbinsEnd) }, { "pdbx_db_align_end_ins_code", insToStr(dbref.dbinsEnd) },
{ "pdbx_auth_seq_align_beg", dbref.seqBegin }, { "pdbx_auth_seq_align_beg", dbref.seqBegin },
{ "pdbx_auth_seq_align_end", dbref.seqEnd } }); { "pdbx_auth_seq_align_end", dbref.seqEnd }
});
// clang-format on
// write the struct_ref_seq_dif // write the struct_ref_seq_dif
for (auto &seqadv : mSeqadvs) for (auto &seqadv : mSeqadvs)
...@@ -4058,6 +4178,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4058,6 +4178,7 @@ void PDBFileParser::ConstructEntities()
seqNum = std::to_string(labelSeq); seqNum = std::to_string(labelSeq);
// clang-format off
getCategory("struct_ref_seq_dif")->emplace({ getCategory("struct_ref_seq_dif")->emplace({
{ "align_id", structRefSeqAlignID }, { "align_id", structRefSeqAlignID },
{ "pdbx_PDB_id_code", dbref.PDBIDCode }, { "pdbx_PDB_id_code", dbref.PDBIDCode },
...@@ -4071,7 +4192,9 @@ void PDBFileParser::ConstructEntities() ...@@ -4071,7 +4192,9 @@ void PDBFileParser::ConstructEntities()
{ "pdbx_seq_db_seq_num", seqadv.dbSeq }, { "pdbx_seq_db_seq_num", seqadv.dbSeq },
{ "details", seqadv.conflict }, { "details", seqadv.conflict },
{ "pdbx_auth_seq_num", seqadv.seqNum }, { "pdbx_auth_seq_num", seqadv.seqNum },
{ "pdbx_ordinal", ++mPdbxDifOrdinal } }); { "pdbx_ordinal", ++mPdbxDifOrdinal }
});
// clang-format on
} }
} }
...@@ -4153,19 +4276,25 @@ void PDBFileParser::ConstructEntities() ...@@ -4153,19 +4276,25 @@ void PDBFileParser::ConstructEntities()
if (std::find(mChemComp.begin(), mChemComp.end(), rs.mMonID) == mChemComp.end()) if (std::find(mChemComp.begin(), mChemComp.end(), rs.mMonID) == mChemComp.end())
mChemComp.emplace_back(rs.mMonID); mChemComp.emplace_back(rs.mMonID);
// clang-format off
cat_ps->emplace({ cat_ps->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "num", i + 1 }, { "num", i + 1 },
{ "mon_id", rs.mMonID }, { "mon_id", rs.mMonID },
{ "hetero", rs.mAlts.empty() ? "n" : "y" } }); { "hetero", rs.mAlts.empty() ? "n" : "y" }
});
// clang-format on
for (auto &a : rs.mAlts) for (auto &a : rs.mAlts)
{ {
// clang-format off
cat_ps->emplace({ cat_ps->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "num", i + 1 }, { "num", i + 1 },
{ "mon_id", a }, { "mon_id", a },
{ "hetero", "y" } }); { "hetero", "y" }
});
// clang-format on
} }
} }
} }
...@@ -4176,6 +4305,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4176,6 +4305,7 @@ void PDBFileParser::ConstructEntities()
else if (mightBeDNA and not mightBePolyPeptide) else if (mightBeDNA and not mightBePolyPeptide)
type = "polyribonucleotide"; type = "polyribonucleotide";
// clang-format off
getCategory("entity_poly")->emplace({ getCategory("entity_poly")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] }, { "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_seq_one_letter_code", seq }, { "pdbx_seq_one_letter_code", seq },
...@@ -4183,16 +4313,21 @@ void PDBFileParser::ConstructEntities() ...@@ -4183,16 +4313,21 @@ void PDBFileParser::ConstructEntities()
{ "nstd_monomer", (nstdMonomer ? "yes" : "no") }, { "nstd_monomer", (nstdMonomer ? "yes" : "no") },
{ "pdbx_strand_id", cif::join(chains, ",") }, { "pdbx_strand_id", cif::join(chains, ",") },
{ "nstd_linkage", nonstandardLinkage ? "yes" : "no" }, { "nstd_linkage", nonstandardLinkage ? "yes" : "no" },
{ "type", type } }); { "type", type }
});
// clang-format on
} }
if (not(structTitle.empty() and structDescription.empty())) if (not(structTitle.empty() and structDescription.empty()))
{ {
// clang-format off
getCategory("struct")->emplace({ getCategory("struct")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "title", cif::join(structTitle, ", ") }, { "title", cif::join(structTitle, ", ") },
{ "pdbx_descriptor", cif::join(structDescription, ", ") }, { "pdbx_descriptor", cif::join(structDescription, ", ") },
{ "pdbx_model_type_details", mModelTypeDetails } }); { "pdbx_model_type_details", mModelTypeDetails }
});
// clang-format on
} }
// build sugar trees first // build sugar trees first
...@@ -4202,7 +4337,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4202,7 +4337,7 @@ void PDBFileParser::ConstructEntities()
std::map<char, std::string> waterChains; std::map<char, std::string> waterChains;
std::map<std::tuple<std::string, std::string>, int> ndbSeqNum; // for nonpoly scheme std::map<std::tuple<std::string, std::string>, int> ndbSeqNum; // for nonpoly scheme
std::map<std::string,int> entityAuthSeqNum; // for nonpoly scheme too std::map<std::string, int> entityAuthSeqNum; // for nonpoly scheme too
for (size_t i = 0; i < mHets.size(); ++i) for (size_t i = 0; i < mHets.size(); ++i)
{ {
...@@ -4252,12 +4387,15 @@ void PDBFileParser::ConstructEntities() ...@@ -4252,12 +4387,15 @@ void PDBFileParser::ConstructEntities()
if (hetID == mWaterHetID) if (hetID == mWaterHetID)
{ {
// clang-format off
getCategory("entity")->emplace({ getCategory("entity")->emplace({
{ "id", entityID }, { "id", entityID },
{ "type", "water" }, { "type", "water" },
{ "src_method", "nat" }, { "src_method", "nat" },
{ "pdbx_description", "water" }, { "pdbx_description", "water" },
{ "pdbx_number_of_molecules", hetCount[hetID] } }); { "pdbx_number_of_molecules", hetCount[hetID] }
});
// clang-format on
} }
else else
{ {
...@@ -4268,23 +4406,30 @@ void PDBFileParser::ConstructEntities() ...@@ -4268,23 +4406,30 @@ void PDBFileParser::ConstructEntities()
mHetnams[hetID] = compound->name(); mHetnams[hetID] = compound->name();
} }
// clang-format off
getCategory("entity")->emplace({ getCategory("entity")->emplace({
{ "id", entityID }, { "id", entityID },
{ "type", "non-polymer" }, { "type", "non-polymer" },
{ "src_method", "syn" }, { "src_method", "syn" },
{ "pdbx_description", mHetnams[hetID] }, { "pdbx_description", mHetnams[hetID] },
{ "details", mHetsyns[hetID] }, { "details", mHetsyns[hetID] },
{ "pdbx_number_of_molecules", hetCount[hetID] } }); { "pdbx_number_of_molecules", hetCount[hetID] }
});
// clang-format on
} }
// write a pdbx_entity_nonpoly record // write a pdbx_entity_nonpoly record
std::string name = mHetnams[hetID]; std::string name = mHetnams[hetID];
if (name.empty() and hetID == mWaterHetID) if (name.empty() and hetID == mWaterHetID)
name = "water"; name = "water";
// clang-format off
getCategory("pdbx_entity_nonpoly")->emplace({ getCategory("pdbx_entity_nonpoly")->emplace({
{ "entity_id", entityID }, { "entity_id", entityID },
{ "name", name }, { "name", name },
{ "comp_id", hetID } }); { "comp_id", hetID }
});
// clang-format on
} }
// create an asym for this het/chain combo, if needed // create an asym for this het/chain combo, if needed
...@@ -4320,6 +4465,8 @@ void PDBFileParser::ConstructEntities() ...@@ -4320,6 +4465,8 @@ void PDBFileParser::ConstructEntities()
if (writtenAsyms.count(asymID) == 0) if (writtenAsyms.count(asymID) == 0)
{ {
writtenAsyms.insert(asymID); writtenAsyms.insert(asymID);
// clang-format off
getCategory("struct_asym")->emplace({ getCategory("struct_asym")->emplace({
{ "id", asymID }, { "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", het.chainID == ' ' ? "Y" : "N" }, { "pdbx_blank_PDB_chainid_flag", het.chainID == ' ' ? "Y" : "N" },
...@@ -4327,6 +4474,8 @@ void PDBFileParser::ConstructEntities() ...@@ -4327,6 +4474,8 @@ void PDBFileParser::ConstructEntities()
{ "entity_id", mHet2EntityID[hetID] }, { "entity_id", mHet2EntityID[hetID] },
// details // details
}); });
// clang-format on
} }
} }
...@@ -4338,6 +4487,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4338,6 +4487,7 @@ void PDBFileParser::ConstructEntities()
if (iCode.empty()) if (iCode.empty())
iCode = { '.' }; iCode = { '.' };
// clang-format off
getCategory("pdbx_nonpoly_scheme")->emplace({ getCategory("pdbx_nonpoly_scheme")->emplace({
{ "asym_id", asymID }, { "asym_id", asymID },
{ "entity_id", mHet2EntityID[hetID] }, { "entity_id", mHet2EntityID[hetID] },
...@@ -4348,7 +4498,9 @@ void PDBFileParser::ConstructEntities() ...@@ -4348,7 +4498,9 @@ void PDBFileParser::ConstructEntities()
{ "pdb_mon_id", hetID }, { "pdb_mon_id", hetID },
{ "auth_mon_id", hetID }, { "auth_mon_id", hetID },
{ "pdb_strand_id", std::string{ het.chainID } }, { "pdb_strand_id", std::string{ het.chainID } },
{ "pdb_ins_code", iCode } }); { "pdb_ins_code", iCode }
});
// clang-format on
// mapping needed? // mapping needed?
mChainSeq2AsymSeq[std::make_tuple(het.chainID, het.seqNum, het.iCode)] = std::make_tuple(asymID, seqNr, false); mChainSeq2AsymSeq[std::make_tuple(het.chainID, het.seqNum, het.iCode)] = std::make_tuple(asymID, seqNr, false);
...@@ -4378,6 +4530,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4378,6 +4530,7 @@ void PDBFileParser::ConstructEntities()
continue; continue;
} }
// clang-format off
getCategory("pdbx_struct_mod_residue")->emplace({ getCategory("pdbx_struct_mod_residue")->emplace({
{ "id", modResID++ }, { "id", modResID++ },
{ "label_asym_id", asymID }, { "label_asym_id", asymID },
...@@ -4390,6 +4543,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4390,6 +4543,7 @@ void PDBFileParser::ConstructEntities()
{ "parent_comp_id", stdRes }, { "parent_comp_id", stdRes },
{ "details", comment } { "details", comment }
}); });
// clang-format on
modResSet.insert(resName); modResSet.insert(resName);
} }
...@@ -4438,6 +4592,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4438,6 +4592,7 @@ void PDBFileParser::ConstructEntities()
if (modResSet.count(cc)) if (modResSet.count(cc))
nstd = "n"; nstd = "n";
// clang-format off
getCategory("chem_comp")->emplace({ getCategory("chem_comp")->emplace({
{ "id", cc }, { "id", cc },
{ "name", name }, { "name", name },
...@@ -4446,6 +4601,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4446,6 +4601,7 @@ void PDBFileParser::ConstructEntities()
{ "mon_nstd_flag", nstd }, { "mon_nstd_flag", nstd },
{ "type", type } { "type", type }
}); });
// clang-format on
} }
getCategory("chem_comp")->reorder_by_index(); getCategory("chem_comp")->reorder_by_index();
...@@ -4477,6 +4633,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4477,6 +4633,7 @@ void PDBFileParser::ConstructEntities()
if (unobs.atoms.empty()) if (unobs.atoms.empty())
{ {
// clang-format off
getCategory("pdbx_unobs_or_zero_occ_residues")->emplace({ getCategory("pdbx_unobs_or_zero_occ_residues")->emplace({
{ "id", std::to_string(++idRes) }, { "id", std::to_string(++idRes) },
{ "polymer_flag", isPolymer ? "Y" : "N" }, { "polymer_flag", isPolymer ? "Y" : "N" },
...@@ -4488,12 +4645,15 @@ void PDBFileParser::ConstructEntities() ...@@ -4488,12 +4645,15 @@ void PDBFileParser::ConstructEntities()
{ "PDB_ins_code", unobs.iCode == ' ' ? "" : std::string{ unobs.iCode } }, { "PDB_ins_code", unobs.iCode == ' ' ? "" : std::string{ unobs.iCode } },
{ "label_asym_id", asymID }, { "label_asym_id", asymID },
{ "label_comp_id", compID }, // TODO: change to correct comp_id { "label_comp_id", compID }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" } }); { "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" }
});
// clang-format on
} }
else else
{ {
for (auto &atom : unobs.atoms) for (auto &atom : unobs.atoms)
{ {
// clang-format off
getCategory("pdbx_unobs_or_zero_occ_atoms")->emplace({ getCategory("pdbx_unobs_or_zero_occ_atoms")->emplace({
{ "id", std::to_string(++idAtom) }, { "id", std::to_string(++idAtom) },
{ "polymer_flag", isPolymer ? "Y" : "N" }, { "polymer_flag", isPolymer ? "Y" : "N" },
...@@ -4507,7 +4667,9 @@ void PDBFileParser::ConstructEntities() ...@@ -4507,7 +4667,9 @@ void PDBFileParser::ConstructEntities()
{ "label_asym_id", asymID }, { "label_asym_id", asymID },
{ "label_comp_id", compID }, // TODO: change to correct comp_id { "label_comp_id", compID }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" }, { "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" },
{ "label_atom_id", atom } }); { "label_atom_id", atom }
});
// clang-format on
} }
} }
} }
...@@ -4627,26 +4789,33 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4627,26 +4789,33 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
entityID = std::to_string(mNextEntityNr++); entityID = std::to_string(mNextEntityNr++);
mBranch2EntityID[branchName] = entityID; mBranch2EntityID[branchName] = entityID;
// clang-format off
getCategory("entity")->emplace({ getCategory("entity")->emplace({
{ "id", entityID }, { "id", entityID },
{ "type", "branched" }, { "type", "branched" },
{ "src_method", "man" }, { "src_method", "man" },
{ "pdbx_description", branchName } }); { "pdbx_description", branchName }
});
getCategory("pdbx_entity_branch")->emplace({ getCategory("pdbx_entity_branch")->emplace({
{ "entity_id", entityID }, { "entity_id", entityID },
{ "type", "oligosaccharide" } }); { "type", "oligosaccharide" }
});
// clang-format on
int num = 0; int num = 0;
std::map<ATOM_REF, int> branch_list; std::map<ATOM_REF, int> branch_list;
for (auto &s : sugarTree) for (auto &s : sugarTree)
{ {
// clang-format off
getCategory("pdbx_entity_branch_list")->emplace({ getCategory("pdbx_entity_branch_list")->emplace({
{ "entity_id", entityID }, { "entity_id", entityID },
{ "comp_id", s.c1.resName }, { "comp_id", s.c1.resName },
{ "num", ++num }, { "num", ++num },
{ "hetero", ci.size() == 1 ? "n" : "y" } }); { "hetero", ci.size() == 1 ? "n" : "y" }
});
// clang-format on
branch_list[s.c1] = num; branch_list[s.c1] = num;
} }
...@@ -4658,6 +4827,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4658,6 +4827,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
if (s.leaving_o == 0) if (s.leaving_o == 0)
continue; continue;
// clang-format off
branch_link.emplace({ branch_link.emplace({
{ "link_id", branch_link.size() + 1 }, { "link_id", branch_link.size() + 1 },
{ "entity_id", entityID }, { "entity_id", entityID },
...@@ -4671,6 +4841,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4671,6 +4841,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
{ "leaving_atom_id_2", "HO" + std::to_string(s.leaving_o) }, { "leaving_atom_id_2", "HO" + std::to_string(s.leaving_o) },
{ "value_order", "sing" } /// ?? { "value_order", "sing" } /// ??
}); });
// clang-format on
} }
} }
...@@ -4682,11 +4853,14 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4682,11 +4853,14 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
mAsymID2EntityID[asymID] = entityID; mAsymID2EntityID[asymID] = entityID;
// clang-format off
getCategory("struct_asym")->emplace({ getCategory("struct_asym")->emplace({
{ "id", asymID }, { "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N" }, { "pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N" },
{ "pdbx_modified", "N" }, { "pdbx_modified", "N" },
{ "entity_id", entityID } }); { "entity_id", entityID }
});
// clang-format on
std::string iCode{ si->iCode }; std::string iCode{ si->iCode };
cif::trim(iCode); cif::trim(iCode);
...@@ -4696,6 +4870,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4696,6 +4870,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
int num = 0; int num = 0;
for (auto s : sugarTree) for (auto s : sugarTree)
{ {
// clang-format off
getCategory("pdbx_branch_scheme")->emplace({ getCategory("pdbx_branch_scheme")->emplace({
{ "asym_id", asymID }, { "asym_id", asymID },
{ "entity_id", entityID }, { "entity_id", entityID },
...@@ -4707,7 +4882,9 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr) ...@@ -4707,7 +4882,9 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
{ "auth_asym_id", std::string{ s.c1.chainID } }, { "auth_asym_id", std::string{ s.c1.chainID } },
{ "auth_mon_id", s.next.resName }, { "auth_mon_id", s.next.resName },
{ "auth_seq_num", s.c1.resSeq }, { "auth_seq_num", s.c1.resSeq },
{ "hetero", ci.size() == 1 ? "n" : "y" } }); { "hetero", ci.size() == 1 ? "n" : "y" }
});
// clang-format on
auto k = std::make_tuple(s.c1.chainID, s.c1.resSeq, s.c1.iCode); auto k = std::make_tuple(s.c1.chainID, s.c1.resSeq, s.c1.iCode);
assert(mChainSeq2AsymSeq.count(k) == 0); assert(mChainSeq2AsymSeq.count(k) == 0);
...@@ -4783,6 +4960,7 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4783,6 +4960,7 @@ void PDBFileParser::ParseSecondaryStructure()
else else
{ {
auto cat = getCategory("struct_conf"); auto cat = getCategory("struct_conf");
// clang-format off
cat->emplace({ cat->emplace({
{ "conf_type_id", "HELX_P" }, { "conf_type_id", "HELX_P" },
{ "id", "HELX_P" + std::to_string(vI(8, 10)) }, { "id", "HELX_P" + std::to_string(vI(8, 10)) },
...@@ -4805,13 +4983,14 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4805,13 +4983,14 @@ void PDBFileParser::ParseSecondaryStructure()
{ "pdbx_PDB_helix_class", vS(39, 40) }, { "pdbx_PDB_helix_class", vS(39, 40) },
{ "details", vS(41, 70) }, { "details", vS(41, 70) },
{ "pdbx_PDB_helix_length", vI(72, 76) } }); { "pdbx_PDB_helix_length", vI(72, 76) }
});
// clang-format on
if (firstHelix) if (firstHelix)
{ {
cat = getCategory("struct_conf_type"); cat = getCategory("struct_conf_type");
cat->emplace({ cat->emplace({ { "id", "HELX_P" } });
{ "id", "HELX_P" } });
firstHelix = false; firstHelix = false;
} }
} }
...@@ -4878,11 +5057,14 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4878,11 +5057,14 @@ void PDBFileParser::ParseSecondaryStructure()
if (sense != 0) if (sense != 0)
{ {
// clang-format off
getCategory("struct_sheet_order")->emplace({ getCategory("struct_sheet_order")->emplace({
{ "sheet_id", sheetID }, { "sheet_id", sheetID },
{ "range_id_1", rangeID }, { "range_id_1", rangeID },
{ "range_id_2", rangeID + 1 }, { "range_id_2", rangeID + 1 },
{ "sense", sense == -1 ? "anti-parallel" : "parallel" } }); { "sense", sense == -1 ? "anti-parallel" : "parallel" }
});
// clang-format on
} }
std::string begAsymID, endAsymID; std::string begAsymID, endAsymID;
...@@ -4900,6 +5082,7 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4900,6 +5082,7 @@ void PDBFileParser::ParseSecondaryStructure()
} }
else else
{ {
// clang-format off
getCategory("struct_sheet_range")->emplace({ getCategory("struct_sheet_range")->emplace({
{ "sheet_id", sheetID }, { "sheet_id", sheetID },
{ "id", vI(8, 10) }, { "id", vI(8, 10) },
...@@ -4919,6 +5102,7 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4919,6 +5102,7 @@ void PDBFileParser::ParseSecondaryStructure()
{ "end_auth_asym_id", vS(33, 33) }, { "end_auth_asym_id", vS(33, 33) },
{ "end_auth_seq_id", vI(34, 37) }, { "end_auth_seq_id", vI(34, 37) },
}); });
// clang-format on
if (sense != 0 and mRec->mVlen > 34) if (sense != 0 and mRec->mVlen > 34)
{ {
...@@ -4935,6 +5119,7 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4935,6 +5119,7 @@ void PDBFileParser::ParseSecondaryStructure()
std::cerr << "skipping unmatched pdbx_struct_sheet_hbond record\n"; std::cerr << "skipping unmatched pdbx_struct_sheet_hbond record\n";
} }
else else
// clang-format off
getCategory("pdbx_struct_sheet_hbond")->emplace({ getCategory("pdbx_struct_sheet_hbond")->emplace({
{ "sheet_id", sheetID }, { "sheet_id", sheetID },
{ "range_id_1", rangeID }, { "range_id_1", rangeID },
...@@ -4957,7 +5142,9 @@ void PDBFileParser::ParseSecondaryStructure() ...@@ -4957,7 +5142,9 @@ void PDBFileParser::ParseSecondaryStructure()
{ "range_2_auth_atom_id", vS(42, 45) }, { "range_2_auth_atom_id", vS(42, 45) },
{ "range_2_auth_comp_id", vS(46, 48) }, { "range_2_auth_comp_id", vS(46, 48) },
{ "range_2_auth_asym_id", vS(50, 50) }, { "range_2_auth_asym_id", vS(50, 50) },
{ "range_2_auth_seq_id", vI(51, 54) } }); { "range_2_auth_seq_id", vI(51, 54) }
});
// clang-format on
} }
if (sense != 0) if (sense != 0)
...@@ -5060,6 +5247,7 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5060,6 +5247,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ {
for (auto a2 : alt2) for (auto a2 : alt2)
{ {
// clang-format off
getCategory("struct_conn")->emplace({ getCategory("struct_conn")->emplace({
{ "id", "disulf" + std::to_string(++ssBondNr) }, { "id", "disulf" + std::to_string(++ssBondNr) },
{ "conn_type_id", "disulf" }, { "conn_type_id", "disulf" },
...@@ -5088,6 +5276,7 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5088,6 +5276,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ "pdbx_dist_value", vS(74, 78) }, { "pdbx_dist_value", vS(74, 78) },
}); });
// clang-format on
} }
} }
...@@ -5185,6 +5374,7 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5185,6 +5374,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
continue; continue;
} }
// clang-format off
getCategory("struct_conn")->emplace({ getCategory("struct_conn")->emplace({
{ "id", type + std::to_string(linkNr) }, { "id", type + std::to_string(linkNr) },
{ "conn_type_id", type }, { "conn_type_id", type },
...@@ -5219,7 +5409,9 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5219,7 +5409,9 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ "ptnr2_symmetry", sym2 }, { "ptnr2_symmetry", sym2 },
{ "pdbx_dist_value", distance } }); { "pdbx_dist_value", distance }
});
// clang-format on
continue; continue;
} }
...@@ -5260,6 +5452,7 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5260,6 +5452,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
std::string iCode1str = iCode1 == ' ' ? std::string() : std::string{ iCode1 }; std::string iCode1str = iCode1 == ' ' ? std::string() : std::string{ iCode1 };
std::string iCode2str = iCode2 == ' ' ? std::string() : std::string{ iCode2 }; std::string iCode2str = iCode2 == ' ' ? std::string() : std::string{ iCode2 };
// clang-format off
getCategory("struct_mon_prot_cis")->emplace({ getCategory("struct_mon_prot_cis")->emplace({
{ "pdbx_id", serNum }, { "pdbx_id", serNum },
{ "label_comp_id", pep1 }, { "label_comp_id", pep1 },
...@@ -5278,7 +5471,9 @@ void PDBFileParser::ParseConnectivtyAnnotation() ...@@ -5278,7 +5471,9 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ "pdbx_auth_seq_id_2", seqNum2 }, { "pdbx_auth_seq_id_2", seqNum2 },
{ "pdbx_auth_asym_id_2", std::string{ chainID2 } }, { "pdbx_auth_asym_id_2", std::string{ chainID2 } },
{ "pdbx_PDB_model_num", modNum }, { "pdbx_PDB_model_num", modNum },
{ "pdbx_omega_angle", measure } }); { "pdbx_omega_angle", measure }
});
// clang-format on
continue; continue;
} }
...@@ -5323,6 +5518,7 @@ void PDBFileParser::ParseMiscellaneousFeatures() ...@@ -5323,6 +5518,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
std::cerr << "skipping struct_site_gen record\n"; std::cerr << "skipping struct_site_gen record\n";
} }
else else
// clang-format off
cat->emplace({ cat->emplace({
{ "id", structSiteGenID++ }, { "id", structSiteGenID++ },
{ "site_id", siteID }, { "site_id", siteID },
...@@ -5337,6 +5533,7 @@ void PDBFileParser::ParseMiscellaneousFeatures() ...@@ -5337,6 +5533,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
{ "label_atom_id", "." }, { "label_atom_id", "." },
{ "label_alt_id", "." }, { "label_alt_id", "." },
}); });
// clang-format on
o += 11; o += 11;
} }
...@@ -5351,6 +5548,7 @@ void PDBFileParser::ParseCrystallographic() ...@@ -5351,6 +5548,7 @@ void PDBFileParser::ParseCrystallographic()
{ {
Match("CRYST1", true); Match("CRYST1", true);
// clang-format off
getCategory("cell")->emplace({ getCategory("cell")->emplace({
{ "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1" { "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1"
{ "length_a", vF(7, 15) }, // 7 - 15 Real(9.3) a a (Angstroms). { "length_a", vF(7, 15) }, // 7 - 15 Real(9.3) a a (Angstroms).
...@@ -5362,6 +5560,7 @@ void PDBFileParser::ParseCrystallographic() ...@@ -5362,6 +5560,7 @@ void PDBFileParser::ParseCrystallographic()
/* goes into symmetry */ // 56 - 66 LString sGroup Space group. /* goes into symmetry */ // 56 - 66 LString sGroup Space group.
{ "Z_PDB", vF(67, 70) } // 67 - 70 Integer z Z value. { "Z_PDB", vF(67, 70) } // 67 - 70 Integer z Z value.
}); });
// clang-format on
std::string spaceGroup, intTablesNr; std::string spaceGroup, intTablesNr;
try try
...@@ -5373,15 +5572,19 @@ void PDBFileParser::ParseCrystallographic() ...@@ -5373,15 +5572,19 @@ void PDBFileParser::ParseCrystallographic()
{ {
} }
// clang-format off
getCategory("symmetry")->emplace({ getCategory("symmetry")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "space_group_name_H-M", spaceGroup }, { "space_group_name_H-M", spaceGroup },
{ "Int_Tables_number", intTablesNr } }); { "Int_Tables_number", intTablesNr }
});
GetNextRecord(); GetNextRecord();
} }
else else
{ {
// clang-format off
// no cryst1, make a simple one, like this: // no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1 // CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory("cell")->emplace({ getCategory("cell")->emplace({
...@@ -5401,6 +5604,7 @@ void PDBFileParser::ParseCrystallographic() ...@@ -5401,6 +5604,7 @@ void PDBFileParser::ParseCrystallographic()
{ "space_group_name_H-M", "P 1" }, { "space_group_name_H-M", "P 1" },
{ "Int_Tables_number", 1 } { "Int_Tables_number", 1 }
}); });
// clang-format on
} }
} }
...@@ -5423,6 +5627,7 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5423,6 +5627,7 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord(); GetNextRecord();
} }
// clang-format off
getCategory("database_PDB_matrix")->emplace({ getCategory("database_PDB_matrix")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "origx[1][1]", m[0][0] }, { "origx[1][1]", m[0][0] },
...@@ -5438,6 +5643,7 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5438,6 +5643,7 @@ void PDBFileParser::ParseCoordinateTransformation()
{ "origx_vector[2]", v[1] }, { "origx_vector[2]", v[1] },
{ "origx_vector[3]", v[2] }, { "origx_vector[3]", v[2] },
}); });
// clang-format on
} }
if (cif::starts_with(mRec->mName, "SCALE")) if (cif::starts_with(mRec->mName, "SCALE"))
...@@ -5455,6 +5661,7 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5455,6 +5661,7 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord(); GetNextRecord();
} }
// clang-format off
getCategory("atom_sites")->emplace({ getCategory("atom_sites")->emplace({
{ "entry_id", mStructureID }, { "entry_id", mStructureID },
{ "fract_transf_matrix[1][1]", m[0][0] }, { "fract_transf_matrix[1][1]", m[0][0] },
...@@ -5470,6 +5677,7 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5470,6 +5677,7 @@ void PDBFileParser::ParseCoordinateTransformation()
{ "fract_transf_vector[2]", v[1] }, { "fract_transf_vector[2]", v[1] },
{ "fract_transf_vector[3]", v[2] }, { "fract_transf_vector[3]", v[2] },
}); });
// clang-format on
} }
while (cif::starts_with(mRec->mName, "MTRIX1")) while (cif::starts_with(mRec->mName, "MTRIX1"))
...@@ -5491,6 +5699,7 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5491,6 +5699,7 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord(); // transformations of the molecule are GetNextRecord(); // transformations of the molecule are
} // contained in the datablock. Otherwise, blank. } // contained in the datablock. Otherwise, blank.
// clang-format off
getCategory("struct_ncs_oper")->emplace({ getCategory("struct_ncs_oper")->emplace({
{ "id", serial }, { "id", serial },
{ "matrix[1][1]", m[0][0] }, { "matrix[1][1]", m[0][0] },
...@@ -5505,7 +5714,9 @@ void PDBFileParser::ParseCoordinateTransformation() ...@@ -5505,7 +5714,9 @@ void PDBFileParser::ParseCoordinateTransformation()
{ "vector[1]", v[0] }, { "vector[1]", v[0] },
{ "vector[2]", v[1] }, { "vector[2]", v[1] },
{ "vector[3]", v[2] }, { "vector[3]", v[2] },
{ "code", igiven ? "given" : "" } }); { "code", igiven ? "given" : "" }
});
// clang-format on
} }
} }
...@@ -5673,6 +5884,7 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -5673,6 +5884,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
resSeq = branch_scheme.find1<int>("asym_id"_key == asymID and "auth_seq_num"_key == resSeq, "pdb_seq_num"); resSeq = branch_scheme.find1<int>("asym_id"_key == asymID and "auth_seq_num"_key == resSeq, "pdb_seq_num");
} }
// clang-format off
getCategory("atom_site")->emplace({ getCategory("atom_site")->emplace({
{ "group_PDB", groupPDB }, { "group_PDB", groupPDB },
{ "id", mAtomID }, { "id", mAtomID },
...@@ -5694,7 +5906,9 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -5694,7 +5906,9 @@ void PDBFileParser::ParseCoordinate(int modelNr)
{ "auth_comp_id", resName }, { "auth_comp_id", resName },
{ "auth_asym_id", std::string{ chainID } }, { "auth_asym_id", std::string{ chainID } },
{ "auth_atom_id", name }, { "auth_atom_id", name },
{ "pdbx_PDB_model_num", modelNr } }); { "pdbx_PDB_model_num", modelNr }
});
// clang-format on
InsertAtomType(element); InsertAtomType(element);
...@@ -5714,8 +5928,11 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -5714,8 +5928,11 @@ void PDBFileParser::ParseCoordinate(int modelNr)
throw std::runtime_error("ANISOU record should follow corresponding ATOM record"); throw std::runtime_error("ANISOU record should follow corresponding ATOM record");
auto f = [](float f) -> std::string auto f = [](float f) -> std::string
{ return cif::format("%6.4f", f).str(); }; {
return cif::format("%6.4f", f).str();
};
// clang-format off
getCategory("atom_site_anisotrop")->emplace({ getCategory("atom_site_anisotrop")->emplace({
{ "id", mAtomID }, { "id", mAtomID },
{ "type_symbol", element }, { "type_symbol", element },
...@@ -5733,7 +5950,9 @@ void PDBFileParser::ParseCoordinate(int modelNr) ...@@ -5733,7 +5950,9 @@ void PDBFileParser::ParseCoordinate(int modelNr)
{ "pdbx_auth_seq_id", resSeq }, { "pdbx_auth_seq_id", resSeq },
{ "pdbx_auth_comp_id", resName }, { "pdbx_auth_comp_id", resName },
{ "pdbx_auth_asym_id", std::string{ chainID } }, { "pdbx_auth_asym_id", std::string{ chainID } },
{ "pdbx_auth_atom_id", name } }); { "pdbx_auth_atom_id", name }
});
// clang-format on
} }
} }
...@@ -5812,8 +6031,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result) ...@@ -5812,8 +6031,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
throw std::runtime_error("Either the PDB file has no atom records, or the field " + std::string(mRec->mName) + " is not at the correct location"); throw std::runtime_error("Either the PDB file has no atom records, or the field " + std::string(mRec->mName) + " is not at the correct location");
for (auto e : mAtomTypes) for (auto e : mAtomTypes)
getCategory("atom_type")->emplace({ getCategory("atom_type")->emplace({ { "symbol", e } });
{ "symbol", e } });
// in V5, atom_type is sorted // in V5, atom_type is sorted
getCategory("atom_type")->reorder_by_index(); getCategory("atom_type")->reorder_by_index();
...@@ -5833,8 +6051,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result) ...@@ -5833,8 +6051,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
auto exptl = getCategory("exptl"); auto exptl = getCategory("exptl");
if (exptl->empty()) if (exptl->empty())
{ {
exptl->emplace({ exptl->emplace({ { "entry_id", mStructureID },
{ "entry_id", mStructureID },
{ "method", mExpMethod }, { "method", mExpMethod },
{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] } }); { "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] } });
} }
...@@ -5900,8 +6117,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result) ...@@ -5900,8 +6117,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
distance = std::sqrt( distance = std::sqrt(
(x1 - x2) * (x1 - x2) + (x1 - x2) * (x1 - x2) +
(y1 - y2) * (y1 - y2) + (y1 - y2) * (y1 - y2) +
(z1 - z2) * (z1 - z2) (z1 - z2) * (z1 - z2));
);
else if (cif::VERBOSE > 0) else if (cif::VERBOSE > 0)
std::cerr << "Cannot calculate distance for link since one of the atoms is in another dimension\n"; std::cerr << "Cannot calculate distance for link since one of the atoms is in another dimension\n";
} }
...@@ -6288,4 +6504,4 @@ file read(const std::filesystem::path &file) ...@@ -6288,4 +6504,4 @@ file read(const std::filesystem::path &file)
} }
} }
} // namespace pdbx } // namespace cif::pdb
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment