Commit 3315fae8 by Maarten L. Hekkelman

Merge branch 'cif2fasta-develop' into develop-cif2fasta

parents f1ca916d d8c3c3f7
......@@ -537,6 +537,11 @@ write_basic_package_version_file(
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion)
# In case we're included as sub_directory:
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
endif()
if(BUILD_TESTING)
# We're using the older version 2 of Catch2
FetchContent_Declare(
......
......@@ -45,59 +45,59 @@ using cif::to_lower_copy;
namespace error
{
enum pdbErrors
{
residueNotFound = 1000,
invalidDate
};
enum pdbErrors
{
residueNotFound = 1000,
invalidDate
};
namespace detail
namespace detail
{
class pdbCategory : public std::error_category
{
class pdbCategory : public std::error_category
public:
const char *name() const noexcept
{
public:
const char *name() const noexcept
{
return "pdb";
}
return "pdb";
}
std::string message(int value) const
std::string message(int value) const
{
switch (value)
{
switch (value)
{
case residueNotFound:
return "Residue not found";
case residueNotFound:
return "Residue not found";
case invalidDate:
return "Invalid date";
case invalidDate:
return "Invalid date";
default:
return "Error in PDB format";
}
default:
return "Error in PDB format";
}
};
} // namespace detail
}
};
} // namespace detail
std::error_category &pdbCategory()
{
static detail::pdbCategory impl;
return impl;
}
std::error_category &pdbCategory()
{
static detail::pdbCategory impl;
return impl;
}
inline std::error_code make_error_code(pdbErrors e)
{
return std::error_code(static_cast<int>(e), pdbCategory());
}
inline std::error_code make_error_code(pdbErrors e)
{
return std::error_code(static_cast<int>(e), pdbCategory());
}
} // namespace error
namespace std
{
template <>
struct is_error_code_enum<error::pdbErrors>
{
static const bool value = true;
};
template <>
struct is_error_code_enum<error::pdbErrors>
{
static const bool value = true;
};
} // namespace std
......@@ -835,8 +835,6 @@ class PDBFileParser
return result;
}
std::tuple<std::string, int, bool> MapResidue(char chainID, int resSeq, char iCode, const std::string &resName);
// ----------------------------------------------------------------
void PreParseInput(std::istream &is);
......@@ -1106,10 +1104,10 @@ void PDBFileParser::MapChainID2AsymIDS(char chainID, std::vector<std::string> &a
std::sort(asymIds.begin(), asymIds.end(), [](const std::string &a, const std::string &b)
{
int d = static_cast<int>(a.length() - b.length());
if (d == 0)
d = a.compare(b);
return d < 0; });
int d = static_cast<int>(a.length() - b.length());
if (d == 0)
d = a.compare(b);
return d < 0; });
asymIds.erase(std::unique(asymIds.begin(), asymIds.end()), asymIds.end());
}
......@@ -1167,7 +1165,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
std::string value;
if (lookahead.length() > 6)
value = cif::trim_right_copy(lookahead.substr(6));
lookahead.clear();
uint32_t curLineNr = lineNr;
......@@ -1492,8 +1490,7 @@ void PDBFileParser::ParseTitle()
auto cat = getCategory("entry");
// cat->addColumn("id");
cat->emplace({
{ "id", mStructureID } });
cat->emplace({ { "id", mStructureID } });
// OBSLTE
if (mRec->is("OBSLTE"))
......@@ -1512,8 +1509,7 @@ void PDBFileParser::ParseTitle()
std::string value = mRec->vS(32);
for (auto i : cif::split<std::string>(value, " ", true))
{
cat->emplace({
{ "id", "OBSLTE" },
cat->emplace({ { "id", "OBSLTE" },
{ "date", date },
{ "replace_pdb_id", old },
{ "pdb_id", i } });
......@@ -1545,10 +1541,12 @@ void PDBFileParser::ParseTitle()
int caveatID = 1;
while (mRec->is("CAVEAT")) // 1 - 6 Record name "CAVEAT"
{
// clang-format off
getCategory("database_PDB_caveat")->emplace({
{ "id", caveatID++ },
{ "text", std::string{ mRec->vS(20) } } // 20 - 79 String comment Free text giving the reason for the CAVEAT.
});
// clang-format on
GetNextRecord();
}
......@@ -1679,10 +1677,13 @@ void PDBFileParser::ParseTitle()
if (not(keywords.empty() and pdbxKeywords.empty()))
{
// clang-format off
getCategory("struct_keywords")->emplace({
{ "entry_id", mStructureID },
{ "pdbx_keywords", keywords },
{ "text", pdbxKeywords } });
{ "pdbx_keywords", keywords },
{ "text", pdbxKeywords }
});
// clang-format on
}
// EXPDTA
......@@ -1705,10 +1706,13 @@ void PDBFileParser::ParseTitle()
if (expMethod.empty())
continue;
// clang-format off
cat->emplace({
{ "entry_id", mStructureID },
{ "method", expMethod },
{ "crystals_number", ci != crystals.end() ? *ci : "" } });
{ "crystals_number", ci != crystals.end() ? *ci : "" }
});
// clang-format on
}
GetNextRecord();
......@@ -1739,9 +1743,12 @@ void PDBFileParser::ParseTitle()
value = { mRec->vS(11) };
for (auto author : cif::split<std::string>(value, ",", true))
{
// clang-format off
cat->emplace({
{ "name", pdb2cifAuth(author) },
{ "pdbx_ordinal", n } });
{ "pdbx_ordinal", n }
});
// clang-format on
++n;
}
......@@ -1785,10 +1792,12 @@ void PDBFileParser::ParseTitle()
if (firstRevDat)
{
cat = getCategory("database_2");
cat->emplace({
// clang-format off
getCategory("database_2")->emplace({
{ "database_id", "PDB" },
{ "database_code", modID } });
{ "database_code", modID }
});
// clang-format on
}
GetNextRecord();
......@@ -1801,20 +1810,27 @@ void PDBFileParser::ParseTitle()
sort(revdats.begin(), revdats.end());
for (auto &revdat : revdats)
{
// clang-format off
getCategory("database_PDB_rev")->emplace({
{ "num", revdat.revNum },
{ "date", revdat.date },
{ "date_original", revdat.dateOriginal },
{ "replaces", revdat.replaces },
{ "mod_type", revdat.modType } });
{ "date", revdat.date },
{ "date_original", revdat.dateOriginal },
{ "replaces", revdat.replaces },
{ "mod_type", revdat.modType }
});
// clang-format on
for (auto &type : revdat.types)
{
if (type.empty())
continue;
// clang-format off
getCategory("database_PDB_rev_record")->emplace({
{ "rev_num", revdat.revNum },
{ "type", type } });
{ "type", type }
});
// clang-format on
}
}
//*/
......@@ -1889,6 +1905,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
}
auto cat = getCategory("citation");
// clang-format off
cat->emplace({
{ "id", id },
{ "title", titl },
......@@ -1902,15 +1919,16 @@ void PDBFileParser::ParseCitation(const std::string &id)
{ "journal_id_CSD", csd },
{ "book_publisher", publ },
{ "pdbx_database_id_PubMed", pmid },
{ "pdbx_database_id_DOI", doi } });
{ "pdbx_database_id_DOI", doi }
});
// clang-format on
if (not auth.empty())
{
cat = getCategory("citation_author");
for (auto author : cif::split<std::string>(auth, ",", true))
{
cat->emplace({
{ "citation_id", id },
cat->emplace({ { "citation_id", id },
{ "name", pdb2cifAuth(author) },
{ "ordinal", mCitationAuthorNr } });
......@@ -1923,8 +1941,7 @@ void PDBFileParser::ParseCitation(const std::string &id)
cat = getCategory("citation_editor");
for (auto editor : cif::split<std::string>(edit, ",", true))
{
cat->emplace({
{ "citation_id", id },
cat->emplace({ { "citation_id", id },
{ "name", pdb2cifAuth(editor) },
{ "ordinal", mCitationEditorNr } });
......@@ -1981,8 +1998,7 @@ void PDBFileParser::ParseRemarks()
if (std::regex_match(r, m, rx))
{
auto cat = getCategory("database_2");
cat->emplace({
{ "database_id", m[1].str() },
cat->emplace({ { "database_id", m[1].str() },
{ "database_code", m[2].str() } });
}
......@@ -2050,11 +2066,14 @@ void PDBFileParser::ParseRemarks()
if (desc == "NULL")
desc.clear();
// clang-format off
getCategory("exptl_crystal")->emplace({
{ "id", 1 },
{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews },
{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
{ "description", desc } });
{ "density_Matthews", iequals(density_Matthews, "NULL") ? "" : density_Matthews },
{ "density_percent_sol", iequals(densityPercentSol, "NULL") ? "" : densityPercentSol },
{ "description", desc }
});
// clang-format on
// now try to parse the conditions
const std::regex rx3(R"(TEMPERATURE +(\d+)K)"), rx4(R"(PH *(?:: *)?(\d+(?:\.\d+)?))") /*, rx5(R"(\b(\d+)C\b)")*/;
......@@ -2081,12 +2100,15 @@ void PDBFileParser::ParseRemarks()
if (not(method.empty() and temp.empty() and ph.empty() and (conditions.empty() or conditions == "NULL")))
{
// clang-format off
getCategory("exptl_crystal_grow")->emplace({
{ "crystal_id", 1 },
{ "method", method },
{ "temp", temp },
{ "pH", ph },
{ "pdbx_details", conditions } });
{ "method", method },
{ "temp", temp },
{ "pH", ph },
{ "pdbx_details", conditions }
});
// clang-format on
}
break;
......@@ -2302,22 +2324,25 @@ void PDBFileParser::ParseRemarks()
std::string distance = vF(63, 71);
// clang-format off
getCategory("pdbx_validate_close_contact")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", std::string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", std::string{ chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "dist", distance } });
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", std::string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", std::string{ chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "dist", distance }
});
// clang-format on
}
break;
}
......@@ -2354,24 +2379,27 @@ void PDBFileParser::ParseRemarks()
std::string distance = vF(63, 71);
// clang-format off
getCategory("pdbx_validate_symm_contact")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", std::string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
// { "PDB_ins_code_1", "" },
// { "label_alt_id_1", "" },
{ "PDB_model_num", 1 },
{ "auth_atom_id_1", atom1 },
{ "auth_asym_id_1", std::string{ chain1 } },
{ "auth_comp_id_1", res1 },
{ "auth_seq_id_1", seq1 },
// { "PDB_ins_code_1", "" },
// { "label_alt_id_1", "" },
{ "site_symmetry_1", "1_555" },
{ "auth_atom_id_2", atom2 },
{ "auth_asym_id_2", std::string{ chain2 } },
{ "auth_comp_id_2", res2 },
{ "auth_seq_id_2", seq2 },
// { "PDB_ins_code_2", "" },
// { "label_alt_id_2", "" },
// { "PDB_ins_code_2", "" },
// { "label_alt_id_2", "" },
{ "site_symmetry_2", symop },
{ "dist", distance } });
{ "dist", distance }
});
// clang-format on
}
break;
}
......@@ -2411,22 +2439,25 @@ void PDBFileParser::ParseRemarks()
if (iCode2 == " ")
iCode2.clear();
// clang-format off
getCategory("pdbx_validate_rmsd_bond")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atm1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atm2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_comp_id_2", resNam2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "bond_deviation", deviation } });
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atm1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "label_alt_id_1", alt1 },
{ "auth_atom_id_2", atm2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_comp_id_2", resNam2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "label_alt_id_2", alt2 },
{ "bond_deviation", deviation }
});
// clang-format on
}
break;
......@@ -2458,25 +2489,28 @@ void PDBFileParser::ParseRemarks()
if (deviation == "*****")
deviation.clear();
// clang-format off
getCategory("pdbx_validate_rmsd_angle")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atoms[0] },
{ "auth_asym_id_1", chainID },
{ "auth_comp_id_1", resNam },
{ "auth_seq_id_1", seqNum },
{ "PDB_ins_code_1", iCode },
{ "auth_atom_id_2", atoms[1] },
{ "auth_asym_id_2", chainID },
{ "auth_comp_id_2", resNam },
{ "auth_seq_id_2", seqNum },
{ "PDB_ins_code_2", iCode },
{ "auth_atom_id_3", atoms[2] },
{ "auth_asym_id_3", chainID },
{ "auth_comp_id_3", resNam },
{ "auth_seq_id_3", seqNum },
{ "PDB_ins_code_3", iCode },
{ "angle_deviation", deviation } });
{ "PDB_model_num", model ? model : 1 },
{ "auth_atom_id_1", atoms[0] },
{ "auth_asym_id_1", chainID },
{ "auth_comp_id_1", resNam },
{ "auth_seq_id_1", seqNum },
{ "PDB_ins_code_1", iCode },
{ "auth_atom_id_2", atoms[1] },
{ "auth_asym_id_2", chainID },
{ "auth_comp_id_2", resNam },
{ "auth_seq_id_2", seqNum },
{ "PDB_ins_code_2", iCode },
{ "auth_atom_id_3", atoms[2] },
{ "auth_asym_id_3", chainID },
{ "auth_comp_id_3", resNam },
{ "auth_seq_id_3", seqNum },
{ "PDB_ins_code_3", iCode },
{ "angle_deviation", deviation }
});
// clang-format on
}
break;
......@@ -2505,6 +2539,7 @@ void PDBFileParser::ParseRemarks()
std::string psi = vF(27, 35);
std::string phi = vF(37, 45);
// clang-format off
getCategory("pdbx_validate_torsion")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
......@@ -2513,7 +2548,9 @@ void PDBFileParser::ParseRemarks()
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "phi", phi },
{ "psi", psi } });
{ "psi", psi }
});
// clang-format on
}
break;
......@@ -2544,18 +2581,21 @@ void PDBFileParser::ParseRemarks()
std::string omega = vF(54, 60);
// clang-format off
getCategory("pdbx_validate_peptide_omega")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "auth_comp_id_2", resNam2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "omega", omega } });
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id_1", resNam1 },
{ "auth_asym_id_1", chainID1 },
{ "auth_seq_id_1", seqNum1 },
{ "PDB_ins_code_1", iCode1 },
{ "auth_comp_id_2", resNam2 },
{ "auth_asym_id_2", chainID2 },
{ "auth_seq_id_2", seqNum2 },
{ "PDB_ins_code_2", iCode2 },
{ "omega", omega }
});
// clang-format on
}
break;
......@@ -2578,15 +2618,18 @@ void PDBFileParser::ParseRemarks()
std::string rmsd = vF(32, 36);
std::string type = vS(41);
// clang-format off
getCategory("pdbx_validate_planes")->emplace({
{ "id", std::to_string(++id) },
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "rmsd", rmsd },
{ "type", type } });
{ "PDB_model_num", model ? model : 1 },
{ "auth_comp_id", resNam },
{ "auth_asym_id", chainID },
{ "auth_seq_id", seqNum },
{ "PDB_ins_code", iCode },
{ "rmsd", rmsd },
{ "type", type }
});
// clang-format on
}
break;
......@@ -2666,14 +2709,17 @@ void PDBFileParser::ParseRemarks()
throw std::runtime_error("Invalid REMARK 800, no SITE record for id " + id);
// next record, store what we have
// clang-format off
getCategory("struct_site")->emplace({
{ "id", id },
{ "details", desc },
{ "pdbx_auth_asym_id", pdbxAuthAsymID },
{ "pdbx_auth_comp_id", pdbxAuthCompID },
{ "pdbx_auth_seq_id", pdbxAuthSeqID },
{ "pdbx_num_residues", site->vI(16, 17) },
{ "pdbx_evidence_code", evidence } });
{ "details", desc },
{ "pdbx_auth_asym_id", pdbxAuthAsymID },
{ "pdbx_auth_comp_id", pdbxAuthCompID },
{ "pdbx_auth_seq_id", pdbxAuthSeqID },
{ "pdbx_num_residues", site->vI(16, 17) },
{ "pdbx_evidence_code", evidence }
});
// clang-format on
};
for (; mRec->is("REMARK 800"); GetNextRecord())
......@@ -2787,9 +2833,12 @@ void PDBFileParser::ParseRemarks()
GetNextRecord();
}
// clang-format off
getCategory("pdbx_database_remark")->emplace({
{ "id", remarkNr },
{ "text", s.str() } });
{ "text", s.str() }
});
// clang-format on
break;
}
......@@ -2803,11 +2852,14 @@ void PDBFileParser::ParseRemarks()
if (not(compoundDetails.empty() and sequenceDetails.empty() and sourceDetails.empty()))
{
// clang-format off
getCategory("pdbx_entry_details")->emplace({
{ "entry_id", mStructureID },
{ "compound_details", compoundDetails },
{ "sequence_details", sequenceDetails },
{ "source_details", sourceDetails } });
{ "compound_details", compoundDetails },
{ "sequence_details", sequenceDetails },
{ "source_details", sourceDetails }
});
// clang-format on
}
// store remark 200 info (special case)
......@@ -2884,11 +2936,14 @@ void PDBFileParser::ParseRemark200()
if (mRemark200[sw.b].empty())
continue;
// clang-format off
getCategory("software")->emplace({
{ "name", mRemark200[sw.b] },
{ "classification", sw.a },
{ "version", "." },
{ "pdbx_ordinal", mNextSoftwareOrd++ } });
{ "classification", sw.a },
{ "version", "." },
{ "pdbx_ordinal", mNextSoftwareOrd++ }
});
// clang-format on
}
std::string scatteringType;
......@@ -2908,11 +2963,13 @@ void PDBFileParser::ParseRemark200()
if (cif::ends_with(ambientTemp, "K"))
ambientTemp.erase(ambientTemp.length() - 1, 1);
// clang-format off
getCategory("diffrn")->emplace({
{ "id", diffrnNr },
{ "ambient_temp", ambientTemp },
// { "ambient_temp_details", seqID },
// { "ambient_temp_details", seqID },
{ "crystal_id", 1 } });
// clang-format on
std::string collectionDate;
std::error_code ec;
......@@ -2927,21 +2984,27 @@ void PDBFileParser::ParseRemark200()
collectionDate.clear();
}
// clang-format off
getCategory("diffrn_detector")->emplace({
{ "diffrn_id", diffrnNr },
{ "detector", rm200("DETECTOR TYPE", diffrnNr) },
{ "type", rm200("DETECTOR MANUFACTURER", diffrnNr) },
{ "pdbx_collection_date", collectionDate },
{ "details", rm200("OPTICS", diffrnNr) } });
{ "detector", rm200("DETECTOR TYPE", diffrnNr) },
{ "type", rm200("DETECTOR MANUFACTURER", diffrnNr) },
{ "pdbx_collection_date", collectionDate },
{ "details", rm200("OPTICS", diffrnNr) }
});
// clang-format on
if (inRM200({ "MONOCHROMATIC OR LAUE (M/L)", "MONOCHROMATOR", "DIFFRACTION PROTOCOL" }) or not scatteringType.empty())
// clang-format off
getCategory("diffrn_radiation")->emplace({
{ "diffrn_id", diffrnNr },
{ "wavelength_id", 1 },
{ "pdbx_monochromatic_or_laue_m_l", rm200("MONOCHROMATIC OR LAUE (M/L)", diffrnNr) },
{ "monochromator", rm200("MONOCHROMATOR", diffrnNr) },
{ "pdbx_diffrn_protocol", rm200("DIFFRACTION PROTOCOL", diffrnNr) },
{ "pdbx_scattering_type", scatteringType } });
{ "wavelength_id", 1 },
{ "pdbx_monochromatic_or_laue_m_l", rm200("MONOCHROMATIC OR LAUE (M/L)", diffrnNr) },
{ "monochromator", rm200("MONOCHROMATOR", diffrnNr) },
{ "pdbx_diffrn_protocol", rm200("DIFFRACTION PROTOCOL", diffrnNr) },
{ "pdbx_scattering_type", scatteringType }
});
// clang-format on
std::string wl = rm200("WAVELENGTH OR RANGE (A)", diffrnNr);
auto wavelengths = cif::split<std::string>(wl, ", -", true);
......@@ -2951,6 +3014,7 @@ void PDBFileParser::ParseRemark200()
std::string source;
if (rm200("SYNCHROTRON (Y/N)", diffrnNr) == "Y")
{
// clang-format off
getCategory("diffrn_source")->emplace({
{ "diffrn_id", diffrnNr },
{ "source", "SYNCHROTRON" },
......@@ -2961,9 +3025,11 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" },
{ "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") },
});
// clang-format on
}
else if (inRM200({ "X-RAY GENERATOR MODEL", "RADIATION SOURCE", "BEAMLINE", "WAVELENGTH OR RANGE (A)" }))
{
// clang-format off
getCategory("diffrn_source")->emplace({
{ "diffrn_id", diffrnNr },
{ "source", rm200("RADIATION SOURCE", diffrnNr) },
......@@ -2972,6 +3038,7 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_wavelength", wavelengths.size() == 1 ? wavelengths[0] : "" },
{ "pdbx_wavelength_list", wavelengths.size() == 1 ? "" : cif::join(wavelengths, ", ") },
});
// clang-format on
}
}
......@@ -2981,10 +3048,13 @@ void PDBFileParser::ParseRemark200()
if (cif::ends_with(wl, "A"))
wl.erase(wl.length() - 1, 1);
// clang-format off
getCategory("diffrn_radiation_wavelength")->emplace({
{ "id", wavelengthNr++ },
{ "wavelength", wl.empty() ? "." : wl },
{ "wt", "1.0" } });
{ "wavelength", wl.empty() ? "." : wl },
{ "wt", "1.0" }
});
// clang-format on
}
if (inRM200({ "METHOD USED TO DETERMINE THE STRUCTURE", "STARTING MODEL" }))
......@@ -2996,6 +3066,7 @@ void PDBFileParser::ParseRemark200()
if (resolution.empty())
resolution = ".";
// clang-format off
cat->emplace({
{ "pdbx_method_to_determine_struct", mRemark200["METHOD USED TO DETERMINE THE STRUCTURE"] },
{ "pdbx_starting_model", mRemark200["STARTING MODEL"] },
......@@ -3003,11 +3074,13 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_diffrn_id", 1 },
{ "pdbx_refine_id", mExpMethod },
{ "entry_id", mStructureID } });
// clang-format on
}
if (inRM200({ "REJECTION CRITERIA (SIGMA(I))", "RESOLUTION RANGE HIGH (A)", "RESOLUTION RANGE LOW (A)", "NUMBER OF UNIQUE REFLECTIONS", "COMPLETENESS FOR RANGE (%)", "<I/SIGMA(I)> FOR THE DATA SET", "R MERGE (I)", "R SYM (I)", "DATA REDUNDANCY" }))
{
auto cat = getCategory("reflns");
// clang-format off
cat->emplace({
{ "entry_id", mStructureID },
{ "observed_criterion_sigma_I", mRemark200["REJECTION CRITERIA (SIGMA(I))"] },
......@@ -3022,20 +3095,24 @@ void PDBFileParser::ParseRemark200()
{ "pdbx_ordinal", 1 },
{ "pdbx_diffrn_id", 1 }
});
// clang-format on
}
if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE HIGH (A)" })) // that one field is mandatory...
{
// clang-format off
getCategory("reflns_shell")->emplace({
{ "d_res_high", mRemark200["HIGHEST RESOLUTION SHELL, RANGE HIGH (A)"] },
{ "d_res_low", mRemark200["HIGHEST RESOLUTION SHELL, RANGE LOW (A)"] },
{ "percent_possible_all", mRemark200["COMPLETENESS FOR SHELL (%)"] },
{ "Rmerge_I_obs", mRemark200["R MERGE FOR SHELL (I)"] },
{ "pdbx_Rsym_value", mRemark200["R SYM FOR SHELL (I)"] },
{ "meanI_over_sigI_obs", mRemark200["<I/SIGMA(I)> FOR SHELL"] },
{ "pdbx_redundancy", mRemark200["DATA REDUNDANCY IN SHELL"] },
{ "pdbx_ordinal", 1 },
{ "pdbx_diffrn_id", 1 } });
{ "d_res_low", mRemark200["HIGHEST RESOLUTION SHELL, RANGE LOW (A)"] },
{ "percent_possible_all", mRemark200["COMPLETENESS FOR SHELL (%)"] },
{ "Rmerge_I_obs", mRemark200["R MERGE FOR SHELL (I)"] },
{ "pdbx_Rsym_value", mRemark200["R SYM FOR SHELL (I)"] },
{ "meanI_over_sigI_obs", mRemark200["<I/SIGMA(I)> FOR SHELL"] },
{ "pdbx_redundancy", mRemark200["DATA REDUNDANCY IN SHELL"] },
{ "pdbx_ordinal", 1 },
{ "pdbx_diffrn_id", 1 }
});
// clang-format on
}
else if (inRM200({ "HIGHEST RESOLUTION SHELL, RANGE LOW (A)", "COMPLETENESS FOR SHELL (%)",
"R MERGE FOR SHELL (I)", "R SYM FOR SHELL (I)", "<I/SIGMA(I)> FOR SHELL", "DATA REDUNDANCY IN SHELL" }))
......@@ -3220,12 +3297,14 @@ void PDBFileParser::ParseRemark350()
else
details = "author_and_software_defined_assembly";
// clang-format off
getCategory("pdbx_struct_assembly")->emplace({
{ "id", biomolecule },
{ "details", details },
{ "method_details", values["SOFTWARE USED"] },
{ "oligomeric_details", oligomer },
{ "oligomeric_count", count > 0 ? std::to_string(count) : "" } });
{ "details", details },
{ "method_details", values["SOFTWARE USED"] },
{ "oligomeric_details", oligomer },
{ "oligomeric_count", count > 0 ? std::to_string(count) : "" }
});
auto cat = getCategory("pdbx_struct_assembly_prop");
......@@ -3233,19 +3312,23 @@ void PDBFileParser::ParseRemark350()
cat->emplace({
{ "biol_id", biomolecule },
{ "type", "ABSA (A^2)" },
{ "value", values["TOTAL BURIED SURFACE AREA"] } });
{ "value", values["TOTAL BURIED SURFACE AREA"] }
});
if (not values["CHANGE IN SOLVENT FREE ENERGY"].empty())
cat->emplace({
{ "biol_id", biomolecule },
{ "type", "MORE" },
{ "value", values["CHANGE IN SOLVENT FREE ENERGY"] } });
{ "value", values["CHANGE IN SOLVENT FREE ENERGY"] }
});
if (not values["SURFACE AREA OF THE COMPLEX"].empty())
cat->emplace({
{ "biol_id", biomolecule },
{ "type", "SSA (A^2)" },
{ "value", values["SURFACE AREA OF THE COMPLEX"] } });
{ "value", values["SURFACE AREA OF THE COMPLEX"] }
});
// clang-format on
values.clear();
}
......@@ -3254,16 +3337,17 @@ void PDBFileParser::ParseRemark350()
// if (type == "identity operation")
// {
// }
// else
try
{
// clang-format off
getCategory("pdbx_struct_oper_list")->emplace({
{ "id", operID },
{ "type", type },
// { "name", "" },
// { "symmetryOperation", "" },
// { "symmetryOperation", "" },
{ "matrix[1][1]", cif::format("%12.10f", mat[0]).str() },
{ "matrix[1][2]", cif::format("%12.10f", mat[1]).str() },
{ "matrix[1][3]", cif::format("%12.10f", mat[2]).str() },
......@@ -3275,7 +3359,9 @@ void PDBFileParser::ParseRemark350()
{ "matrix[3][1]", cif::format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", cif::format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", cif::format("%12.10f", mat[8]).str() },
{ "vector[3]", cif::format("%12.10f", vec[2]).str() } });
{ "vector[3]", cif::format("%12.10f", vec[2]).str() }
});
// clang-format on
}
catch (duplicate_key_error &ex)
{
......@@ -3291,10 +3377,13 @@ void PDBFileParser::ParseRemark350()
if (not(vec.empty() and mat.empty()))
throw std::runtime_error("Invalid REMARK 350");
// clang-format off
getCategory("pdbx_struct_assembly_gen")->emplace({
{ "assembly_id", biomolecule },
{ "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") } });
{ "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") }
});
// clang-format on
biomolecule = stoi(m[1].str());
asymIdList.clear();
......@@ -3308,10 +3397,13 @@ void PDBFileParser::ParseRemark350()
if (not operExpression.empty())
{
// clang-format off
getCategory("pdbx_struct_assembly_gen")->emplace({
{ "assembly_id", biomolecule },
{ "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") } });
{ "oper_expression", cif::join(operExpression, ",") },
{ "asym_id_list", cif::join(asymIdList, ",") }
});
// clang-format on
}
mRec = saved;
......@@ -3562,23 +3654,23 @@ void PDBFileParser::ConstructEntities()
// now that we're iterating atoms anyway, clean up the mUnobs array
mUnobs.erase(remove_if(mUnobs.begin(), mUnobs.end(), [=](UNOBS &a)
{
bool result = false;
if (modelNr == a.modelNr and
resName == a.res and
chainID == a.chain and
resSeq == a.seq and
iCode == a.iCode)
{
auto i = find(a.atoms.begin(), a.atoms.end(), name);
if (i != a.atoms.end())
{
a.atoms.erase(i);
result = a.atoms.empty();
}
}
return result; }),
bool result = false;
if (modelNr == a.modelNr and
resName == a.res and
chainID == a.chain and
resSeq == a.seq and
iCode == a.iCode)
{
auto i = find(a.atoms.begin(), a.atoms.end(), name);
if (i != a.atoms.end())
{
a.atoms.erase(i);
result = a.atoms.empty();
}
}
return result; }),
mUnobs.end());
continue;
......@@ -3797,6 +3889,7 @@ void PDBFileParser::ConstructEntities()
mAsymID2EntityID[asymID] = entityID;
// clang-format off
getCategory("struct_asym")->emplace({
{ "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", chain.mDbref.chainID == ' ' ? "Y" : "N" },
......@@ -3804,6 +3897,7 @@ void PDBFileParser::ConstructEntities()
{ "entity_id", entityID },
// details
});
// clang-format on
int seqNr = 1;
for (auto &res : chain.mSeqres)
......@@ -3818,7 +3912,7 @@ void PDBFileParser::ConstructEntities()
for (std::string monID : monIds)
{
std::string authMonID, authSeqNum, authInsCode{'.'};
std::string authMonID, authSeqNum, authInsCode{ '.' };
if (res.mSeen)
{
......@@ -3827,6 +3921,7 @@ void PDBFileParser::ConstructEntities()
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{ res.mIcode };
// clang-format off
cat->emplace({
{ "asym_id", asymID },
{ "entity_id", mMolID2EntityID[chain.mMolID] },
......@@ -3839,13 +3934,16 @@ void PDBFileParser::ConstructEntities()
{ "auth_mon_id", authMonID },
{ "pdb_strand_id", std::string{ chain.mDbref.chainID } },
{ "pdb_ins_code", authInsCode },
{ "hetero", res.mAlts.empty() ? "n" : "y" } });
{ "hetero", res.mAlts.empty() ? "n" : "y" }
});
// clang-format on
}
else
{
if (res.mIcode != ' ' and res.mIcode != 0)
authInsCode = std::string{ res.mIcode } + "A";
// clang-format off
cat->emplace({
{ "asym_id", asymID },
{ "entity_id", mMolID2EntityID[chain.mMolID] },
......@@ -3858,7 +3956,9 @@ void PDBFileParser::ConstructEntities()
{ "auth_mon_id", "." },
{ "pdb_strand_id", std::string{ chain.mDbref.chainID } },
{ "pdb_ins_code", authInsCode },
{ "hetero", res.mAlts.empty() ? "n" : "y" } });
{ "hetero", res.mAlts.empty() ? "n" : "y" }
});
// clang-format on
}
}
}
......@@ -3877,49 +3977,55 @@ void PDBFileParser::ConstructEntities()
{
srcMethod = "syn";
// clang-format off
getCategory("pdbx_entity_src_syn")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID },
{ "organism_scientific", cmp.mSource["ORGANISM_SCIENTIFIC"] },
{ "ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
});
// clang-format on
}
else if (cmp.mInfo["ENGINEERED"] == "YES" or
not cmp.mSource["EXPRESSION_SYSTEM"].empty())
{
srcMethod = "man";
// clang-format off
getCategory("entity_src_gen")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID },
{ "gene_src_common_name", cmp.mSource["ORGANISM_COMMON"] },
{ "pdbx_gene_src_gene", cmp.mSource["GENE"] },
{ "gene_src_strain", cmp.mSource["STRAIN"] },
{ "gene_src_tissue", cmp.mSource["TISSUE"] },
{ "gene_src_tissue_fraction", cmp.mSource["TISSUE_FRACTION"] },
{ "pdbx_gene_src_cell_line", cmp.mSource["CELL_LINE"] },
{ "pdbx_gene_src_organelle", cmp.mSource["ORGANELLE"] },
{ "pdbx_gene_src_cell", cmp.mSource["CELL"] },
{ "pdbx_gene_src_cellular_location", cmp.mSource["CELLULAR_LOCATION"] },
{ "host_org_common_name", cmp.mSource["EXPRESSION_SYSTEM_COMMON"] },
{ "pdbx_gene_src_scientific_name", cmp.mSource["ORGANISM_SCIENTIFIC"] },
{ "pdbx_gene_src_ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
{ "pdbx_host_org_scientific_name", cmp.mSource["EXPRESSION_SYSTEM"] },
{ "pdbx_host_org_ncbi_taxonomy_id", cmp.mSource["EXPRESSION_SYSTEM_TAXID"] },
{ "pdbx_host_org_strain", cmp.mSource["EXPRESSION_SYSTEM_STRAIN"] },
{ "pdbx_host_org_variant", cmp.mSource["EXPRESSION_SYSTEM_VARIANT"] },
{ "pdbx_host_org_cell_line", cmp.mSource["EXPRESSION_SYSTEM_CELL_LINE"] },
{ "pdbx_host_org_cellular_location", cmp.mSource["EXPRESSION_SYSTEM_CELLULAR_LOCATION"] },
{ "pdbx_host_org_vector_type", cmp.mSource["EXPRESSION_SYSTEM_VECTOR_TYPE"] },
{ "pdbx_host_org_vector", cmp.mSource["EXPRESSION_SYSTEM_VECTOR"] },
{ "pdbx_host_org_gene", cmp.mSource["EXPRESSION_SYSTEM_GENE"] },
{ "plasmid_name", cmp.mSource["EXPRESSION_SYSTEM_PLASMID"] },
{ "pdbx_description", cmp.mSource["OTHER_DETAILS"] } });
{ "pdbx_src_id", structRefID },
{ "gene_src_common_name", cmp.mSource["ORGANISM_COMMON"] },
{ "pdbx_gene_src_gene", cmp.mSource["GENE"] },
{ "gene_src_strain", cmp.mSource["STRAIN"] },
{ "gene_src_tissue", cmp.mSource["TISSUE"] },
{ "gene_src_tissue_fraction", cmp.mSource["TISSUE_FRACTION"] },
{ "pdbx_gene_src_cell_line", cmp.mSource["CELL_LINE"] },
{ "pdbx_gene_src_organelle", cmp.mSource["ORGANELLE"] },
{ "pdbx_gene_src_cell", cmp.mSource["CELL"] },
{ "pdbx_gene_src_cellular_location", cmp.mSource["CELLULAR_LOCATION"] },
{ "host_org_common_name", cmp.mSource["EXPRESSION_SYSTEM_COMMON"] },
{ "pdbx_gene_src_scientific_name", cmp.mSource["ORGANISM_SCIENTIFIC"] },
{ "pdbx_gene_src_ncbi_taxonomy_id", cmp.mSource["ORGANISM_TAXID"] },
{ "pdbx_host_org_scientific_name", cmp.mSource["EXPRESSION_SYSTEM"] },
{ "pdbx_host_org_ncbi_taxonomy_id", cmp.mSource["EXPRESSION_SYSTEM_TAXID"] },
{ "pdbx_host_org_strain", cmp.mSource["EXPRESSION_SYSTEM_STRAIN"] },
{ "pdbx_host_org_variant", cmp.mSource["EXPRESSION_SYSTEM_VARIANT"] },
{ "pdbx_host_org_cell_line", cmp.mSource["EXPRESSION_SYSTEM_CELL_LINE"] },
{ "pdbx_host_org_cellular_location", cmp.mSource["EXPRESSION_SYSTEM_CELLULAR_LOCATION"] },
{ "pdbx_host_org_vector_type", cmp.mSource["EXPRESSION_SYSTEM_VECTOR_TYPE"] },
{ "pdbx_host_org_vector", cmp.mSource["EXPRESSION_SYSTEM_VECTOR"] },
{ "pdbx_host_org_gene", cmp.mSource["EXPRESSION_SYSTEM_GENE"] },
{ "plasmid_name", cmp.mSource["EXPRESSION_SYSTEM_PLASMID"] },
{ "pdbx_description", cmp.mSource["OTHER_DETAILS"] }
});
// clang-format on
}
else if (not cmp.mSource["ORGANISM_SCIENTIFIC"].empty())
{
srcMethod = "nat";
// clang-format off
getCategory("entity_src_nat")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_src_id", structRefID },
......@@ -3932,25 +4038,32 @@ void PDBFileParser::ConstructEntities()
{ "pdbx_plasmid_name", cmp.mSource["PLASMID"] },
{ "pdbx_organ", cmp.mSource["ORGAN"] },
});
// clang-format on
}
// clang-format off
getCategory("entity")->emplace({
{ "id", mMolID2EntityID[cmp.mMolID] },
{ "type", "polymer" },
{ "src_method", srcMethod },
{ "pdbx_description", cmp.mInfo["MOLECULE"] },
// { "pdbx_formula_weight", },
// { "pdbx_formula_weight", },
{ "pdbx_number_of_molecules", cmp.mChains.size() },
{ "details", cmp.mInfo["OTHER_DETAILS"] },
{ "pdbx_mutation", cmp.mInfo["MUTATION"] },
{ "pdbx_fragment", cmp.mInfo["FRAGMENT"] },
{ "pdbx_ec", cmp.mInfo["EC"] } });
{ "pdbx_ec", cmp.mInfo["EC"] }
});
// clang-format on
if (not cmp.mInfo["SYNONYM"].empty())
{
// clang-format off
getCategory("entity_name_com")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "name", cmp.mInfo["SYNONYM"] } });
{ "name", cmp.mInfo["SYNONYM"] }
});
// clang-format on
}
std::string desc = cmp.mInfo["MOLECULE"];
......@@ -3969,14 +4082,16 @@ void PDBFileParser::ConstructEntities()
if (ci != mChains.end() and not ci->mDbref.dbIdCode.empty())
{
// clang-format off
getCategory("struct_ref")->emplace({
{ "id", structRefID },
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "db_name", ci->mDbref.database },
{ "db_code", ci->mDbref.dbIdCode },
{ "pdbx_db_accession", ci->mDbref.dbAccession },
// { "pdbx_align_begin", ci->mDbref.dbSeqBegin }
// { "pdbx_align_begin", ci->mDbref.dbSeqBegin }
});
// clang-format on
}
bool nstdMonomer = false, nonstandardLinkage = false;
......@@ -3999,7 +4114,9 @@ void PDBFileParser::ConstructEntities()
if (not dbref.database.empty())
{
auto insToStr = [](char i) -> std::string
{ return i == ' ' or not isprint(i) ? "" : std::string{ i }; };
{
return i == ' ' or not isprint(i) ? "" : std::string{ i };
};
auto &pdbxPolySeqScheme = *getCategory("pdbx_poly_seq_scheme");
......@@ -4007,20 +4124,21 @@ void PDBFileParser::ConstructEntities()
try
{
seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and
key("pdb_seq_num") == dbref.seqBegin and
(key("pdb_ins_code") == insToStr(dbref.insertBegin) or key("pdb_ins_code") == cif::null),
"seq_id");
seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string { dbref.chainID } and
key("pdb_seq_num") == dbref.seqEnd and
(key("pdb_ins_code") == insToStr(dbref.insertEnd) or key("pdb_ins_code") == cif::null),
"seq_id");
seqAlignBeg = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string{ dbref.chainID } and
key("pdb_seq_num") == dbref.seqBegin and
(key("pdb_ins_code") == insToStr(dbref.insertBegin) or key("pdb_ins_code") == cif::null),
"seq_id");
seqAlignEnd = pdbxPolySeqScheme.find1<int>(key("pdb_strand_id") == std::string{ dbref.chainID } and
key("pdb_seq_num") == dbref.seqEnd and
(key("pdb_ins_code") == insToStr(dbref.insertEnd) or key("pdb_ins_code") == cif::null),
"seq_id");
}
catch (...)
{
}
// clang-format off
getCategory("struct_ref_seq")->emplace({
{ "align_id", structRefSeqAlignID },
{ "ref_id", structRefID },
......@@ -4036,7 +4154,9 @@ void PDBFileParser::ConstructEntities()
{ "db_align_end", dbref.dbSeqEnd },
{ "pdbx_db_align_end_ins_code", insToStr(dbref.dbinsEnd) },
{ "pdbx_auth_seq_align_beg", dbref.seqBegin },
{ "pdbx_auth_seq_align_end", dbref.seqEnd } });
{ "pdbx_auth_seq_align_end", dbref.seqEnd }
});
// clang-format on
// write the struct_ref_seq_dif
for (auto &seqadv : mSeqadvs)
......@@ -4058,20 +4178,23 @@ void PDBFileParser::ConstructEntities()
seqNum = std::to_string(labelSeq);
// clang-format off
getCategory("struct_ref_seq_dif")->emplace({
{ "align_id", structRefSeqAlignID },
{ "pdbx_PDB_id_code", dbref.PDBIDCode },
{ "mon_id", seqadv.resName },
{ "pdbx_pdb_strand_id", seqadv.chainID },
{ "seq_num", seqNum },
{ "pdbx_pdb_ins_code", seqadv.iCode == ' ' ? std::string{} : std::string{ seqadv.iCode } },
{ "pdbx_seq_db_name", seqadv.database },
{ "pdbx_seq_db_accession_code", seqadv.dbAccession },
{ "db_mon_id", seqadv.dbRes },
{ "pdbx_seq_db_seq_num", seqadv.dbSeq },
{ "details", seqadv.conflict },
{ "pdbx_auth_seq_num", seqadv.seqNum },
{ "pdbx_ordinal", ++mPdbxDifOrdinal } });
{ "pdbx_PDB_id_code", dbref.PDBIDCode },
{ "mon_id", seqadv.resName },
{ "pdbx_pdb_strand_id", seqadv.chainID },
{ "seq_num", seqNum },
{ "pdbx_pdb_ins_code", seqadv.iCode == ' ' ? std::string{} : std::string{ seqadv.iCode } },
{ "pdbx_seq_db_name", seqadv.database },
{ "pdbx_seq_db_accession_code", seqadv.dbAccession },
{ "db_mon_id", seqadv.dbRes },
{ "pdbx_seq_db_seq_num", seqadv.dbSeq },
{ "details", seqadv.conflict },
{ "pdbx_auth_seq_num", seqadv.seqNum },
{ "pdbx_ordinal", ++mPdbxDifOrdinal }
});
// clang-format on
}
}
......@@ -4153,19 +4276,25 @@ void PDBFileParser::ConstructEntities()
if (std::find(mChemComp.begin(), mChemComp.end(), rs.mMonID) == mChemComp.end())
mChemComp.emplace_back(rs.mMonID);
// clang-format off
cat_ps->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "num", i + 1 },
{ "mon_id", rs.mMonID },
{ "hetero", rs.mAlts.empty() ? "n" : "y" } });
{ "hetero", rs.mAlts.empty() ? "n" : "y" }
});
// clang-format on
for (auto &a : rs.mAlts)
{
// clang-format off
cat_ps->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "num", i + 1 },
{ "mon_id", a },
{ "hetero", "y" } });
{ "hetero", "y" }
});
// clang-format on
}
}
}
......@@ -4176,6 +4305,7 @@ void PDBFileParser::ConstructEntities()
else if (mightBeDNA and not mightBePolyPeptide)
type = "polyribonucleotide";
// clang-format off
getCategory("entity_poly")->emplace({
{ "entity_id", mMolID2EntityID[cmp.mMolID] },
{ "pdbx_seq_one_letter_code", seq },
......@@ -4183,16 +4313,21 @@ void PDBFileParser::ConstructEntities()
{ "nstd_monomer", (nstdMonomer ? "yes" : "no") },
{ "pdbx_strand_id", cif::join(chains, ",") },
{ "nstd_linkage", nonstandardLinkage ? "yes" : "no" },
{ "type", type } });
{ "type", type }
});
// clang-format on
}
if (not(structTitle.empty() and structDescription.empty()))
{
// clang-format off
getCategory("struct")->emplace({
{ "entry_id", mStructureID },
{ "title", cif::join(structTitle, ", ") },
{ "pdbx_descriptor", cif::join(structDescription, ", ") },
{ "pdbx_model_type_details", mModelTypeDetails } });
{ "pdbx_model_type_details", mModelTypeDetails }
});
// clang-format on
}
// build sugar trees first
......@@ -4201,8 +4336,8 @@ void PDBFileParser::ConstructEntities()
// done with the sugar, resume operation as before
std::map<char, std::string> waterChains;
std::map<std::tuple<std::string, std::string>, int> ndbSeqNum; // for nonpoly scheme
std::map<std::string,int> entityAuthSeqNum; // for nonpoly scheme too
std::map<std::tuple<std::string, std::string>, int> ndbSeqNum; // for nonpoly scheme
std::map<std::string, int> entityAuthSeqNum; // for nonpoly scheme too
for (size_t i = 0; i < mHets.size(); ++i)
{
......@@ -4252,12 +4387,15 @@ void PDBFileParser::ConstructEntities()
if (hetID == mWaterHetID)
{
// clang-format off
getCategory("entity")->emplace({
{ "id", entityID },
{ "type", "water" },
{ "src_method", "nat" },
{ "pdbx_description", "water" },
{ "pdbx_number_of_molecules", hetCount[hetID] } });
{ "pdbx_number_of_molecules", hetCount[hetID] }
});
// clang-format on
}
else
{
......@@ -4268,23 +4406,30 @@ void PDBFileParser::ConstructEntities()
mHetnams[hetID] = compound->name();
}
// clang-format off
getCategory("entity")->emplace({
{ "id", entityID },
{ "type", "non-polymer" },
{ "src_method", "syn" },
{ "pdbx_description", mHetnams[hetID] },
{ "details", mHetsyns[hetID] },
{ "pdbx_number_of_molecules", hetCount[hetID] } });
{ "pdbx_number_of_molecules", hetCount[hetID] }
});
// clang-format on
}
// write a pdbx_entity_nonpoly record
std::string name = mHetnams[hetID];
if (name.empty() and hetID == mWaterHetID)
name = "water";
// clang-format off
getCategory("pdbx_entity_nonpoly")->emplace({
{ "entity_id", entityID },
{ "name", name },
{ "comp_id", hetID } });
{ "name", name },
{ "comp_id", hetID }
});
// clang-format on
}
// create an asym for this het/chain combo, if needed
......@@ -4320,6 +4465,8 @@ void PDBFileParser::ConstructEntities()
if (writtenAsyms.count(asymID) == 0)
{
writtenAsyms.insert(asymID);
// clang-format off
getCategory("struct_asym")->emplace({
{ "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", het.chainID == ' ' ? "Y" : "N" },
......@@ -4327,6 +4474,8 @@ void PDBFileParser::ConstructEntities()
{ "entity_id", mHet2EntityID[hetID] },
// details
});
// clang-format on
}
}
......@@ -4338,17 +4487,20 @@ void PDBFileParser::ConstructEntities()
if (iCode.empty())
iCode = { '.' };
// clang-format off
getCategory("pdbx_nonpoly_scheme")->emplace({
{ "asym_id", asymID },
{ "entity_id", mHet2EntityID[hetID] },
{ "mon_id", hetID },
{ "ndb_seq_num", seqNr },
{ "pdb_seq_num", het.seqNum },
{ "auth_seq_num", authSeqNr }, // Yes
{ "auth_seq_num", authSeqNr }, // Yes
{ "pdb_mon_id", hetID },
{ "auth_mon_id", hetID },
{ "pdb_strand_id", std::string{ het.chainID } },
{ "pdb_ins_code", iCode } });
{ "pdb_ins_code", iCode }
});
// clang-format on
// mapping needed?
mChainSeq2AsymSeq[std::make_tuple(het.chainID, het.seqNum, het.iCode)] = std::make_tuple(asymID, seqNr, false);
......@@ -4378,6 +4530,7 @@ void PDBFileParser::ConstructEntities()
continue;
}
// clang-format off
getCategory("pdbx_struct_mod_residue")->emplace({
{ "id", modResID++ },
{ "label_asym_id", asymID },
......@@ -4390,6 +4543,7 @@ void PDBFileParser::ConstructEntities()
{ "parent_comp_id", stdRes },
{ "details", comment }
});
// clang-format on
modResSet.insert(resName);
}
......@@ -4438,6 +4592,7 @@ void PDBFileParser::ConstructEntities()
if (modResSet.count(cc))
nstd = "n";
// clang-format off
getCategory("chem_comp")->emplace({
{ "id", cc },
{ "name", name },
......@@ -4446,6 +4601,7 @@ void PDBFileParser::ConstructEntities()
{ "mon_nstd_flag", nstd },
{ "type", type }
});
// clang-format on
}
getCategory("chem_comp")->reorder_by_index();
......@@ -4477,25 +4633,9 @@ void PDBFileParser::ConstructEntities()
if (unobs.atoms.empty())
{
// clang-format off
getCategory("pdbx_unobs_or_zero_occ_residues")->emplace({
{ "id", std::to_string(++idRes) },
{ "polymer_flag", isPolymer ? "Y" : "N" },
{ "occupancy_flag", 1 },
{ "PDB_model_num", unobs.modelNr ? unobs.modelNr : 1 },
{ "auth_asym_id", std::string{ unobs.chain } },
{ "auth_comp_id", unobs.res },
{ "auth_seq_id", unobs.seq },
{ "PDB_ins_code", unobs.iCode == ' ' ? "" : std::string{ unobs.iCode } },
{ "label_asym_id", asymID },
{ "label_comp_id", compID }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" } });
}
else
{
for (auto &atom : unobs.atoms)
{
getCategory("pdbx_unobs_or_zero_occ_atoms")->emplace({
{ "id", std::to_string(++idAtom) },
{ "polymer_flag", isPolymer ? "Y" : "N" },
{ "occupancy_flag", 1 },
{ "PDB_model_num", unobs.modelNr ? unobs.modelNr : 1 },
......@@ -4503,11 +4643,33 @@ void PDBFileParser::ConstructEntities()
{ "auth_comp_id", unobs.res },
{ "auth_seq_id", unobs.seq },
{ "PDB_ins_code", unobs.iCode == ' ' ? "" : std::string{ unobs.iCode } },
{ "auth_atom_id", atom },
{ "label_asym_id", asymID },
{ "label_comp_id", compID }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" }
});
// clang-format on
}
else
{
for (auto &atom : unobs.atoms)
{
// clang-format off
getCategory("pdbx_unobs_or_zero_occ_atoms")->emplace({
{ "id", std::to_string(++idAtom) },
{ "polymer_flag", isPolymer ? "Y" : "N" },
{ "occupancy_flag", 1 },
{ "PDB_model_num", unobs.modelNr ? unobs.modelNr : 1 },
{ "auth_asym_id", std::string{ unobs.chain } },
{ "auth_comp_id", unobs.res },
{ "auth_seq_id", unobs.seq },
{ "PDB_ins_code", unobs.iCode == ' ' ? "" : std::string{ unobs.iCode } },
{ "auth_atom_id", atom },
{ "label_asym_id", asymID },
{ "label_comp_id", compID }, // TODO: change to correct comp_id
{ "label_seq_id", seqNr > 0 ? std::to_string(seqNr) : "" },
{ "label_atom_id", atom } });
{ "label_atom_id", atom }
});
// clang-format on
}
}
}
......@@ -4627,26 +4789,33 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
entityID = std::to_string(mNextEntityNr++);
mBranch2EntityID[branchName] = entityID;
// clang-format off
getCategory("entity")->emplace({
{ "id", entityID },
{ "type", "branched" },
{ "src_method", "man" },
{ "pdbx_description", branchName } });
{ "pdbx_description", branchName }
});
getCategory("pdbx_entity_branch")->emplace({
{ "entity_id", entityID },
{ "type", "oligosaccharide" } });
{ "type", "oligosaccharide" }
});
// clang-format on
int num = 0;
std::map<ATOM_REF, int> branch_list;
for (auto &s : sugarTree)
{
// clang-format off
getCategory("pdbx_entity_branch_list")->emplace({
{ "entity_id", entityID },
{ "comp_id", s.c1.resName },
{ "num", ++num },
{ "hetero", ci.size() == 1 ? "n" : "y" } });
{ "comp_id", s.c1.resName },
{ "num", ++num },
{ "hetero", ci.size() == 1 ? "n" : "y" }
});
// clang-format on
branch_list[s.c1] = num;
}
......@@ -4658,6 +4827,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
if (s.leaving_o == 0)
continue;
// clang-format off
branch_link.emplace({
{ "link_id", branch_link.size() + 1 },
{ "entity_id", entityID },
......@@ -4671,6 +4841,7 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
{ "leaving_atom_id_2", "HO" + std::to_string(s.leaving_o) },
{ "value_order", "sing" } /// ??
});
// clang-format on
}
}
......@@ -4682,11 +4853,14 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
mAsymID2EntityID[asymID] = entityID;
// clang-format off
getCategory("struct_asym")->emplace({
{ "id", asymID },
{ "pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N" },
{ "pdbx_modified", "N" },
{ "entity_id", entityID } });
{ "pdbx_blank_PDB_chainid_flag", si->chainID == ' ' ? "Y" : "N" },
{ "pdbx_modified", "N" },
{ "entity_id", entityID }
});
// clang-format on
std::string iCode{ si->iCode };
cif::trim(iCode);
......@@ -4696,18 +4870,21 @@ void PDBFileParser::ConstructSugarTrees(int &asymNr)
int num = 0;
for (auto s : sugarTree)
{
// clang-format off
getCategory("pdbx_branch_scheme")->emplace({
{ "asym_id", asymID },
{ "entity_id", entityID },
{ "mon_id", s.c1.resName },
{ "num", ++num },
{ "pdb_asym_id", asymID },
{ "pdb_mon_id", s.c1.resName },
{ "pdb_seq_num", num },
{ "auth_asym_id", std::string{ s.c1.chainID } },
{ "auth_mon_id", s.next.resName },
{ "auth_seq_num", s.c1.resSeq },
{ "hetero", ci.size() == 1 ? "n" : "y" } });
{ "entity_id", entityID },
{ "mon_id", s.c1.resName },
{ "num", ++num },
{ "pdb_asym_id", asymID },
{ "pdb_mon_id", s.c1.resName },
{ "pdb_seq_num", num },
{ "auth_asym_id", std::string{ s.c1.chainID } },
{ "auth_mon_id", s.next.resName },
{ "auth_seq_num", s.c1.resSeq },
{ "hetero", ci.size() == 1 ? "n" : "y" }
});
// clang-format on
auto k = std::make_tuple(s.c1.chainID, s.c1.resSeq, s.c1.iCode);
assert(mChainSeq2AsymSeq.count(k) == 0);
......@@ -4783,6 +4960,7 @@ void PDBFileParser::ParseSecondaryStructure()
else
{
auto cat = getCategory("struct_conf");
// clang-format off
cat->emplace({
{ "conf_type_id", "HELX_P" },
{ "id", "HELX_P" + std::to_string(vI(8, 10)) },
......@@ -4805,13 +4983,14 @@ void PDBFileParser::ParseSecondaryStructure()
{ "pdbx_PDB_helix_class", vS(39, 40) },
{ "details", vS(41, 70) },
{ "pdbx_PDB_helix_length", vI(72, 76) } });
{ "pdbx_PDB_helix_length", vI(72, 76) }
});
// clang-format on
if (firstHelix)
{
cat = getCategory("struct_conf_type");
cat->emplace({
{ "id", "HELX_P" } });
cat->emplace({ { "id", "HELX_P" } });
firstHelix = false;
}
}
......@@ -4878,11 +5057,14 @@ void PDBFileParser::ParseSecondaryStructure()
if (sense != 0)
{
// clang-format off
getCategory("struct_sheet_order")->emplace({
{ "sheet_id", sheetID },
{ "range_id_1", rangeID },
{ "range_id_2", rangeID + 1 },
{ "sense", sense == -1 ? "anti-parallel" : "parallel" } });
{ "range_id_1", rangeID },
{ "range_id_2", rangeID + 1 },
{ "sense", sense == -1 ? "anti-parallel" : "parallel" }
});
// clang-format on
}
std::string begAsymID, endAsymID;
......@@ -4900,6 +5082,7 @@ void PDBFileParser::ParseSecondaryStructure()
}
else
{
// clang-format off
getCategory("struct_sheet_range")->emplace({
{ "sheet_id", sheetID },
{ "id", vI(8, 10) },
......@@ -4919,6 +5102,7 @@ void PDBFileParser::ParseSecondaryStructure()
{ "end_auth_asym_id", vS(33, 33) },
{ "end_auth_seq_id", vI(34, 37) },
});
// clang-format on
if (sense != 0 and mRec->mVlen > 34)
{
......@@ -4935,19 +5119,20 @@ void PDBFileParser::ParseSecondaryStructure()
std::cerr << "skipping unmatched pdbx_struct_sheet_hbond record\n";
}
else
// clang-format off
getCategory("pdbx_struct_sheet_hbond")->emplace({
{ "sheet_id", sheetID },
{ "range_id_1", rangeID },
{ "range_id_2", rangeID + 1 },
{ "range_1_label_atom_id", vS(57, 60) },
{ "range_1_label_comp_id", vS(61, 63) },
{ "range_1_label_asym_id", r1AsymID },
{ "range_1_label_seq_id", r1Seq },
{ "range_1_PDB_ins_code", vS(70, 70) },
{ "range_1_auth_atom_id", vS(57, 60) },
{ "range_1_auth_comp_id", vS(61, 63) },
{ "range_1_auth_asym_id", vS(65, 65) },
{ "range_1_auth_seq_id", vI(66, 69) },
{ "range_id_1", rangeID },
{ "range_id_2", rangeID + 1 },
{ "range_1_label_atom_id", vS(57, 60) },
{ "range_1_label_comp_id", vS(61, 63) },
{ "range_1_label_asym_id", r1AsymID },
{ "range_1_label_seq_id", r1Seq },
{ "range_1_PDB_ins_code", vS(70, 70) },
{ "range_1_auth_atom_id", vS(57, 60) },
{ "range_1_auth_comp_id", vS(61, 63) },
{ "range_1_auth_asym_id", vS(65, 65) },
{ "range_1_auth_seq_id", vI(66, 69) },
{ "range_2_label_atom_id", vS(42, 45) },
{ "range_2_label_comp_id", vS(46, 48) },
......@@ -4957,7 +5142,9 @@ void PDBFileParser::ParseSecondaryStructure()
{ "range_2_auth_atom_id", vS(42, 45) },
{ "range_2_auth_comp_id", vS(46, 48) },
{ "range_2_auth_asym_id", vS(50, 50) },
{ "range_2_auth_seq_id", vI(51, 54) } });
{ "range_2_auth_seq_id", vI(51, 54) }
});
// clang-format on
}
if (sense != 0)
......@@ -5060,6 +5247,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{
for (auto a2 : alt2)
{
// clang-format off
getCategory("struct_conn")->emplace({
{ "id", "disulf" + std::to_string(++ssBondNr) },
{ "conn_type_id", "disulf" },
......@@ -5088,6 +5276,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ "pdbx_dist_value", vS(74, 78) },
});
// clang-format on
}
}
......@@ -5185,6 +5374,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
continue;
}
// clang-format off
getCategory("struct_conn")->emplace({
{ "id", type + std::to_string(linkNr) },
{ "conn_type_id", type },
......@@ -5219,7 +5409,9 @@ void PDBFileParser::ParseConnectivtyAnnotation()
{ "ptnr2_symmetry", sym2 },
{ "pdbx_dist_value", distance } });
{ "pdbx_dist_value", distance }
});
// clang-format on
continue;
}
......@@ -5260,25 +5452,28 @@ void PDBFileParser::ParseConnectivtyAnnotation()
std::string iCode1str = iCode1 == ' ' ? std::string() : std::string{ iCode1 };
std::string iCode2str = iCode2 == ' ' ? std::string() : std::string{ iCode2 };
// clang-format off
getCategory("struct_mon_prot_cis")->emplace({
{ "pdbx_id", serNum },
{ "label_comp_id", pep1 },
{ "label_seq_id", lResSeq1 },
{ "label_asym_id", lAsym1 },
{ "label_alt_id", "." },
{ "pdbx_PDB_ins_code", iCode1str },
{ "auth_comp_id", pep1 },
{ "auth_seq_id", seqNum1 },
{ "auth_asym_id", std::string{ chainID1 } },
{ "pdbx_label_comp_id_2", pep2 },
{ "pdbx_label_seq_id_2", lResSeq2 },
{ "pdbx_label_asym_id_2", lAsym2 },
{ "pdbx_PDB_ins_code_2", iCode2str },
{ "pdbx_auth_comp_id_2", pep2 },
{ "pdbx_auth_seq_id_2", seqNum2 },
{ "pdbx_auth_asym_id_2", std::string{ chainID2 } },
{ "pdbx_PDB_model_num", modNum },
{ "pdbx_omega_angle", measure } });
{ "label_comp_id", pep1 },
{ "label_seq_id", lResSeq1 },
{ "label_asym_id", lAsym1 },
{ "label_alt_id", "." },
{ "pdbx_PDB_ins_code", iCode1str },
{ "auth_comp_id", pep1 },
{ "auth_seq_id", seqNum1 },
{ "auth_asym_id", std::string{ chainID1 } },
{ "pdbx_label_comp_id_2", pep2 },
{ "pdbx_label_seq_id_2", lResSeq2 },
{ "pdbx_label_asym_id_2", lAsym2 },
{ "pdbx_PDB_ins_code_2", iCode2str },
{ "pdbx_auth_comp_id_2", pep2 },
{ "pdbx_auth_seq_id_2", seqNum2 },
{ "pdbx_auth_asym_id_2", std::string{ chainID2 } },
{ "pdbx_PDB_model_num", modNum },
{ "pdbx_omega_angle", measure }
});
// clang-format on
continue;
}
......@@ -5323,6 +5518,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
std::cerr << "skipping struct_site_gen record\n";
}
else
// clang-format off
cat->emplace({
{ "id", structSiteGenID++ },
{ "site_id", siteID },
......@@ -5337,6 +5533,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
{ "label_atom_id", "." },
{ "label_alt_id", "." },
});
// clang-format on
o += 11;
}
......@@ -5351,6 +5548,7 @@ void PDBFileParser::ParseCrystallographic()
{
Match("CRYST1", true);
// clang-format off
getCategory("cell")->emplace({
{ "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1"
{ "length_a", vF(7, 15) }, // 7 - 15 Real(9.3) a a (Angstroms).
......@@ -5362,6 +5560,7 @@ void PDBFileParser::ParseCrystallographic()
/* goes into symmetry */ // 56 - 66 LString sGroup Space group.
{ "Z_PDB", vF(67, 70) } // 67 - 70 Integer z Z value.
});
// clang-format on
std::string spaceGroup, intTablesNr;
try
......@@ -5373,15 +5572,19 @@ void PDBFileParser::ParseCrystallographic()
{
}
// Store the space group name and table number read above in the symmetry category.
// clang-format off
getCategory("symmetry")->emplace({
{ "entry_id", mStructureID },
{ "space_group_name_H-M", spaceGroup },
{ "Int_Tables_number", intTablesNr } });
{ "Int_Tables_number", intTablesNr }
});
// clang-format on
GetNextRecord();
}
else
{
// clang-format off
// no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory("cell")->emplace({
......@@ -5401,6 +5604,7 @@ void PDBFileParser::ParseCrystallographic()
{ "space_group_name_H-M", "P 1" },
{ "Int_Tables_number", 1 }
});
// clang-format on
}
}
......@@ -5423,6 +5627,7 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord();
}
// clang-format off
getCategory("database_PDB_matrix")->emplace({
{ "entry_id", mStructureID },
{ "origx[1][1]", m[0][0] },
......@@ -5438,6 +5643,7 @@ void PDBFileParser::ParseCoordinateTransformation()
{ "origx_vector[2]", v[1] },
{ "origx_vector[3]", v[2] },
});
// clang-format on
}
if (cif::starts_with(mRec->mName, "SCALE"))
......@@ -5455,6 +5661,7 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord();
}
// clang-format off
getCategory("atom_sites")->emplace({
{ "entry_id", mStructureID },
{ "fract_transf_matrix[1][1]", m[0][0] },
......@@ -5470,6 +5677,7 @@ void PDBFileParser::ParseCoordinateTransformation()
{ "fract_transf_vector[2]", v[1] },
{ "fract_transf_vector[3]", v[2] },
});
// clang-format on
}
while (cif::starts_with(mRec->mName, "MTRIX1"))
......@@ -5491,21 +5699,24 @@ void PDBFileParser::ParseCoordinateTransformation()
GetNextRecord(); // transformations of the molecule are
} // contained in the datablock. Otherwise, blank.
// clang-format off
getCategory("struct_ncs_oper")->emplace({
{ "id", serial },
{ "matrix[1][1]", m[0][0] },
{ "matrix[1][2]", m[0][1] },
{ "matrix[1][3]", m[0][2] },
{ "matrix[2][1]", m[1][0] },
{ "matrix[2][2]", m[1][1] },
{ "matrix[2][3]", m[1][2] },
{ "matrix[3][1]", m[2][0] },
{ "matrix[3][2]", m[2][1] },
{ "matrix[3][3]", m[2][2] },
{ "vector[1]", v[0] },
{ "vector[2]", v[1] },
{ "vector[3]", v[2] },
{ "code", igiven ? "given" : "" } });
{ "matrix[1][1]", m[0][0] },
{ "matrix[1][2]", m[0][1] },
{ "matrix[1][3]", m[0][2] },
{ "matrix[2][1]", m[1][0] },
{ "matrix[2][2]", m[1][1] },
{ "matrix[2][3]", m[1][2] },
{ "matrix[3][1]", m[2][0] },
{ "matrix[3][2]", m[2][1] },
{ "matrix[3][3]", m[2][2] },
{ "vector[1]", v[0] },
{ "vector[2]", v[1] },
{ "vector[3]", v[2] },
{ "code", igiven ? "given" : "" }
});
// clang-format on
}
}
......@@ -5673,28 +5884,31 @@ void PDBFileParser::ParseCoordinate(int modelNr)
resSeq = branch_scheme.find1<int>("asym_id"_key == asymID and "auth_seq_num"_key == resSeq, "pdb_seq_num");
}
// clang-format off
getCategory("atom_site")->emplace({
{ "group_PDB", groupPDB },
{ "id", mAtomID },
{ "type_symbol", element },
{ "label_atom_id", name },
{ "label_alt_id", altLoc != ' ' ? std::string{ altLoc } : "." },
{ "label_comp_id", resName },
{ "label_asym_id", asymID },
{ "label_entity_id", entityID },
{ "label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "." },
{ "pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{ iCode } },
{ "Cartn_x", x },
{ "Cartn_y", y },
{ "Cartn_z", z },
{ "occupancy", occupancy },
{ "B_iso_or_equiv", tempFactor },
{ "pdbx_formal_charge", charge },
{ "auth_seq_id", resSeq },
{ "auth_comp_id", resName },
{ "auth_asym_id", std::string{ chainID } },
{ "auth_atom_id", name },
{ "pdbx_PDB_model_num", modelNr } });
{ "id", mAtomID },
{ "type_symbol", element },
{ "label_atom_id", name },
{ "label_alt_id", altLoc != ' ' ? std::string{ altLoc } : "." },
{ "label_comp_id", resName },
{ "label_asym_id", asymID },
{ "label_entity_id", entityID },
{ "label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "." },
{ "pdbx_PDB_ins_code", iCode == ' ' ? "" : std::string{ iCode } },
{ "Cartn_x", x },
{ "Cartn_y", y },
{ "Cartn_z", z },
{ "occupancy", occupancy },
{ "B_iso_or_equiv", tempFactor },
{ "pdbx_formal_charge", charge },
{ "auth_seq_id", resSeq },
{ "auth_comp_id", resName },
{ "auth_asym_id", std::string{ chainID } },
{ "auth_atom_id", name },
{ "pdbx_PDB_model_num", modelNr }
});
// clang-format on
InsertAtomType(element);
......@@ -5714,26 +5928,31 @@ void PDBFileParser::ParseCoordinate(int modelNr)
throw std::runtime_error("ANISOU record should follow corresponding ATOM record");
auto f = [](float f) -> std::string
{ return cif::format("%6.4f", f).str(); };
{
return cif::format("%6.4f", f).str();
};
// clang-format off
getCategory("atom_site_anisotrop")->emplace({
{ "id", mAtomID },
{ "type_symbol", element },
{ "pdbx_label_atom_id", name },
{ "pdbx_label_alt_id", altLoc != ' ' ? std::string{ altLoc } : "." },
{ "pdbx_label_comp_id", resName },
{ "pdbx_label_asym_id", asymID },
{ "pdbx_label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "." },
{ "U[1][1]", f(u11 / 10000.f) },
{ "U[2][2]", f(u22 / 10000.f) },
{ "U[3][3]", f(u33 / 10000.f) },
{ "U[1][2]", f(u12 / 10000.f) },
{ "U[1][3]", f(u13 / 10000.f) },
{ "U[2][3]", f(u23 / 10000.f) },
{ "pdbx_auth_seq_id", resSeq },
{ "pdbx_auth_comp_id", resName },
{ "pdbx_auth_asym_id", std::string{ chainID } },
{ "pdbx_auth_atom_id", name } });
{ "type_symbol", element },
{ "pdbx_label_atom_id", name },
{ "pdbx_label_alt_id", altLoc != ' ' ? std::string{ altLoc } : "." },
{ "pdbx_label_comp_id", resName },
{ "pdbx_label_asym_id", asymID },
{ "pdbx_label_seq_id", (isResseq and seqID > 0) ? std::to_string(seqID) : "." },
{ "U[1][1]", f(u11 / 10000.f) },
{ "U[2][2]", f(u22 / 10000.f) },
{ "U[3][3]", f(u33 / 10000.f) },
{ "U[1][2]", f(u12 / 10000.f) },
{ "U[1][3]", f(u13 / 10000.f) },
{ "U[2][3]", f(u23 / 10000.f) },
{ "pdbx_auth_seq_id", resSeq },
{ "pdbx_auth_comp_id", resName },
{ "pdbx_auth_asym_id", std::string{ chainID } },
{ "pdbx_auth_atom_id", name }
});
// clang-format on
}
}
......@@ -5812,8 +6031,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
throw std::runtime_error("Either the PDB file has no atom records, or the field " + std::string(mRec->mName) + " is not at the correct location");
for (auto e : mAtomTypes)
getCategory("atom_type")->emplace({
{ "symbol", e } });
getCategory("atom_type")->emplace({ { "symbol", e } });
// in V5, atom_type is sorted
getCategory("atom_type")->reorder_by_index();
......@@ -5833,8 +6051,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
auto exptl = getCategory("exptl");
if (exptl->empty())
{
exptl->emplace({
{ "entry_id", mStructureID },
exptl->emplace({ { "entry_id", mStructureID },
{ "method", mExpMethod },
{ "crystals_number", mRemark200["NUMBER OF CRYSTALS USED"] } });
}
......@@ -5881,7 +6098,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
{
const auto &[asym1, seq1, atom1, symm1, asym2, seq2, atom2, symm2] = r.get<std::string, std::string, std::string, std::string, std::string, std::string, std::string, std::string>(
"ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_label_atom_id", "ptnr1_symmetry",
"ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry");
"ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry");
float distance = 1.0f;
......@@ -5900,8 +6117,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
distance = std::sqrt(
(x1 - x2) * (x1 - x2) +
(y1 - y2) * (y1 - y2) +
(z1 - z2) * (z1 - z2)
);
(z1 - z2) * (z1 - z2));
else if (cif::VERBOSE > 0)
std::cerr << "Cannot calculate distance for link since one of the atoms is in another dimension\n";
}
......@@ -6279,7 +6495,7 @@ file read(const std::filesystem::path &file)
gzio::ifstream in(file);
if (not in.is_open())
throw std::runtime_error("Could not open file " + file.string() + " for input");
return read(in);
}
catch (const std::exception &ex)
......@@ -6288,4 +6504,4 @@ file read(const std::filesystem::path &file)
}
}
} // namespace pdbx
} // namespace cif::pdb
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment