Commit 12ee4a79 by Maarten L. Hekkelman

pdb2cif work

parent e5975038
...@@ -11,4 +11,5 @@ Testing/ ...@@ -11,4 +11,5 @@ Testing/
include/cif++/exports.hpp include/cif++/exports.hpp
docs/api docs/api
docs/conf.py docs/conf.py
build_ci/ build_ci/
\ No newline at end of file data/components.cif
...@@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is) ...@@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is)
if (lookahead.back() == '\r') if (lookahead.back() == '\r')
lookahead.pop_back(); lookahead.pop_back();
// if (cif::starts_with(lookahead, "HEADER") == false)
// throw std::runtime_error("This does not look like a PDB file, should start with a HEADER line");
auto contNr = [&lookahead](int offset, int len) -> int auto contNr = [&lookahead](int offset, int len) -> int
{ {
std::string cs = lookahead.substr(offset, len); std::string cs = lookahead.substr(offset, len);
...@@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle() ...@@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle()
// 11 - 80 Specification compound Description of the molecular components. // 11 - 80 Specification compound Description of the molecular components.
// list // list
std::string value{ mRec->vS(11) }; if (mRec->is("COMPND"))
if (value.find(':') == std::string::npos)
{
// special case for dumb, stripped files
auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
}
else
{ {
SpecificationListParser p(value); std::string value{ mRec->vS(11) };
if (value.find(':') == std::string::npos)
for (;;)
{ {
std::string key, val; // special case for dumb, stripped files
std::tie(key, val) = p.GetNextSpecification(); auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
if (key.empty()) }
break; else
{
SpecificationListParser p(value);
if (not iequals(key, "MOL_ID") and mCompounds.empty()) for (;;)
{ {
if (cif::VERBOSE > 0) std::string key, val;
std::cerr << "Ignoring invalid COMPND record\n"; std::tie(key, val) = p.GetNextSpecification();
break;
}
if (key == "MOL_ID") if (key.empty())
{ break;
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title; if (not iequals(key, "MOL_ID") and mCompounds.empty())
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
{ {
cif::trim(c); if (cif::VERBOSE > 0)
mCompounds.back().mChains.insert(c[0]); std::cerr << "Ignoring invalid COMPND record\n";
break;
} }
if (key == "MOL_ID")
{
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title;
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
{
cif::trim(c);
mCompounds.back().mChains.insert(c[0]);
}
}
else
mCompounds.back().mInfo[key] = val;
} }
else
mCompounds.back().mInfo[key] = val;
} }
}
if (mRec->is("COMPND"))
GetNextRecord(); GetNextRecord();
}
// SOURCE // SOURCE
Match("SOURCE", false); Match("SOURCE", false);
...@@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle() ...@@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle()
int n = 1; int n = 1;
cat = getCategory("audit_author"); cat = getCategory("audit_author");
value = { mRec->vS(11) }; std::string value = { mRec->vS(11) };
for (auto author : cif::split<std::string>(value, ",", true)) for (auto author : cif::split<std::string>(value, ",", true))
{ {
// clang-format off // clang-format off
...@@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities()
std::string formula; std::string formula;
std::string type; std::string type;
std::string nstd = "."; std::string nstd = ".";
std::string formulaWeight; std::optional<float> formulaWeight;
if (compound != nullptr) if (compound != nullptr)
{ {
...@@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities()
nstd = "y"; nstd = "y";
formula = compound->formula(); formula = compound->formula();
formulaWeight = std::to_string(compound->formula_weight()); formulaWeight = compound->formula_weight();
} }
if (name.empty()) if (name.empty())
...@@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities()
{ "id", cc }, { "id", cc },
{ "name", name }, { "name", name },
{ "formula", formula }, { "formula", formula },
{ "formula_weight", formulaWeight }, { "formula_weight", formulaWeight, 3 },
{ "mon_nstd_flag", nstd }, { "mon_nstd_flag", nstd },
{ "type", type } { "type", type }
}); });
...@@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities() ...@@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities()
} }
if (formula_weight > 0) if (formula_weight > 0)
entity["formula_weight"] = formula_weight; entity.assign({ { "formula_weight", formula_weight, 3 } });
} }
} }
...@@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic() ...@@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic()
GetNextRecord(); GetNextRecord();
} }
else
{
// clang-format off
// no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory("cell")->emplace({
{ "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1"
{ "length_a", 1 }, // 7 - 15 Real(9.3) a a (Angstroms).
{ "length_b", 1 }, // 16 - 24 Real(9.3) b b (Angstroms).
{ "length_c", 1 }, // 25 - 33 Real(9.3) c c (Angstroms).
{ "angle_alpha", 90 }, // 34 - 40 Real(7.2) alpha alpha (degrees).
{ "angle_beta", 90 }, // 41 - 47 Real(7.2) beta beta (degrees).
{ "angle_gamma", 90 }, // 48 - 54 Real(7.2) gamma gamma (degrees).
/* goes into symmetry */ // 56 - 66 LString sGroup Space group.
{ "Z_PDB", 1 } // 67 - 70 Integer z Z value.
});
getCategory("symmetry")->emplace({
{ "entry_id", mStructureID },
{ "space_group_name_H-M", "P 1" },
{ "Int_Tables_number", 1 }
});
// clang-format on
}
} }
void PDBFileParser::ParseCoordinateTransformation() void PDBFileParser::ParseCoordinateTransformation()
...@@ -6463,7 +6437,12 @@ file read(std::istream &is) ...@@ -6463,7 +6437,12 @@ file read(std::istream &is)
// and so the very first character in a valid PDB file // and so the very first character in a valid PDB file
// is 'H'. It is as simple as that. // is 'H'. It is as simple as that.
if (ch == 'h' or ch == 'H') // Well, not quite, Unfortunately... People insisted that
// having only ATOM records also makes up a valid PDB file...
// Since mmCIF files cannot validly start with a letter character
// the test has changed into the following:
if (std::isalpha(ch))
read_pdb_file(is, result); read_pdb_file(is, result);
else else
{ {
......
...@@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db) ...@@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db)
auto &atom_site = db["atom_site"]; auto &atom_site = db["atom_site"];
auto &atom_site_anisotrop = db["atom_site_anisotrop"]; auto &atom_site_anisotrop = db["atom_site_anisotrop"];
auto m_validator = db.get_validator(); // auto m_validator = db.get_validator();
if (not m_validator) // if (not m_validator)
return; // return;
std::vector<row_handle> to_be_deleted; std::vector<row_handle> to_be_deleted;
bool warnReplaceTypeSymbol = true;
for (auto row : atom_site_anisotrop) for (auto row : atom_site_anisotrop)
{ {
auto parents = atom_site_anisotrop.get_parents(row, atom_site); auto parents = atom_site_anisotrop.get_parents(row, atom_site);
...@@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db) ...@@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db)
if (row["type_symbol"].empty()) if (row["type_symbol"].empty())
row["type_symbol"] = parent["type_symbol"].text(); row["type_symbol"] = parent["type_symbol"].text();
else if (row["type_symbol"].text() != parent["type_symbol"].text())
{
	// The anisotrop row carries a type_symbol that disagrees with the one in
	// `parent` (presumably the matching atom_site row -- confirm against the
	// lookup earlier in this loop). The parent's value takes precedence.

	// Emit the notice at most once per call: std::exchange returns the old
	// flag value and clears it, so later mismatches stay silent.
	if (cif::VERBOSE and std::exchange(warnReplaceTypeSymbol, false))
		std::clog << "Replacing type_symbol in atom_site_anisotrop record(s)\n";

	// Actually overwrite the value. The previous code used `!=` here, which
	// merely compared the two values and discarded the result, so the
	// mismatch was reported but never corrected.
	row["type_symbol"] = parent["type_symbol"].text();
}
if (row["pdbx_auth_alt_id"].empty()) if (row["pdbx_auth_alt_id"].empty())
row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text(); row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
...@@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
// Now see if atom records make sense at all // Now see if atom records make sense at all
checkAtomRecords(db); checkAtomRecords(db);
if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);
std::vector<std::string> invalidCategories; std::vector<std::string> invalidCategories;
// clean up each category // clean up each category
...@@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary) ...@@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
file.load_dictionary(dictionary); file.load_dictionary(dictionary);
if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);
// Now create any missing categories // Now create any missing categories
// Next make sure we have struct_asym records // Next make sure we have struct_asym records
if (db.get("struct_asym") == nullptr) if (db.get("struct_asym") == nullptr)
......
...@@ -41,12 +41,24 @@ TEST_CASE("reconstruct") ...@@ -41,12 +41,24 @@ TEST_CASE("reconstruct")
{ {
std::cout << i->path() << '\n'; std::cout << i->path() << '\n';
cif::file f(i->path()); if (i->path().extension() == ".pdb")
{
cif::file f = cif::pdb::read(i->path());
std::error_code ec; std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
CHECK(cif::pdb::reconstruct_pdbx(f)); if (not cif::pdb::is_valid_pdbx_file(f, ec))
CHECK(cif::pdb::reconstruct_pdbx(f));
}
else
{
cif::file f(i->path());
std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
CHECK(cif::pdb::reconstruct_pdbx(f));
}
} }
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment