Commit 12ee4a79 by Maarten L. Hekkelman

pdb2cif work

parent e5975038
......@@ -11,4 +11,5 @@ Testing/
include/cif++/exports.hpp
docs/api
docs/conf.py
build_ci/
\ No newline at end of file
build_ci/
data/components.cif
......@@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is)
if (lookahead.back() == '\r')
lookahead.pop_back();
// if (cif::starts_with(lookahead, "HEADER") == false)
// throw std::runtime_error("This does not look like a PDB file, should start with a HEADER line");
auto contNr = [&lookahead](int offset, int len) -> int
{
std::string cs = lookahead.substr(offset, len);
......@@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle()
// 11 - 80 Specification compound Description of the molecular components.
// list
std::string value{ mRec->vS(11) };
if (value.find(':') == std::string::npos)
{
// special case for dumb, stripped files
auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
}
else
if (mRec->is("COMPND"))
{
SpecificationListParser p(value);
for (;;)
std::string value{ mRec->vS(11) };
if (value.find(':') == std::string::npos)
{
std::string key, val;
std::tie(key, val) = p.GetNextSpecification();
if (key.empty())
break;
// special case for dumb, stripped files
auto &comp = GetOrCreateCompound(1);
comp.mInfo["MOLECULE"] = value;
}
else
{
SpecificationListParser p(value);
if (not iequals(key, "MOL_ID") and mCompounds.empty())
for (;;)
{
if (cif::VERBOSE > 0)
std::cerr << "Ignoring invalid COMPND record\n";
break;
}
std::string key, val;
std::tie(key, val) = p.GetNextSpecification();
if (key == "MOL_ID")
{
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title;
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
if (key.empty())
break;
if (not iequals(key, "MOL_ID") and mCompounds.empty())
{
cif::trim(c);
mCompounds.back().mChains.insert(c[0]);
if (cif::VERBOSE > 0)
std::cerr << "Ignoring invalid COMPND record\n";
break;
}
if (key == "MOL_ID")
{
auto &comp = GetOrCreateCompound(stoi(val));
comp.mTitle = title;
}
else if (key == "CHAIN")
{
for (auto c : cif::split<std::string>(val, ","))
{
cif::trim(c);
mCompounds.back().mChains.insert(c[0]);
}
}
else
mCompounds.back().mInfo[key] = val;
}
else
mCompounds.back().mInfo[key] = val;
}
}
if (mRec->is("COMPND"))
GetNextRecord();
}
// SOURCE
Match("SOURCE", false);
......@@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle()
int n = 1;
cat = getCategory("audit_author");
value = { mRec->vS(11) };
std::string value = { mRec->vS(11) };
for (auto author : cif::split<std::string>(value, ",", true))
{
// clang-format off
......@@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities()
std::string formula;
std::string type;
std::string nstd = ".";
std::string formulaWeight;
std::optional<float> formulaWeight;
if (compound != nullptr)
{
......@@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities()
nstd = "y";
formula = compound->formula();
formulaWeight = std::to_string(compound->formula_weight());
formulaWeight = compound->formula_weight();
}
if (name.empty())
......@@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities()
{ "id", cc },
{ "name", name },
{ "formula", formula },
{ "formula_weight", formulaWeight },
{ "formula_weight", formulaWeight, 3 },
{ "mon_nstd_flag", nstd },
{ "type", type }
});
......@@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities()
}
if (formula_weight > 0)
entity["formula_weight"] = formula_weight;
entity.assign({ { "formula_weight", formula_weight, 3 } });
}
}
......@@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic()
GetNextRecord();
}
else
{
// clang-format off
// no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory("cell")->emplace({
{ "entry_id", mStructureID }, // 1 - 6 Record name "CRYST1"
{ "length_a", 1 }, // 7 - 15 Real(9.3) a a (Angstroms).
{ "length_b", 1 }, // 16 - 24 Real(9.3) b b (Angstroms).
{ "length_c", 1 }, // 25 - 33 Real(9.3) c c (Angstroms).
{ "angle_alpha", 90 }, // 34 - 40 Real(7.2) alpha alpha (degrees).
{ "angle_beta", 90 }, // 41 - 47 Real(7.2) beta beta (degrees).
{ "angle_gamma", 90 }, // 48 - 54 Real(7.2) gamma gamma (degrees).
/* goes into symmetry */ // 56 - 66 LString sGroup Space group.
{ "Z_PDB", 1 } // 67 - 70 Integer z Z value.
});
getCategory("symmetry")->emplace({
{ "entry_id", mStructureID },
{ "space_group_name_H-M", "P 1" },
{ "Int_Tables_number", 1 }
});
// clang-format on
}
}
void PDBFileParser::ParseCoordinateTransformation()
......@@ -6463,7 +6437,12 @@ file read(std::istream &is)
// and so the very first character in a valid PDB file
// is 'H'. It is as simple as that.
if (ch == 'h' or ch == 'H')
// Well, not quite, Unfortunately... People insisted that
// having only ATOM records also makes up a valid PDB file...
// Since mmCIF files cannot validly start with a letter character
// the test has changed into the following:
if (std::isalpha(ch))
read_pdb_file(is, result);
else
{
......
......@@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db)
auto &atom_site = db["atom_site"];
auto &atom_site_anisotrop = db["atom_site_anisotrop"];
auto m_validator = db.get_validator();
if (not m_validator)
return;
// auto m_validator = db.get_validator();
// if (not m_validator)
// return;
std::vector<row_handle> to_be_deleted;
bool warnReplaceTypeSymbol = true;
for (auto row : atom_site_anisotrop)
{
auto parents = atom_site_anisotrop.get_parents(row, atom_site);
......@@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db)
if (row["type_symbol"].empty())
row["type_symbol"] = parent["type_symbol"].text();
else if (row["type_symbol"].text() != parent["type_symbol"].text())
{
if (cif::VERBOSE and std::exchange(warnReplaceTypeSymbol, false))
std::clog << "Replacing type_symbol in atom_site_anisotrop record(s)\n";
row["type_symbol"] != parent["type_symbol"].text();
}
if (row["pdbx_auth_alt_id"].empty())
row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
......@@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
// Now see if atom records make sense at all
checkAtomRecords(db);
if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);
std::vector<std::string> invalidCategories;
// clean up each category
......@@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
file.load_dictionary(dictionary);
if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);
// Now create any missing categories
// Next make sure we have struct_asym records
if (db.get("struct_asym") == nullptr)
......
......@@ -41,12 +41,24 @@ TEST_CASE("reconstruct")
{
std::cout << i->path() << '\n';
cif::file f(i->path());
if (i->path().extension() == ".pdb")
{
cif::file f = cif::pdb::read(i->path());
std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
std::error_code ec;
CHECK(cif::pdb::reconstruct_pdbx(f));
if (not cif::pdb::is_valid_pdbx_file(f, ec))
CHECK(cif::pdb::reconstruct_pdbx(f));
}
else
{
cif::file f(i->path());
std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
CHECK(cif::pdb::reconstruct_pdbx(f));
}
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment