Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
libcifpp
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
libcifpp
Commits
12ee4a79
Commit
12ee4a79
authored
Jan 29, 2024
by
Maarten L. Hekkelman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
pdb2cif work
parent
e5975038
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
81 additions
and
82 deletions
+81
-82
.gitignore
+2
-2
src/pdb/pdb2cif.cpp
+48
-69
src/pdb/reconstruct.cpp
+13
-6
test/reconstruction-test.cpp
+18
-5
No files found.
.gitignore
View file @
12ee4a79
...
@@ -11,4 +11,5 @@ Testing/
...
@@ -11,4 +11,5 @@ Testing/
include/cif++/exports.hpp
include/cif++/exports.hpp
docs/api
docs/api
docs/conf.py
docs/conf.py
build_ci/
build_ci/
\ No newline at end of file
data/components.cif
src/pdb/pdb2cif.cpp
View file @
12ee4a79
...
@@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is)
...
@@ -1123,9 +1123,6 @@ void PDBFileParser::PreParseInput(std::istream &is)
if
(
lookahead
.
back
()
==
'\r'
)
if
(
lookahead
.
back
()
==
'\r'
)
lookahead
.
pop_back
();
lookahead
.
pop_back
();
// if (cif::starts_with(lookahead, "HEADER") == false)
// throw std::runtime_error("This does not look like a PDB file, should start with a HEADER line");
auto
contNr
=
[
&
lookahead
](
int
offset
,
int
len
)
->
int
auto
contNr
=
[
&
lookahead
](
int
offset
,
int
len
)
->
int
{
{
std
::
string
cs
=
lookahead
.
substr
(
offset
,
len
);
std
::
string
cs
=
lookahead
.
substr
(
offset
,
len
);
...
@@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle()
...
@@ -1558,52 +1555,54 @@ void PDBFileParser::ParseTitle()
// 11 - 80 Specification compound Description of the molecular components.
// 11 - 80 Specification compound Description of the molecular components.
// list
// list
std
::
string
value
{
mRec
->
vS
(
11
)
};
if
(
mRec
->
is
(
"COMPND"
))
if
(
value
.
find
(
':'
)
==
std
::
string
::
npos
)
{
// special case for dumb, stripped files
auto
&
comp
=
GetOrCreateCompound
(
1
);
comp
.
mInfo
[
"MOLECULE"
]
=
value
;
}
else
{
{
SpecificationListParser
p
(
value
);
std
::
string
value
{
mRec
->
vS
(
11
)
};
if
(
value
.
find
(
':'
)
==
std
::
string
::
npos
)
for
(;;)
{
{
std
::
string
key
,
val
;
// special case for dumb, stripped files
std
::
tie
(
key
,
val
)
=
p
.
GetNextSpecification
();
auto
&
comp
=
GetOrCreateCompound
(
1
);
comp
.
mInfo
[
"MOLECULE"
]
=
value
;
if
(
key
.
empty
())
}
break
;
else
{
SpecificationListParser
p
(
value
);
if
(
not
iequals
(
key
,
"MOL_ID"
)
and
mCompounds
.
empty
()
)
for
(;;
)
{
{
if
(
cif
::
VERBOSE
>
0
)
std
::
string
key
,
val
;
std
::
cerr
<<
"Ignoring invalid COMPND record
\n
"
;
std
::
tie
(
key
,
val
)
=
p
.
GetNextSpecification
();
break
;
}
if
(
key
==
"MOL_ID"
)
if
(
key
.
empty
())
{
break
;
auto
&
comp
=
GetOrCreateCompound
(
stoi
(
val
));
comp
.
mTitle
=
title
;
if
(
not
iequals
(
key
,
"MOL_ID"
)
and
mCompounds
.
empty
())
}
else
if
(
key
==
"CHAIN"
)
{
for
(
auto
c
:
cif
::
split
<
std
::
string
>
(
val
,
","
))
{
{
cif
::
trim
(
c
);
if
(
cif
::
VERBOSE
>
0
)
mCompounds
.
back
().
mChains
.
insert
(
c
[
0
]);
std
::
cerr
<<
"Ignoring invalid COMPND record
\n
"
;
break
;
}
}
if
(
key
==
"MOL_ID"
)
{
auto
&
comp
=
GetOrCreateCompound
(
stoi
(
val
));
comp
.
mTitle
=
title
;
}
else
if
(
key
==
"CHAIN"
)
{
for
(
auto
c
:
cif
::
split
<
std
::
string
>
(
val
,
","
))
{
cif
::
trim
(
c
);
mCompounds
.
back
().
mChains
.
insert
(
c
[
0
]);
}
}
else
mCompounds
.
back
().
mInfo
[
key
]
=
val
;
}
}
else
mCompounds
.
back
().
mInfo
[
key
]
=
val
;
}
}
}
if
(
mRec
->
is
(
"COMPND"
))
GetNextRecord
();
GetNextRecord
();
}
// SOURCE
// SOURCE
Match
(
"SOURCE"
,
false
);
Match
(
"SOURCE"
,
false
);
...
@@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle()
...
@@ -1740,7 +1739,7 @@ void PDBFileParser::ParseTitle()
int
n
=
1
;
int
n
=
1
;
cat
=
getCategory
(
"audit_author"
);
cat
=
getCategory
(
"audit_author"
);
value
=
{
mRec
->
vS
(
11
)
};
std
::
string
value
=
{
mRec
->
vS
(
11
)
};
for
(
auto
author
:
cif
::
split
<
std
::
string
>
(
value
,
","
,
true
))
for
(
auto
author
:
cif
::
split
<
std
::
string
>
(
value
,
","
,
true
))
{
{
// clang-format off
// clang-format off
...
@@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities()
...
@@ -4556,7 +4555,7 @@ void PDBFileParser::ConstructEntities()
std
::
string
formula
;
std
::
string
formula
;
std
::
string
type
;
std
::
string
type
;
std
::
string
nstd
=
"."
;
std
::
string
nstd
=
"."
;
std
::
string
formulaWeight
;
std
::
optional
<
float
>
formulaWeight
;
if
(
compound
!=
nullptr
)
if
(
compound
!=
nullptr
)
{
{
...
@@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities()
...
@@ -4567,7 +4566,7 @@ void PDBFileParser::ConstructEntities()
nstd
=
"y"
;
nstd
=
"y"
;
formula
=
compound
->
formula
();
formula
=
compound
->
formula
();
formulaWeight
=
std
::
to_string
(
compound
->
formula_weight
()
);
formulaWeight
=
compound
->
formula_weight
(
);
}
}
if
(
name
.
empty
())
if
(
name
.
empty
())
...
@@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities()
...
@@ -4594,7 +4593,7 @@ void PDBFileParser::ConstructEntities()
{
"id"
,
cc
},
{
"id"
,
cc
},
{
"name"
,
name
},
{
"name"
,
name
},
{
"formula"
,
formula
},
{
"formula"
,
formula
},
{
"formula_weight"
,
formulaWeight
},
{
"formula_weight"
,
formulaWeight
,
3
},
{
"mon_nstd_flag"
,
nstd
},
{
"mon_nstd_flag"
,
nstd
},
{
"type"
,
type
}
{
"type"
,
type
}
});
});
...
@@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities()
...
@@ -4709,7 +4708,7 @@ void PDBFileParser::ConstructEntities()
}
}
if
(
formula_weight
>
0
)
if
(
formula_weight
>
0
)
entity
[
"formula_weight"
]
=
formula_weight
;
entity
.
assign
({
{
"formula_weight"
,
formula_weight
,
3
}
})
;
}
}
}
}
...
@@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic()
...
@@ -5578,31 +5577,6 @@ void PDBFileParser::ParseCrystallographic()
GetNextRecord
();
GetNextRecord
();
}
}
else
{
// clang-format off
// no cryst1, make a simple one, like this:
// CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
getCategory
(
"cell"
)
->
emplace
({
{
"entry_id"
,
mStructureID
},
// 1 - 6 Record name "CRYST1"
{
"length_a"
,
1
},
// 7 - 15 Real(9.3) a a (Angstroms).
{
"length_b"
,
1
},
// 16 - 24 Real(9.3) b b (Angstroms).
{
"length_c"
,
1
},
// 25 - 33 Real(9.3) c c (Angstroms).
{
"angle_alpha"
,
90
},
// 34 - 40 Real(7.2) alpha alpha (degrees).
{
"angle_beta"
,
90
},
// 41 - 47 Real(7.2) beta beta (degrees).
{
"angle_gamma"
,
90
},
// 48 - 54 Real(7.2) gamma gamma (degrees).
/* goes into symmetry */
// 56 - 66 LString sGroup Space group.
{
"Z_PDB"
,
1
}
// 67 - 70 Integer z Z value.
});
getCategory
(
"symmetry"
)
->
emplace
({
{
"entry_id"
,
mStructureID
},
{
"space_group_name_H-M"
,
"P 1"
},
{
"Int_Tables_number"
,
1
}
});
// clang-format on
}
}
}
void
PDBFileParser
::
ParseCoordinateTransformation
()
void
PDBFileParser
::
ParseCoordinateTransformation
()
...
@@ -6463,7 +6437,12 @@ file read(std::istream &is)
...
@@ -6463,7 +6437,12 @@ file read(std::istream &is)
// and so the very first character in a valid PDB file
// and so the very first character in a valid PDB file
// is 'H'. It is as simple as that.
// is 'H'. It is as simple as that.
if
(
ch
==
'h'
or
ch
==
'H'
)
// Well, not quite, unfortunately... People insisted that
// having only ATOM records also makes up a valid PDB file...
// An mmCIF file normally opens with a '#' comment or a 'data_'
// block header, so a leading letter is taken to mean PDB unless
// that letter is 'd'/'D', which is left to the mmCIF branch below.
// NOTE(review): a stripped PDB file whose first record starts with
// 'D' (e.g. DBREF) will now be tried as mmCIF - confirm this
// trade-off is acceptable.
// The cast keeps std::isalpha well-defined should ch be a plain
// (possibly negative) char.
if (std::isalpha(static_cast<unsigned char>(ch)) and ch != 'd' and ch != 'D')
read_pdb_file
(
is
,
result
);
read_pdb_file
(
is
,
result
);
else
else
{
{
...
...
src/pdb/reconstruct.cpp
View file @
12ee4a79
...
@@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db)
...
@@ -491,12 +491,13 @@ void checkAtomAnisotropRecords(datablock &db)
auto
&
atom_site
=
db
[
"atom_site"
];
auto
&
atom_site
=
db
[
"atom_site"
];
auto
&
atom_site_anisotrop
=
db
[
"atom_site_anisotrop"
];
auto
&
atom_site_anisotrop
=
db
[
"atom_site_anisotrop"
];
auto
m_validator
=
db
.
get_validator
();
//
auto m_validator = db.get_validator();
if
(
not
m_validator
)
//
if (not m_validator)
return
;
//
return;
std
::
vector
<
row_handle
>
to_be_deleted
;
std
::
vector
<
row_handle
>
to_be_deleted
;
bool
warnReplaceTypeSymbol
=
true
;
for
(
auto
row
:
atom_site_anisotrop
)
for
(
auto
row
:
atom_site_anisotrop
)
{
{
auto
parents
=
atom_site_anisotrop
.
get_parents
(
row
,
atom_site
);
auto
parents
=
atom_site_anisotrop
.
get_parents
(
row
,
atom_site
);
...
@@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db)
...
@@ -512,6 +513,12 @@ void checkAtomAnisotropRecords(datablock &db)
if
(
row
[
"type_symbol"
].
empty
())
if
(
row
[
"type_symbol"
].
empty
())
row
[
"type_symbol"
]
=
parent
[
"type_symbol"
].
text
();
row
[
"type_symbol"
]
=
parent
[
"type_symbol"
].
text
();
else if (row["type_symbol"].text() != parent["type_symbol"].text())
{
	// The anisotrop row carries a type_symbol that differs from the one
	// in its parent atom_site row; the parent's value is copied over it.
	// std::exchange clears the flag when it is evaluated, so the notice
	// is written at most once for the whole loop.
	if (cif::VERBOSE and std::exchange(warnReplaceTypeSymbol, false))
		std::clog << "Replacing type_symbol in atom_site_anisotrop record(s)\n";

	// Plain assignment, the same way the empty-value branch above fills
	// the field in. A comparison here would be evaluated and discarded,
	// leaving the mismatching value in place.
	row["type_symbol"] = parent["type_symbol"].text();
}
if
(
row
[
"pdbx_auth_alt_id"
].
empty
())
if
(
row
[
"pdbx_auth_alt_id"
].
empty
())
row
[
"pdbx_auth_alt_id"
]
=
parent
[
"pdbx_auth_alt_id"
].
text
();
row
[
"pdbx_auth_alt_id"
]
=
parent
[
"pdbx_auth_alt_id"
].
text
();
...
@@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
...
@@ -1019,9 +1026,6 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
// Now see if atom records make sense at all
// Now see if atom records make sense at all
checkAtomRecords
(
db
);
checkAtomRecords
(
db
);
if
(
db
.
get
(
"atom_site_anisotrop"
))
checkAtomAnisotropRecords
(
db
);
std
::
vector
<
std
::
string
>
invalidCategories
;
std
::
vector
<
std
::
string
>
invalidCategories
;
// clean up each category
// clean up each category
...
@@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
...
@@ -1244,6 +1248,9 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
file
.
load_dictionary
(
dictionary
);
file
.
load_dictionary
(
dictionary
);
if
(
db
.
get
(
"atom_site_anisotrop"
))
checkAtomAnisotropRecords
(
db
);
// Now create any missing categories
// Now create any missing categories
// Next make sure we have struct_asym records
// Next make sure we have struct_asym records
if
(
db
.
get
(
"struct_asym"
)
==
nullptr
)
if
(
db
.
get
(
"struct_asym"
)
==
nullptr
)
...
...
test/reconstruction-test.cpp
View file @
12ee4a79
...
@@ -41,12 +41,24 @@ TEST_CASE("reconstruct")
...
@@ -41,12 +41,24 @@ TEST_CASE("reconstruct")
{
{
std
::
cout
<<
i
->
path
()
<<
'\n'
;
std
::
cout
<<
i
->
path
()
<<
'\n'
;
cif
::
file
f
(
i
->
path
());
if
(
i
->
path
().
extension
()
==
".pdb"
)
{
cif
::
file
f
=
cif
::
pdb
::
read
(
i
->
path
());
std
::
error_code
ec
;
std
::
error_code
ec
;
CHECK_FALSE
(
cif
::
pdb
::
is_valid_pdbx_file
(
f
,
ec
));
CHECK
(
ec
!=
std
::
errc
{});
CHECK
(
cif
::
pdb
::
reconstruct_pdbx
(
f
));
if
(
not
cif
::
pdb
::
is_valid_pdbx_file
(
f
,
ec
))
CHECK
(
cif
::
pdb
::
reconstruct_pdbx
(
f
));
}
else
{
cif
::
file
f
(
i
->
path
());
std
::
error_code
ec
;
CHECK_FALSE
(
cif
::
pdb
::
is_valid_pdbx_file
(
f
,
ec
));
CHECK
(
ec
!=
std
::
errc
{});
CHECK
(
cif
::
pdb
::
reconstruct_pdbx
(
f
));
}
}
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment