Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
pdbfixer
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
pdbfixer
Commits
892470c1
Commit
892470c1
authored
Sep 05, 2013
by
Peter Eastman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Preliminary support for adding missing residues
parent
f8d71ec9
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
158 additions
and
70 deletions
+158
-70
pdbfixer.py
+143
-52
ui.py
+15
-18
No files found.
pdbfixer.py
View file @
892470c1
...
...
@@ -35,6 +35,7 @@ import ui
import
simtk.openmm
as
mm
import
simtk.openmm.app
as
app
import
simtk.unit
as
unit
from
simtk.openmm.app.internal.pdbstructure
import
PdbStructure
from
simtk.openmm.app.element
import
hydrogen
,
oxygen
import
numpy
as
np
import
numpy.linalg
as
lin
...
...
@@ -91,10 +92,12 @@ def _overlayPoints(points1, points2):
return
(
-
1
*
center2
,
np
.
dot
(
u
,
v
)
.
transpose
(),
center1
)
class
PDBFixer
(
object
):
def
__init__
(
self
,
pdb
):
self
.
pdb
=
pdb
self
.
topology
=
pdb
.
topology
self
.
positions
=
pdb
.
positions
def
__init__
(
self
,
structure
):
self
.
structure
=
structure
self
.
pdb
=
app
.
PDBFile
(
structure
)
self
.
topology
=
self
.
pdb
.
topology
self
.
positions
=
self
.
pdb
.
positions
self
.
centroid
=
unit
.
sum
(
self
.
positions
)
/
len
(
self
.
positions
)
# Load the templates.
...
...
@@ -105,7 +108,7 @@ class PDBFixer(object):
name
=
templatePdb
.
topology
.
residues
()
.
next
()
.
name
self
.
templates
[
name
]
=
templatePdb
def
_addAtomsToTopology
(
self
,
missingAtoms
,
missingTerminals
,
heavyAtomsOnly
,
omitUnknownMolecules
):
def
_addAtomsToTopology
(
self
,
heavyAtomsOnly
,
omitUnknownMolecules
):
"""Create a new Topology in which missing atoms have been added."""
newTopology
=
app
.
Topology
()
...
...
@@ -117,18 +120,34 @@ class PDBFixer(object):
for
chain
in
self
.
topology
.
chains
():
if
omitUnknownMolecules
and
not
any
(
residue
.
name
in
self
.
templates
for
residue
in
chain
.
residues
()):
continue
chainResidues
=
list
(
chain
.
residues
())
newChain
=
newTopology
.
addChain
()
for
residue
in
chain
.
residues
():
newResidue
=
newTopology
.
addResidue
(
residue
.
name
,
newChain
)
for
indexInChain
,
residue
in
enumerate
(
chain
.
residues
()):
# Insert missing residues here.
# Add the existing heavy atoms.
insertHere
=
[
r
[
2
]
for
r
in
self
.
missingResidues
if
r
[
0
]
==
chain
.
index
and
r
[
1
]
==
indexInChain
]
if
len
(
insertHere
)
>
0
:
endPosition
=
self
.
_computeResidueCenter
(
residue
)
if
indexInChain
>
0
:
startPosition
=
self
.
_computeResidueCenter
(
chainResidues
[
indexInChain
-
1
])
else
:
outward
=
endPosition
-
self
.
centroid
norm
=
unit
.
norm
(
outward
)
if
norm
>
0
*
unit
.
nanometer
:
outward
*=
len
(
insertHere
)
*
0.5
*
unit
.
nanometer
/
norm
startPosition
=
endPosition
+
outward
self
.
_addMissingResiduesToChain
(
newChain
,
insertHere
,
startPosition
,
endPosition
,
residue
,
newAtoms
,
newPositions
)
# Create the new residue and add existing heavy atoms.
newResidue
=
newTopology
.
addResidue
(
residue
.
name
,
newChain
)
for
atom
in
residue
.
atoms
():
if
not
heavyAtomsOnly
or
(
atom
.
element
is
not
None
and
atom
.
element
!=
hydrogen
):
newAtom
=
newTopology
.
addAtom
(
atom
.
name
,
atom
.
element
,
newResidue
)
existingAtomMap
[
atom
]
=
newAtom
newPositions
.
append
(
self
.
positions
[
atom
.
index
])
if
residue
in
missingAtoms
:
if
residue
in
self
.
missingAtoms
:
# Find corresponding atoms in the residue and the template.
...
...
@@ -148,18 +167,37 @@ class PDBFixer(object):
# Add the missing atoms.
addedAtomMap
[
residue
]
=
{}
for
atom
in
missingAtoms
[
residue
]:
for
atom
in
self
.
missingAtoms
[
residue
]:
newAtom
=
newTopology
.
addAtom
(
atom
.
name
,
atom
.
element
,
newResidue
)
newAtoms
.
append
(
newAtom
)
addedAtomMap
[
residue
][
atom
]
=
newAtom
templatePosition
=
template
.
positions
[
atom
.
index
]
.
value_in_unit
(
unit
.
nanometer
)
newPositions
.
append
((
mm
.
Vec3
(
*
np
.
dot
(
rotate
,
templatePosition
+
translate2
))
+
translate1
)
*
unit
.
nanometer
)
if
residue
in
self
.
missingTerminals
:
terminalsToAdd
=
self
.
missingTerminals
[
residue
]
else
:
terminalsToAdd
=
None
# If this is the end of the chain, add any missing residues that come after it.
if
residue
==
chainResidues
[
-
1
]:
insertHere
=
[
r
[
2
]
for
r
in
self
.
missingResidues
if
r
[
0
]
==
chain
.
index
and
r
[
1
]
>
indexInChain
]
if
len
(
insertHere
)
>
0
:
startPosition
=
self
.
_computeResidueCenter
(
residue
)
outward
=
startPosition
-
self
.
centroid
norm
=
unit
.
norm
(
outward
)
if
norm
>
0
*
unit
.
nanometer
:
outward
*=
len
(
insertHere
)
*
0.5
*
unit
.
nanometer
/
norm
endPosition
=
startPosition
+
outward
self
.
_addMissingResiduesToChain
(
newChain
,
insertHere
,
startPosition
,
endPosition
,
residue
,
newAtoms
,
newPositions
)
newResidue
=
list
(
newChain
.
residues
())[
-
1
]
terminalsToAdd
=
[
'OXT'
]
# If a terminal OXT is missing, add it.
if
residue
in
missingTerminals
:
if
terminalsToAdd
is
not
None
:
atomPositions
=
dict
((
atom
.
name
,
newPositions
[
atom
.
index
]
.
value_in_unit
(
unit
.
nanometer
))
for
atom
in
newResidue
.
atoms
())
if
'OXT'
in
missingTerminals
[
residue
]
:
if
'OXT'
in
terminalsToAdd
:
newAtom
=
newTopology
.
addAtom
(
'OXT'
,
oxygen
,
newResidue
)
newAtoms
.
append
(
newAtom
)
addedOXT
.
append
(
newAtom
)
...
...
@@ -168,48 +206,51 @@ class PDBFixer(object):
d_ca_c
/=
unit
.
sqrt
(
unit
.
dot
(
d_ca_c
,
d_ca_c
))
v
=
d_ca_o
-
d_ca_c
*
unit
.
dot
(
d_ca_c
,
d_ca_o
)
newPositions
.
append
((
atomPositions
[
'O'
]
+
2
*
v
)
*
unit
.
nanometer
)
# Add bonds from the original Topology.
for
atom1
,
atom2
in
self
.
topology
.
bonds
():
if
atom1
in
existingAtomMap
and
atom2
in
existingAtomMap
:
newTopology
.
addBond
(
existingAtomMap
[
atom1
],
existingAtomMap
[
atom2
])
# Add bonds that connect to new atoms.
for
residue
in
missingAtoms
:
template
=
self
.
templates
[
residue
.
name
]
atomsByName
=
dict
((
atom
.
name
,
atom
)
for
atom
in
residue
.
atoms
())
addedAtoms
=
addedAtomMap
[
residue
]
for
atom1
,
atom2
in
template
.
topology
.
bonds
():
if
atom1
in
addedAtoms
or
atom2
in
addedAtoms
:
if
atom1
in
addedAtoms
:
bondAtom1
=
addedAtoms
[
atom1
]
else
:
bondAtom1
=
existingAtomMap
[
atomsByName
[
atom1
.
name
]]
if
atom2
in
addedAtoms
:
bondAtom2
=
addedAtoms
[
atom2
]
else
:
bondAtom2
=
existingAtomMap
[
atomsByName
[
atom2
.
name
]]
newTopology
.
addBond
(
bondAtom1
,
bondAtom2
)
for
atom1
in
addedOXT
:
atom2
=
[
atom
for
atom
in
atom1
.
residue
.
atoms
()
if
atom
.
name
==
'C'
][
0
]
newTopology
.
addBond
(
atom1
,
atom2
)
newTopology
.
createStandardBonds
()
newTopology
.
createDisulfideBonds
(
newPositions
)
# Return the results.
return
(
newTopology
,
newPositions
,
newAtoms
,
existingAtomMap
)
def
_computeResidueCenter
(
self
,
residue
):
return
unit
.
sum
([
self
.
pdb
.
positions
[
atom
.
index
]
for
atom
in
residue
.
atoms
()])
/
len
(
list
(
residue
.
atoms
()))
def
_addMissingResiduesToChain
(
self
,
chain
,
residueNames
,
startPosition
,
endPosition
,
orientTo
,
newAtoms
,
newPositions
):
orientToPositions
=
dict
((
atom
.
name
,
self
.
positions
[
atom
.
index
])
for
atom
in
orientTo
.
atoms
())
for
i
,
residueName
in
enumerate
(
residueNames
):
template
=
self
.
templates
[
residueName
]
# Find a translation that best matches the adjacent residue.
points1
=
[]
points2
=
[]
for
atom
in
template
.
topology
.
atoms
():
if
atom
.
name
in
orientToPositions
:
points1
.
append
(
orientToPositions
[
atom
.
name
]
.
value_in_unit
(
unit
.
nanometer
))
points2
.
append
(
template
.
positions
[
atom
.
index
]
.
value_in_unit
(
unit
.
nanometer
))
(
translate2
,
rotate
,
translate1
)
=
_overlayPoints
(
points1
,
points2
)
# Create the new residue.
newResidue
=
chain
.
topology
.
addResidue
(
residueName
,
chain
)
translate
=
startPosition
+
(
endPosition
-
startPosition
)
*
(
i
+
1.0
)
/
(
len
(
residueNames
)
+
1.0
)
for
atom
in
template
.
topology
.
atoms
():
newAtom
=
chain
.
topology
.
addAtom
(
atom
.
name
,
atom
.
element
,
newResidue
)
newAtoms
.
append
(
newAtom
)
templatePosition
=
template
.
positions
[
atom
.
index
]
.
value_in_unit
(
unit
.
nanometer
)
newPositions
.
append
(
mm
.
Vec3
(
*
np
.
dot
(
rotate
,
templatePosition
))
*
unit
.
nanometer
+
translate
)
def
findNonstandardResidues
(
self
):
return
[
r
for
r
in
self
.
topology
.
residues
()
if
r
.
name
in
substitutions
]
self
.
nonstandardResidues
=
[
r
for
r
in
self
.
topology
.
residues
()
if
r
.
name
in
substitutions
]
def
replaceNonstandardResidues
(
self
,
replaceResidues
):
if
len
(
replace
Residues
)
>
0
:
def
replaceNonstandardResidues
(
self
):
if
len
(
self
.
nonstandard
Residues
)
>
0
:
deleteAtoms
=
[]
# Find atoms that should be deleted.
for
residue
in
replace
Residues
:
for
residue
in
self
.
nonstandard
Residues
:
residue
.
name
=
substitutions
[
residue
.
name
]
template
=
self
.
templates
[
residue
.
name
]
standardAtoms
=
set
(
atom
.
name
for
atom
in
template
.
topology
.
atoms
())
...
...
@@ -224,6 +265,53 @@ class PDBFixer(object):
self
.
topology
=
modeller
.
topology
self
.
positions
=
modeller
.
positions
def
findMissingResidues
(
self
):
chains
=
[
c
for
c
in
self
.
structure
.
iter_chains
()
if
any
(
atom
.
record_name
==
'ATOM'
for
atom
in
c
.
iter_atoms
())]
chainWithGaps
=
{}
# Find the sequence of each chain, with gaps for missing residues.
for
chain
in
chains
:
minResidue
=
min
(
r
.
number
for
r
in
chain
.
iter_residues
())
maxResidue
=
max
(
r
.
number
for
r
in
chain
.
iter_residues
())
residues
=
[
None
]
*
(
maxResidue
-
minResidue
+
1
)
for
r
in
chain
.
iter_residues
():
residues
[
r
.
number
-
minResidue
]
=
r
.
get_name
()
chainWithGaps
[
chain
]
=
residues
# Try to find the chain that matches each sequence.
chainSequence
=
{}
chainOffset
=
{}
for
sequence
in
self
.
structure
.
sequences
:
for
chain
in
chains
:
if
chain
.
chain_id
!=
sequence
.
chain_id
:
continue
if
chain
in
chainSequence
:
continue
for
offset
in
range
(
len
(
sequence
.
residues
)
-
len
(
chainWithGaps
[
chain
])
+
1
):
if
all
(
a
==
b
or
b
==
None
for
a
,
b
in
zip
(
sequence
.
residues
[
offset
:],
chainWithGaps
[
chain
])):
chainSequence
[
chain
]
=
sequence
chainOffset
[
chain
]
=
offset
break
if
chain
in
chainSequence
:
break
# Now build the list of residues to add.
self
.
missingResidues
=
[]
for
structChain
,
topChain
in
zip
(
self
.
structure
.
iter_chains
(),
self
.
pdb
.
topology
.
chains
()):
if
structChain
in
chainSequence
:
offset
=
chainOffset
[
structChain
]
sequence
=
chainSequence
[
structChain
]
.
residues
gappedSequence
=
chainWithGaps
[
structChain
]
index
=
0
for
i
in
range
(
len
(
sequence
)):
if
i
<
offset
or
i
>=
len
(
gappedSequence
)
+
offset
or
gappedSequence
[
i
-
offset
]
is
None
:
self
.
missingResidues
.
append
((
topChain
.
index
,
index
,
sequence
[
i
]))
else
:
index
+=
1
def
findMissingAtoms
(
self
):
missingAtoms
=
{}
missingTerminals
=
{}
...
...
@@ -249,18 +337,19 @@ class PDBFixer(object):
# Add missing terminal atoms.
terminals
=
[]
if
residue
==
chainResidues
[
-
1
]:
if
residue
==
chainResidues
[
-
1
]
and
not
any
(
r
[
0
]
==
chain
.
index
and
r
[
1
]
>=
len
(
chainResidues
)
for
r
in
self
.
missingResidues
)
:
templateNames
=
set
(
atom
.
name
for
atom
in
template
.
topology
.
atoms
())
if
'OXT'
not
in
atomNames
and
all
(
name
in
templateNames
for
name
in
[
'C'
,
'O'
,
'CA'
]):
terminals
.
append
(
'OXT'
)
if
len
(
terminals
)
>
0
:
missingTerminals
[
residue
]
=
terminals
return
(
missingAtoms
,
missingTerminals
)
self
.
missingAtoms
=
missingAtoms
self
.
missingTerminals
=
missingTerminals
def
addMissingAtoms
(
self
,
missingAtoms
,
missingTerminals
):
def
addMissingAtoms
(
self
):
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
(
newTopology
,
newPositions
,
newAtoms
,
existingAtomMap
)
=
self
.
_addAtomsToTopology
(
missingAtoms
,
missingTerminals
,
True
,
True
)
(
newTopology
,
newPositions
,
newAtoms
,
existingAtomMap
)
=
self
.
_addAtomsToTopology
(
True
,
True
)
if
len
(
newAtoms
)
>
0
:
# Create a System for energy minimizing it.
...
...
@@ -297,7 +386,7 @@ class PDBFixer(object):
# Now create a new Topology, including all atoms from the original one and adding the missing atoms.
(
newTopology2
,
newPositions2
,
newAtoms2
,
existingAtomMap2
)
=
self
.
_addAtomsToTopology
(
missingAtoms
,
missingTerminals
,
False
,
False
)
(
newTopology2
,
newPositions2
,
newAtoms2
,
existingAtomMap2
)
=
self
.
_addAtomsToTopology
(
False
,
False
)
# Copy over the minimized positions for the new atoms.
...
...
@@ -365,8 +454,10 @@ if __name__=='__main__':
if
len
(
sys
.
argv
)
<
2
:
ui
.
launchUI
()
else
:
fixer
=
PDBFixer
(
app
.
PDBFile
(
sys
.
argv
[
1
]))
fixer
.
replaceNonstandardResidues
(
fixer
.
findNonstandardResidues
())
missingAtoms
,
missingTerminals
=
fixer
.
findMissingAtoms
()
fixer
.
addMissingAtoms
(
missingAtoms
,
missingTerminals
)
fixer
=
PDBFixer
(
PdbStructure
(
open
(
sys
.
argv
[
1
])))
fixer
.
findMissingResidues
()
fixer
.
findNonstandardResidues
()
fixer
.
replaceNonstandardResidues
()
fixer
.
findMissingAtoms
()
fixer
.
addMissingAtoms
()
app
.
PDBFile
.
writeFile
(
fixer
.
topology
,
fixer
.
positions
,
open
(
'output.pdb'
,
'w'
))
ui.py
View file @
892470c1
import
simtk.openmm.app
as
app
from
simtk.openmm.app.internal.pdbstructure
import
PdbStructure
from
pdbfixer
import
PDBFixer
,
substitutions
import
uiserver
import
webbrowser
...
...
@@ -7,19 +8,17 @@ from cStringIO import StringIO
def
startPageCallback
(
parameters
,
handler
):
if
'pdbfile'
in
parameters
:
global
fixer
pdb
=
app
.
PDBFil
e
(
parameters
[
'pdbfile'
]
.
value
.
splitlines
())
pdb
=
PdbStructur
e
(
parameters
[
'pdbfile'
]
.
value
.
splitlines
())
fixer
=
PDBFixer
(
pdb
)
displayConvertResiduesPage
()
def
convertResiduesPageCallback
(
parameters
,
handler
):
global
nonstandard
nonstandard
=
[
residue
for
i
,
residue
in
enumerate
(
nonstandard
)
if
'convert'
+
str
(
i
)
in
parameters
]
fixer
.
replaceNonstandardResidues
(
nonstandard
)
nonstandard
=
[]
fixer
.
nonstandardResidues
=
[
residue
for
i
,
residue
in
enumerate
(
fixer
.
nonstandardResidues
)
if
'convert'
+
str
(
i
)
in
parameters
]
fixer
.
replaceNonstandardResidues
()
displayMissingAtomsPage
()
def
missingAtomsPageCallback
(
parameters
,
handler
):
fixer
.
addMissingAtoms
(
missingAtoms
,
missingTerminals
)
fixer
.
addMissingAtoms
()
displayDownloadPage
()
def
downloadPageCallback
(
parameters
,
handler
):
...
...
@@ -50,9 +49,9 @@ PDB File: <input type="file" name="pdbfile"/>
def
displayConvertResiduesPage
():
uiserver
.
setCallback
(
convertResiduesPageCallback
)
global
nonstandard
nonstandard
=
fixer
.
findNonstandardResidues
()
if
len
(
nonstandard
)
==
0
:
fixer
.
findMissingResidues
()
fixer
.
findNonstandardResidues
()
if
len
(
fixer
.
nonstandardResidues
)
==
0
:
displayMissingAtomsPage
()
return
indexInChain
=
{}
...
...
@@ -60,7 +59,7 @@ def displayConvertResiduesPage():
for
i
,
residue
in
enumerate
(
chain
.
residues
()):
indexInChain
[
residue
]
=
i
+
1
table
=
""
for
i
,
residue
in
enumerate
(
nonstandard
):
for
i
,
residue
in
enumerate
(
fixer
.
nonstandardResidues
):
table
+=
' <tr><td>
%
d</td><td>
%
s
%
d</td><td>
%
s</td><td><input type="checkbox" name="convert
%
d" checked></td></tr>
\n
'
%
(
residue
.
chain
.
index
+
1
,
residue
.
name
,
indexInChain
[
residue
],
substitutions
[
residue
.
name
],
i
)
uiserver
.
setContent
(
"""
<html>
...
...
@@ -82,10 +81,8 @@ This PDB file contains non-standard residues. Do you want to convert them to th
def
displayMissingAtomsPage
():
uiserver
.
setCallback
(
missingAtomsPageCallback
)
global
missingAtoms
global
missingTerminals
missingAtoms
,
missingTerminals
=
fixer
.
findMissingAtoms
()
allResidues
=
list
(
set
(
missingAtoms
.
iterkeys
())
.
union
(
missingTerminals
.
iterkeys
()))
fixer
.
findMissingAtoms
()
allResidues
=
list
(
set
(
fixer
.
missingAtoms
.
iterkeys
())
.
union
(
fixer
.
missingTerminals
.
iterkeys
()))
allResidues
.
sort
(
key
=
lambda
x
:
x
.
index
)
if
len
(
allResidues
)
==
0
:
displayDownloadPage
()
...
...
@@ -97,10 +94,10 @@ def displayMissingAtomsPage():
table
=
""
for
residue
in
allResidues
:
atoms
=
[]
if
residue
in
missingAtoms
:
atoms
.
extend
(
atom
.
name
for
atom
in
missingAtoms
[
residue
])
if
residue
in
missingTerminals
:
atoms
.
extend
(
atom
for
atom
in
missingTerminals
[
residue
])
if
residue
in
fixer
.
missingAtoms
:
atoms
.
extend
(
atom
.
name
for
atom
in
fixer
.
missingAtoms
[
residue
])
if
residue
in
fixer
.
missingTerminals
:
atoms
.
extend
(
atom
for
atom
in
fixer
.
missingTerminals
[
residue
])
table
+=
' <tr><td>
%
d</td><td>
%
s
%
d</td><td>
%
s</td></tr>
\n
'
%
(
residue
.
chain
.
index
+
1
,
residue
.
name
,
indexInChain
[
residue
],
', '
.
join
(
atoms
))
uiserver
.
setContent
(
"""
<html>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment