Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
P
pdbfixer
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
open
pdbfixer
Commits
82ce81d0
Commit
82ce81d0
authored
Aug 06, 2015
by
Peter Eastman
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added support for loading PDBx/mmCIF files
parent
bbb8cfa8
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
105 additions
and
35 deletions
+105
-35
pdbfixer/html/addResidues.html
+1
-1
pdbfixer/pdbfixer.py
+99
-33
pdbfixer/ui.py
+5
-1
No files found.
pdbfixer/html/addResidues.html
View file @
82ce81d0
The
SEQRES
records in this PDB file include residues that are missing from the atom data section. Do you want to add the missing residues?
The
sequence
records in this PDB file include residues that are missing from the atom data section. Do you want to add the missing residues?
<p>
<p>
<form
id=
"mainform"
method=
"post"
action=
"/"
>
<form
id=
"mainform"
method=
"post"
action=
"/"
>
<table
border=
"1"
id=
"table"
>
<table
border=
"1"
id=
"table"
>
...
...
pdbfixer/pdbfixer.py
View file @
82ce81d0
...
@@ -36,6 +36,7 @@ import simtk.openmm as mm
...
@@ -36,6 +36,7 @@ import simtk.openmm as mm
import
simtk.openmm.app
as
app
import
simtk.openmm.app
as
app
import
simtk.unit
as
unit
import
simtk.unit
as
unit
from
simtk.openmm.app.internal.pdbstructure
import
PdbStructure
from
simtk.openmm.app.internal.pdbstructure
import
PdbStructure
from
simtk.openmm.app.internal.pdbx.reader.PdbxReader
import
PdbxReader
from
simtk.openmm.app.element
import
hydrogen
,
oxygen
from
simtk.openmm.app.element
import
hydrogen
,
oxygen
from
simtk.openmm.app.forcefield
import
NonbondedGenerator
from
simtk.openmm.app.forcefield
import
NonbondedGenerator
import
numpy
as
np
import
numpy
as
np
...
@@ -47,11 +48,12 @@ import math
...
@@ -47,11 +48,12 @@ import math
from
pkg_resources
import
resource_filename
from
pkg_resources
import
resource_filename
# Imports for urlopen
try
:
if
sys
.
version_info
>=
(
3
,
0
):
from
urllib.request
import
urlopen
from
urllib.request
import
urlopen
else
:
from
io
import
StringIO
except
:
from
urllib2
import
urlopen
from
urllib2
import
urlopen
from
cStringIO
import
StringIO
substitutions
=
{
substitutions
=
{
'2AS'
:
'ASP'
,
'3AH'
:
'HIS'
,
'5HP'
:
'GLU'
,
'ACL'
:
'ARG'
,
'AGM'
:
'ARG'
,
'AIB'
:
'ALA'
,
'ALM'
:
'ALA'
,
'ALO'
:
'THR'
,
'ALY'
:
'LYS'
,
'ARM'
:
'ARG'
,
'2AS'
:
'ASP'
,
'3AH'
:
'HIS'
,
'5HP'
:
'GLU'
,
'ACL'
:
'ARG'
,
'AGM'
:
'ARG'
,
'AIB'
:
'ALA'
,
'ALM'
:
'ALA'
,
'ALO'
:
'THR'
,
'ALY'
:
'LYS'
,
'ARM'
:
'ARG'
,
...
@@ -147,27 +149,34 @@ def _findUnoccupiedDirection(point, positions):
...
@@ -147,27 +149,34 @@ def _findUnoccupiedDirection(point, positions):
return
direction
return
direction
class
PDBFixer
(
object
):
class
PDBFixer
(
object
):
"""PDBFixer implements many tools for fixing problems in PDB files.
"""PDBFixer implements many tools for fixing problems in PDB
and PDBx/mmCIF
files.
"""
"""
def
__init__
(
self
,
filename
=
None
,
pdbfile
=
None
,
url
=
None
,
pdbid
=
None
):
def
__init__
(
self
,
filename
=
None
,
pdbfile
=
None
,
pdbxfile
=
None
,
url
=
None
,
pdbid
=
None
):
"""Create a new PDBFixer instance to fix problems in a PDB file.
"""Create a new PDBFixer instance to fix problems in a PDB
or PDBx/mmCIF
file.
Parameters
Parameters
----------
----------
filename : str, optional, default=None
filename : str, optional, default=None
A filename specifying the file from which the PDB file is to be read.
The name of the file to read. The format is determined automatically based on the filename extension. If
it ends in either ".pdbx" or ".cif", it is assumed to be a PDBx/mmCIF file. Otherwise, it is assumed to be
a PDB file.
pdbfile : file, optional, default=None
pdbfile : file, optional, default=None
A file-like object from which the PDB file is to be read.
A file-like object from which the PDB file is to be read.
The file is not closed after reading.
The file is not closed after reading.
pdbxfile : file, optional, default=None
A file-like object from which the PDBx/mmCIF file is to be read.
The file is not closed after reading.
url : str, optional, default=None
url : str, optional, default=None
A URL specifying the internet location from which the PDB file contents should be retrieved.
A URL specifying the internet location from which the file contents should be retrieved. The format is
determined automatically by looking for a filename extension. If the URL contains either ".pdbx" or ".cif",
it is assumed to be a PDBx/mmCIF file. Otherwise, it is assumed to be a PDB file.
pdbid : str, optional, default=None
pdbid : str, optional, default=None
A four-letter PDB code specifying the structure to be retrieved from the RCSB.
A four-letter PDB code specifying the structure to be retrieved from the RCSB.
Notes
Notes
-----
-----
Only one of structure, filename, pdbfile, url, or pdbid may be specified or an exception will be thrown.
Only one of structure, filename, pdbfile,
pdbxfile,
url, or pdbid may be specified or an exception will be thrown.
Examples
Examples
--------
--------
...
@@ -193,47 +202,44 @@ class PDBFixer(object):
...
@@ -193,47 +202,44 @@ class PDBFixer(object):
"""
"""
# Check to make sure only one option has been specified.
# Check to make sure only one option has been specified.
if
bool
(
filename
)
+
bool
(
pdbfile
)
+
bool
(
url
)
+
bool
(
pdbid
)
!=
1
:
if
bool
(
filename
)
+
bool
(
pdbfile
)
+
bool
(
pdbxfile
)
+
bool
(
url
)
+
bool
(
pdbid
)
!=
1
:
raise
Exception
(
"Exactly one option [filename, pdbfile, url, pdbid] must be specified."
)
raise
Exception
(
"Exactly one option [filename, pdbfile,
pdbxfile,
url, pdbid] must be specified."
)
self
.
source
=
None
self
.
source
=
None
if
pdbid
:
# A PDB id has been specified.
url
=
'http://www.rcsb.org/pdb/files/
%
s.pdb'
%
pdbid
if
filename
:
if
filename
:
self
.
source
=
filename
# A local file has been specified.
# A local file has been specified.
self
.
source
=
filename
file
=
open
(
filename
,
'r'
)
file
=
open
(
filename
,
'r'
)
structure
=
PdbStructure
(
file
)
if
filename
.
lower
()
.
endswith
(
'.pdbx'
)
or
filename
.
lower
()
.
endswith
(
'.cif'
):
self
.
_initializeFromPDBx
(
file
.
read
())
else
:
self
.
_initializeFromPDB
(
file
)
file
.
close
()
file
.
close
()
elif
pdbfile
:
elif
pdbfile
:
# A file-like object has been specified.
# A file-like object has been specified.
structure
=
PdbStructure
(
pdbfile
)
self
.
_initializeFromPDB
(
pdbfile
)
elif
pdbxfile
:
# A file-like object has been specified.
self
.
_initializeFromPDBx
(
pdbxfile
.
read
())
elif
url
:
elif
url
:
self
.
source
=
url
# A URL has been specified.
# A URL has been specified.
file
=
urlopen
(
url
)
structure
=
PdbStructure
(
file
)
file
.
close
()
elif
pdbid
:
# A PDB id has been specified.
url
=
'http://www.rcsb.org/pdb/files/
%
s.pdb'
%
pdbid
self
.
source
=
url
self
.
source
=
url
file
=
urlopen
(
url
)
file
=
urlopen
(
url
)
# Read contents all at once and split into lines, since urlopen doesn't like it when we read one line at a time over the network.
contents
=
file
.
read
()
.
decode
(
'utf-8'
)
contents
=
file
.
read
()
.
decode
(
'utf-8'
)
lines
=
contents
.
split
(
'
\n
'
)
file
.
close
()
file
.
close
()
structure
=
PdbStructure
(
lines
)
if
'.pdbx'
in
url
.
lower
()
or
'.cif'
in
url
.
lower
():
self
.
_initializeFromPDBx
(
contents
)
else
:
self
.
_initializeFromPDB
(
StringIO
(
contents
))
# Check the structure has some atoms in it.
# Check the structure has some atoms in it.
atoms
=
list
(
s
tructure
.
iter_
atoms
())
atoms
=
list
(
s
elf
.
topology
.
atoms
())
if
len
(
atoms
)
==
0
:
if
len
(
atoms
)
==
0
:
raise
Exception
(
"Structure contains no atoms."
)
raise
Exception
(
"Structure contains no atoms."
)
pdb
=
app
.
PDBFile
(
structure
)
self
.
topology
=
pdb
.
topology
self
.
positions
=
pdb
.
positions
self
.
sequences
=
[
Sequence
(
s
.
chain_id
,
s
.
residues
)
for
s
in
structure
.
sequences
]
self
.
modifiedResidues
=
[
ModifiedResidue
(
r
.
chain_id
,
r
.
number
,
r
.
residue_name
,
r
.
standard_name
)
for
r
in
structure
.
modified_residues
]
# Load the templates.
# Load the templates.
self
.
templates
=
{}
self
.
templates
=
{}
...
@@ -243,7 +249,67 @@ class PDBFixer(object):
...
@@ -243,7 +249,67 @@ class PDBFixer(object):
name
=
next
(
templatePdb
.
topology
.
residues
())
.
name
name
=
next
(
templatePdb
.
topology
.
residues
())
.
name
self
.
templates
[
name
]
=
templatePdb
self
.
templates
[
name
]
=
templatePdb
return
def _initializeFromPDB(self, file):
    """Initialize this object by reading a PDB file.

    Parameters
    ----------
    file : file
        The source of the PDB records. It is handed unchanged to PdbStructure, so it may be
        anything PdbStructure accepts. It is not closed here.

    Notes
    -----
    Sets self.topology, self.positions, self.sequences and self.modifiedResidues.
    """
    parsed = PdbStructure(file)
    pdbFile = app.PDBFile(parsed)
    self.topology = pdbFile.topology
    self.positions = pdbFile.positions

    # Copy the sequence records into this module's own Sequence objects.
    chainSequences = []
    for seq in parsed.sequences:
        chainSequences.append(Sequence(seq.chain_id, seq.residues))
    self.sequences = chainSequences

    # Likewise for the modified residue records.
    modified = []
    for res in parsed.modified_residues:
        modified.append(ModifiedResidue(res.chain_id, res.number, res.residue_name, res.standard_name))
    self.modifiedResidues = modified
def _initializeFromPDBx(self, filecontent):
    """Initialize this object by reading a PDBx/mmCIF file.

    Parameters
    ----------
    filecontent : str
        The complete text of the PDBx/mmCIF file. It is parsed twice: once by app.PDBxFile to obtain
        the topology and positions, and once by PdbxReader to obtain the sequence and modified
        residue records.

    Notes
    -----
    Sets self.topology, self.positions, self.sequences and self.modifiedResidues.

    The entity_poly_seq, struct_asym and pdbx_struct_mod_residue categories are treated as optional.
    If one of them, or one of the columns read from it, is missing from the file, the corresponding
    list is left empty instead of failing.
    """
    pdbx = app.PDBxFile(StringIO(filecontent))
    self.topology = pdbx.topology
    self.positions = pdbx.positions

    # PDBxFile doesn't record the information about sequence or modified residues, so we need to read them separately.

    reader = PdbxReader(StringIO(filecontent))
    data = []
    reader.read(data)
    block = data[0]

    # Load the sequence data. getObj() is expected to give None for a category that is not present
    # in the file, and getAttributeIndex() gives -1 for a column that is not present.

    sequences = {}
    sequenceData = block.getObj('entity_poly_seq')
    if sequenceData is not None:
        entityIdCol = sequenceData.getAttributeIndex('entity_id')
        residueCol = sequenceData.getAttributeIndex('mon_id')
        if -1 not in (entityIdCol, residueCol):
            for row in sequenceData.getRowList():
                sequences.setdefault(row[entityIdCol], []).append(row[residueCol])

    # Sequences are stored by "entity". There could be multiple chains that are all the same entity, so we need to
    # convert from entities to chains.

    self.sequences = []
    asymData = block.getObj('struct_asym')
    if asymData is not None:
        asymIdCol = asymData.getAttributeIndex('id')
        entityIdCol = asymData.getAttributeIndex('entity_id')
        if -1 not in (asymIdCol, entityIdCol):
            for row in asymData.getRowList():
                asymId = row[asymIdCol]
                entityId = row[entityIdCol]
                # Entities without sequence records (e.g. non-polymers) are skipped.
                if entityId in sequences:
                    self.sequences.append(Sequence(asymId, sequences[entityId]))

    # Load the modified residues.

    self.modifiedResidues = []
    modData = block.getObj('pdbx_struct_mod_residue')
    if modData is not None:
        asymIdCol = modData.getAttributeIndex('label_asym_id')
        resNameCol = modData.getAttributeIndex('label_comp_id')
        resNumCol = modData.getAttributeIndex('auth_seq_id')
        standardResCol = modData.getAttributeIndex('parent_comp_id')
        if -1 not in (asymIdCol, resNameCol, resNumCol, standardResCol):
            for row in modData.getRowList():
                self.modifiedResidues.append(ModifiedResidue(row[asymIdCol], int(row[resNumCol]), row[resNameCol], row[standardResCol]))
def
_addAtomsToTopology
(
self
,
heavyAtomsOnly
,
omitUnknownMolecules
):
def
_addAtomsToTopology
(
self
,
heavyAtomsOnly
,
omitUnknownMolecules
):
"""Create a new Topology in which missing atoms have been added.
"""Create a new Topology in which missing atoms have been added.
...
...
pdbfixer/ui.py
View file @
82ce81d0
...
@@ -55,8 +55,12 @@ def startPageCallback(parameters, handler):
...
@@ -55,8 +55,12 @@ def startPageCallback(parameters, handler):
global
fixer
global
fixer
if
'type'
in
parameters
:
if
'type'
in
parameters
:
if
parameters
.
getfirst
(
'type'
)
==
'local'
:
if
parameters
.
getfirst
(
'type'
)
==
'local'
:
filename
=
parameters
[
'pdbfile'
]
.
filename
if
filename
.
lower
()
.
endswith
(
'.pdbx'
)
or
filename
.
lower
()
.
endswith
(
'.cif'
):
fixer
=
PDBFixer
(
pdbxfile
=
StringIO
(
parameters
[
'pdbfile'
]
.
value
.
decode
()))
else
:
fixer
=
PDBFixer
(
pdbfile
=
parameters
[
'pdbfile'
]
.
value
.
decode
()
.
splitlines
())
fixer
=
PDBFixer
(
pdbfile
=
parameters
[
'pdbfile'
]
.
value
.
decode
()
.
splitlines
())
fixer
.
source
=
parameters
[
'pdbfile'
]
.
filename
fixer
.
source
=
filename
else
:
else
:
id
=
parameters
.
getfirst
(
'pdbid'
)
id
=
parameters
.
getfirst
(
'pdbid'
)
try
:
try
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment