Commit 264f95c1 by peastman

Merge pull request #26 from jchodera/master

Add support for specifying PDB codes, filenames, and file objects [ready to merge]
parents 6e4432c2 10eb76db
...@@ -11,7 +11,7 @@ install: ...@@ -11,7 +11,7 @@ install:
script: script:
- python setup.py install - python setup.py install
- cd tests - cd tests
- nosetests - nosetests --with-doctest
after_script: after_script:
- pyflakes pdbfixer/*.py - pyflakes pdbfixer/*.py
......
...@@ -44,6 +44,12 @@ import os ...@@ -44,6 +44,12 @@ import os
import os.path import os.path
import math import math
# Imports for urlopen
if sys.version_info >= (3,0):
from urllib.request import urlopen
else:
from urllib2 import urlopen
substitutions = { substitutions = {
'2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG', '2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG',
'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA', 'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA',
...@@ -67,7 +73,22 @@ dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI'] ...@@ -67,7 +73,22 @@ dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']
def _overlayPoints(points1, points2): def _overlayPoints(points1, points2):
"""Given two sets of points, determine the translation and rotation that matches them as closely as possible. """Given two sets of points, determine the translation and rotation that matches them as closely as possible.
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).""" Parameters
----------
points1 (numpy array of simtk.unit.Quantity with units compatible with distance) - reference set of coordinates
points2 (numpy array of simtk.unit.Quantity with units compatible with distance) - set of coordinates to be rotated
Returns
-------
translate2 - vector to translate points2 by in order to center it
rotate - rotation matrix to apply to centered points2 to map it on to points1
center1 - center of points1
Notes
-----
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).
"""
if len(points1) == 0: if len(points1) == 0:
return (Vec3(0, 0, 0), np.identity(3), Vec3(0, 0, 0)) return (Vec3(0, 0, 0), np.identity(3), Vec3(0, 0, 0))
...@@ -95,14 +116,89 @@ def _overlayPoints(points1, points2): ...@@ -95,14 +116,89 @@ def _overlayPoints(points1, points2):
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object): class PDBFixer(object):
"""PDBFixer implements many tools for fixing problems in PDB files.""" """PDBFixer implements many tools for fixing problems in PDB files.
"""
def __init__(self, filename=None, file=None, url=None, pdbid=None):
"""Create a new PDBFixer instance to fix problems in a PDB file.
Parameters
----------
filename : str, optional, default=None
A filename specifying the file from which the PDB file is to be read.
file : file, optional, default=None
A file-like object from which the PDB file is to be read.
The file is not closed after reading.
url : str, optional, default=None
A URL specifying the internet location from which the PDB file contents should be retrieved.
pdbid : str, optional, default=None
A four-letter PDB code specifying the structure to be retrieved from the RCSB.
Notes
-----
Only one of structure, filename, file, url, or pdbid may be specified or an exception will be thrown.
Examples
--------
Start from a file object.
>>> pdbid = '1VII'
>>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
>>> file = urlopen(url)
>>> fixer = PDBFixer(file=file)
Start from a filename.
>>> filename = 'test.pdb'
>>> file = urlopen(url)
>>> outfile = open(filename, 'w')
>>> outfile.write(file.read())
>>> outfile.close()
>>> fixer = PDBFixer(filename=filename)
Start from a URL.
>>> fixer = PDBFixer(url=url)
def __init__(self, structure): Start from a PDB code.
"""Create a new PDBFixer to fix problems in a PDB file.
>>> fixer = PDBFixer(pdbid=pdbid)
Parameters:
- structure (PdbStructure) the starting PDB structure containing problems to be fixed
""" """
# Check to make sure only one option has been specified.
if bool(filename) + bool(file) + bool(url) + bool(pdbid) != 1:
raise Exception("Exactly one option [filename, file, url, pdbid] must be specified.")
if filename:
# A local file has been specified.
file = open(filename, 'r')
structure = PdbStructure(file)
file.close()
elif file:
# A file-like object has been specified.
structure = PdbStructure(file)
elif url:
# A URL has been specified.
file = urlopen(url)
structure = PdbStructure(file)
file.close()
elif pdbid:
# A PDB id has been specified.
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
file = urlopen(url)
# Read contents all at once and split into lines, since urlopen doesn't like it when we read one line at a time over the network.
contents = file.read()
lines = contents.split('\n')
file.close()
structure = PdbStructure(lines)
# Check the structure has some atoms in it.
atoms = list(structure.iter_atoms())
if len(atoms)==0:
raise Exception("Structure contains no atoms.")
self.structure = structure self.structure = structure
self.pdb = app.PDBFile(structure) self.pdb = app.PDBFile(structure)
self.topology = self.pdb.topology self.topology = self.pdb.topology
...@@ -119,8 +215,30 @@ class PDBFixer(object): ...@@ -119,8 +215,30 @@ class PDBFixer(object):
name = next(templatePdb.topology.residues()).name name = next(templatePdb.topology.residues()).name
self.templates[name] = templatePdb self.templates[name] = templatePdb
return
def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added.""" """Create a new Topology in which missing atoms have been added.
Parameters
----------
heavyAtomsOnly : bool
If True, only heavy atoms will be added to the topology.
omitUnknownMolecules : bool
If True, unknown molecules will be omitted from the topology.
Returns
-------
newTopology : simtk.openmm.app.Topology
A new Topology object containing atoms from the old.
newPositions : list of simtk.unit.Quantity with units compatible with nanometers
Atom positions for the new Topology object.
newAtoms : simtk.openmm.app.Topology.Atom
New atom objects.
existingAtomMap : dict
Mapping from old atoms to new atoms.
"""
newTopology = app.Topology() newTopology = app.Topology()
newPositions = []*unit.nanometer newPositions = []*unit.nanometer
...@@ -267,8 +385,19 @@ class PDBFixer(object): ...@@ -267,8 +385,19 @@ class PDBFixer(object):
def removeChains(self, chainIndices): def removeChains(self, chainIndices):
"""Remove a set of chains from the structure. """Remove a set of chains from the structure.
Parameters: Parameters
- chainIndices (list) the indices of the chains to remove. ----------
chainIndices : list of int
List of the indices of the chains to remove.
Examples
--------
Load a PDB file with two chains and eliminate the second chain.
>>> fixer = PDBFixer(pdbid='4J7F')
>>> fixer.removeChains([1])
""" """
modeller = app.Modeller(self.topology, self.positions) modeller = app.Modeller(self.topology, self.positions)
allChains = list(self.topology.chains()) allChains = list(self.topology.chains())
...@@ -283,6 +412,14 @@ class PDBFixer(object): ...@@ -283,6 +412,14 @@ class PDBFixer(object):
The results are stored into the missingResidues field, which is a dict. Each key is a tuple consisting of The results are stored into the missingResidues field, which is a dict. Each key is a tuple consisting of
the index of a chain, and the residue index within that chain at which new residues should be inserted. the index of a chain, and the residue index within that chain at which new residues should be inserted.
The corresponding value is a list of the names of residues to insert there. The corresponding value is a list of the names of residues to insert there.
Examples
--------
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.findMissingResidues()
>>> missing_residues = fixer.missingResidues
""" """
chains = [c for c in self.structureChains if any(atom.record_name == 'ATOM' for atom in c.iter_atoms())] chains = [c for c in self.structureChains if any(atom.record_name == 'ATOM' for atom in c.iter_atoms())]
chainWithGaps = {} chainWithGaps = {}
...@@ -341,6 +478,16 @@ class PDBFixer(object): ...@@ -341,6 +478,16 @@ class PDBFixer(object):
The results are stored into the nonstandardResidues field, which is a map of Residue objects to the names The results are stored into the nonstandardResidues field, which is a map of Residue objects to the names
of suggested replacement residues. of suggested replacement residues.
Examples
--------
Find nonstandard residues.
>>> fixer = PDBFixer(pdbid='1YRI')
>>> fixer.findNonstandardResidues()
>>> nonstandard_residues = fixer.nonstandardResidues
""" """
# First find residues based on our table of standard substitutions. # First find residues based on our table of standard substitutions.
...@@ -362,7 +509,22 @@ class PDBFixer(object): ...@@ -362,7 +509,22 @@ class PDBFixer(object):
self.nonstandardResidues = [(r, nonstandard[r]) for r in sorted(nonstandard, key=lambda r: r.index)] self.nonstandardResidues = [(r, nonstandard[r]) for r in sorted(nonstandard, key=lambda r: r.index)]
def replaceNonstandardResidues(self): def replaceNonstandardResidues(self):
"""Replace every residue listed in the nonstandardResidues field with the specified standard residue.""" """Replace every residue listed in the nonstandardResidues field with the specified standard residue.
Notes
-----
You must have first called findNonstandardResidues() to identify nonstandard residues.
Examples
--------
Find and replace nonstandard residues using replacement templates stored in the 'templates' field of PDBFixer object.
>>> fixer = PDBFixer(pdbid='1YRI')
>>> fixer.findNonstandardResidues()
>>> fixer.replaceNonstandardResidues()
"""
if len(self.nonstandardResidues) > 0: if len(self.nonstandardResidues) > 0:
deleteAtoms = [] deleteAtoms = []
...@@ -390,6 +552,24 @@ class PDBFixer(object): ...@@ -390,6 +552,24 @@ class PDBFixer(object):
are Residue objects and whose values are lists of atom names. missingAtoms contains standard atoms that should are Residue objects and whose values are lists of atom names. missingAtoms contains standard atoms that should
be present in any residue of that type. missingTerminals contains terminal atoms that should be present at the be present in any residue of that type. missingTerminals contains terminal atoms that should be present at the
start or end of a chain. start or end of a chain.
Notes
-----
You must have first called findMissingResidues().
Examples
--------
Find missing heavy atoms in Abl kinase structure.
>>> fixer = PDBFixer(pdbid='2F4J')
>>> fixer.findMissingResidues()
>>> fixer.findMissingAtoms()
>>> # Retrieve missing atoms.
>>> missingAtoms = fixer.missingAtoms
>>> # Retrieve missing terminal atoms.
>>> missingTerminals = fixer.missingTerminals
""" """
missingAtoms = {} missingAtoms = {}
missingTerminals = {} missingTerminals = {}
...@@ -428,7 +608,23 @@ class PDBFixer(object): ...@@ -428,7 +608,23 @@ class PDBFixer(object):
self.missingTerminals = missingTerminals self.missingTerminals = missingTerminals
def addMissingAtoms(self): def addMissingAtoms(self):
"""Add all missing heavy atoms, as specified by the missingAtoms, missingTerminals, and missingResidues fields.""" """Add all missing heavy atoms, as specified by the missingAtoms, missingTerminals, and missingResidues fields.
Notes
-----
You must already have called findMissingAtoms() to have identified atoms to be added.
Examples
--------
Find missing heavy atoms in Abl kinase structure.
>>> fixer = PDBFixer(pdbid='2F4J')
>>> fixer.findMissingResidues()
>>> fixer.findMissingAtoms()
>>> fixer.addMissingAtoms()
"""
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules. # Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
...@@ -495,12 +691,24 @@ class PDBFixer(object): ...@@ -495,12 +691,24 @@ class PDBFixer(object):
self.topology = newTopology2 self.topology = newTopology2
self.positions = newPositions2 self.positions = newPositions2
def removeHeterogens(self, keepWater): def removeHeterogens(self, keepWater=True):
"""Remove all heterogens from the structure. """Remove all heterogens from the structure.
Parameters: Parameters
- keepWater (bool) if True, water molecules will not be removed ----------
keepWater : bool, optional, default=True
If True, water molecules will not be removed.
Examples
--------
Remove heterogens in Abl structure complexed with imatinib.
>>> fixer = PDBFixer(pdbid='2F4J')
>>> fixer.removeHeterogens(keepWater=False)
""" """
keep = set(proteinResidues).union(dnaResidues).union(rnaResidues) keep = set(proteinResidues).union(dnaResidues).union(rnaResidues)
keep.add('N') keep.add('N')
keep.add('UNK') keep.add('UNK')
...@@ -515,30 +723,68 @@ class PDBFixer(object): ...@@ -515,30 +723,68 @@ class PDBFixer(object):
self.topology = modeller.topology self.topology = modeller.topology
self.positions = modeller.positions self.positions = modeller.positions
def addMissingHydrogens(self, pH): def addMissingHydrogens(self, pH=7.0):
"""Add missing hydrogen atoms to the structure. """Add missing hydrogen atoms to the structure.
Parameters: Parameters
- pH (float) the pH based on which to select hydrogens ----------
pH : float, optional, default=7.0
The pH based on which to select hydrogens.
Notes
-----
No extensive electrostatic analysis is performed; only default residue pKas are used.
Examples
--------
Examples
--------
Add missing hydrogens appropriate for pH 8.
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.addMissingHydrogens(pH=8.0)
""" """
modeller = app.Modeller(self.topology, self.positions) modeller = app.Modeller(self.topology, self.positions)
modeller.addHydrogens(pH=pH) modeller.addHydrogens(pH=pH)
self.topology = modeller.topology self.topology = modeller.topology
self.positions = modeller.positions self.positions = modeller.positions
def addSolvent(self, boxSize, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar): def addSolvent(self, boxSize=None, padding=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar):
"""Add a solvent box surrounding the structure. """Add a solvent box surrounding the structure.
Parameters: Parameters
- boxSize (Vec3) the size of the box to fill with water ----------
- positiveIon (string='Na+') the type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+' boxSize : simtk.openmm.Vec3, optional, default=None
- negativeIon (string='Cl-') the type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-' The size of the box to fill with water. If specified, padding must not be specified.
- ionicString (concentration=0*molar) the total concentration of ions (both positive and negative) to add. This padding : simtk.unit.Quantity compatible with nanometers, optional, default=None
does not include ions that are added to neutralize the system. Padding around macromolecule for filling box with water. If specified, boxSize must not be specified.
positiveIon : str, optional, default='Na+'
The type of positive ion to add. Allowed values are 'Cs+', 'K+', 'Li+', 'Na+', and 'Rb+'.
negativeIon : str, optional, default='Cl-'
The type of negative ion to add. Allowed values are 'Cl-', 'Br-', 'F-', and 'I-'.
ionicStrength : simtk.unit.Quantity with units compatible with molar, optional, default=0*molar
The total concentration of ions (both positive and negative) to add. This does not include ions that are added to neutralize the system.
Examples
--------
Add missing residues, heavy atoms, and hydrogens, and then solvate with 10 A padding.
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.findMissingResidues()
>>> fixer.findMissingAtoms()
>>> fixer.addMissingAtoms()
>>> fixer.addMissingHydrogens(pH=8.0)
>>> fixer.addSolvent(padding=10*unit.angstrom, ionicStrength=0.050*unit.molar)
""" """
modeller = app.Modeller(self.topology, self.positions) modeller = app.Modeller(self.topology, self.positions)
forcefield = self._createForceField(self.topology, True) forcefield = self._createForceField(self.topology, True)
modeller.addSolvent(forcefield, boxSize=boxSize, positiveIon=positiveIon, negativeIon=negativeIon, ionicStrength=ionicStrength) modeller.addSolvent(forcefield, padding=padding, boxSize=boxSize, positiveIon=positiveIon, negativeIon=negativeIon, ionicStrength=ionicStrength)
self.topology = modeller.topology self.topology = modeller.topology
self.positions = modeller.positions self.positions = modeller.positions
...@@ -652,7 +898,7 @@ def main(): ...@@ -652,7 +898,7 @@ def main():
parser.error('No filename specified') parser.error('No filename specified')
if len(args) > 1: if len(args) > 1:
parser.error('Must specify a single filename') parser.error('Must specify a single filename')
fixer = PDBFixer(PdbStructure(open(args[0]))) fixer = PDBFixer(filename=argv[0])
if options.residues: if options.residues:
fixer.findMissingResidues() fixer.findMissingResidues()
else: else:
......
...@@ -111,6 +111,7 @@ setup( ...@@ -111,6 +111,7 @@ setup(
packages=find_packages(), packages=find_packages(),
package_data={'pdbfixer': find_package_data()}, package_data={'pdbfixer': find_package_data()},
zip_safe=False, zip_safe=False,
install_requires=[],
entry_points={'console_scripts': ['pdbfixer = pdbfixer.pdbfixer:main']}) entry_points={'console_scripts': ['pdbfixer = pdbfixer.pdbfixer:main']})
check_dependencies() check_dependencies()
import pdbfixer import pdbfixer
import simtk.openmm import simtk.openmm
import Bio.PDB
import os import os
import os.path import os.path
import sys import sys
import numpy import numpy
import tempfile
from threading import Timer from threading import Timer
...@@ -37,7 +37,8 @@ def simulate(pdbcode, pdb_filename): ...@@ -37,7 +37,8 @@ def simulate(pdbcode, pdb_filename):
pdb = app.PDBFile(pdb_filename) pdb = app.PDBFile(pdb_filename)
# Set up implicit solvent forcefield. # Set up implicit solvent forcefield.
forcefield = app.ForceField('amber99sbildn.xml') #forcefield = app.ForceField('amber99sbildn.xml')
forcefield = app.ForceField('amber10.xml')
# Create the system. # Create the system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds) system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds)
...@@ -83,32 +84,24 @@ def simulate(pdbcode, pdb_filename): ...@@ -83,32 +84,24 @@ def simulate(pdbcode, pdb_filename):
return return
def test_build_and_simulate(): def test_build_and_simulate():
# These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm # DEBUG: These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm
pdbcodes_to_build = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ'] pdbcodes_to_build = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
pdbcodes_to_simulate = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
# Set up PDB retrieval. # DEBUG: Small test cases.
from Bio.PDB import PDBList pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO', '1APF', '1APO', '1APQ', '1AQG', '1AQO', '1AQQ', '1AQR', '1AQS', '1ARD', '1ARE', '1ARF', '1ARJ', '1ARK', '1AS5', '1AT4', '1ATO', '1ATV', '1ATW', '1ATX', '1AV3', '1AW4', '1AW6', '1AWY', '1AXH', '1AY3', '1AYJ', '1AZ6', '1AZH', '1AZJ', '1AZK', '1B03', '1B0Q', '1B13', '1B1V', '1B2J', '1B36', '1B45', '1B4G', '1B4I', '1B4Y', '1B5N', '1B8W', '1B9G', '1B9P', '1B9Q', '1B9U', '1BA4', '1BA5', '1BA6', '1BAH', '1BAL', '1BBA', '1BBG', '1BBL', '1BBO', '1BCV', '1BD1', '1BDC', '1BDD', '1BDE', '1BDK', '1BDS', '1BE7', '1BEI', '1BF0', '1BF9', '1BFW', '1BFY', '1BFZ', '1BGK', '1BGZ', '1BH0', '1BH1', '1BH4', '1BH7', '1BHI', '1BHP', '1BIG', '1BJB', '1BJC', '1BJH', '1BK2', '1BK8', '1BKT', '1BKU', '1BL1', '1BM4', '1BMX', '1BN0', '1BNB', '1BNX', '1BOE', '1BOR', '1BPI', '1BPT', '1BQ8', '1BQ9', '1BQF', '1BRF', '1BRV', '1BRZ', '1BTI', '1BTQ', '1BTR', '1BTS', '1BTT', '1BUB', '1BUS', '1BVJ', '1BW6', '1BWX', '1BX7', '1BX8', '1BY0', '1BY6', '1BYJ', '1BYV', '1BYY', '1BZ2', '1BZ3', '1BZB', '1BZG', '1BZK', '1BZT', '1BZU', '1C0O', '1C26', '1C2U', '1C32', '1C34', '1C35', '1C38', '1C49', '1C4B', '1C4E', '1C4S', '1C55', '1C56', '1C6W', '1C98', '1C9A', '1C9Z', '1CAA', '1CAD', '1CAP', '1CB3', '1CB9', '1CBH', '1CBN', '1CCF', '1CCM', '1CCN', '1CCQ', '1CCV', '1CE3', '1CE4', '1CEK', '1CEU', '1CFG', '1CFH', '1CFI', '1CHL', '1CHV', '1CIX', '1CKW', '1CKX', '1CKY', '1CKZ', '1CL4', '1CLF', '1CMR', '1CNL', '1CNN', '1CNR', '1CO4', '1COI', '1CQ0', '1CQ5', '1CQL', '1CQU', '1CR8', '1CRE', '1CRF', '1CRN', '1CS9', '1CSA', '1CT6', '1CTI', '1CV9', '1CVQ', '1CW5', '1CW6', '1CW8', '1CWX', '1CWZ', '1CXN', '1CXO', '1CXR', '1CXW', '1CYA', '1CYB', '1CZ6', '1D0R', '1D0T', '1D0U', '1D0W', '1D10', '1D11', '1D12', '1D13', '1D14', '1D15', '1D16', '1D17', '1D1E', '1D1F', '1D1H', '1D26', '1D2D', '1D2J', '1D2L', '1D33', '1D35', '1D36', '1D37', '1D38', '1D54', '1D58', '1D5Q', '1D61', '1D62', '1D67', '1D6B', '1D6X', '1D78', '1D79', '1D7N', '1D7T', '1D7Z', '1D82', '1D8G', '1D93', '1D9J', '1D9L', '1D9M', '1D9O', '1D9P', '1DA0', '1DA9', '1DB6', '1DEC', '1DEM', '1DEN', '1DEP', '1DF6', '1DFE', '1DFS', '1DFT', '1DFW', '1DFY', '1DFZ']
pdblist = PDBList(server=PDBList.alternative_download_url)
success = True # DEBUG: A few small test cases.
pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO']
for pdbcode in pdbcodes_to_build: # Don't simulate any.
print pdbcode pdbcodes_to_simulate = []
try:
# Remove file if it exists already.
input_pdb_filename = 'pdb' + pdbcode + '.ent'
if os.path.exists(input_pdb_filename):
os.remove(input_pdb_filename)
print "Attempting to retrieve PDB code '%s' from %s..." % (pdbcode, PDBList.alternative_download_url) # Keep track of list of failures.
input_pdb_filename = pdblist.retrieve_pdb_file(pdbcode, pdir='.') failures = list()
except Exception as e: for pdbcode in pdbcodes_to_build:
print str(e) print "------------------------------------------------"
print "Could not download PDB code '%s'" % pdbcode print pdbcode
continue
output_pdb_filename = 'output.pdb' output_pdb_filename = 'output.pdb'
...@@ -121,18 +114,17 @@ def test_build_and_simulate(): ...@@ -121,18 +114,17 @@ def test_build_and_simulate():
positiveIon = 'Na+' positiveIon = 'Na+'
negativeIon = 'Cl-' negativeIon = 'Cl-'
infile = open(input_pdb_filename) outfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
outfile = open(output_pdb_filename, 'w') output_pdb_filename = outfile.name
success = True
timeout_seconds = 0.1 timeout_seconds = 30
watchdog = Watchdog(timeout_seconds) watchdog = Watchdog(timeout_seconds)
build_successful = False
try: try:
from pdbfixer.pdbfixer import PDBFixer, PdbStructure from pdbfixer.pdbfixer import PDBFixer
from simtk.openmm import app from simtk.openmm import app
stage = "Creating PDBFixer..." stage = "Creating PDBFixer..."
fixer = PDBFixer(PdbStructure(infile)) fixer = PDBFixer(pdbid=pdbcode)
stage = "Finding missing residues..." stage = "Finding missing residues..."
fixer.findMissingResidues() fixer.findMissingResidues()
stage = "Finding nonstandard residues..." stage = "Finding nonstandard residues..."
...@@ -150,43 +142,63 @@ def test_build_and_simulate(): ...@@ -150,43 +142,63 @@ def test_build_and_simulate():
stage = "Writing PDB file..." stage = "Writing PDB file..."
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile) app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
stage = "Done." stage = "Done."
infile.close()
outfile.close() outfile.close()
build_successful = True
except Watchdog: except Watchdog:
print "timed out fixing PDB %s" % pdbcode message = "timed out in stage %s" % stage
success = False print message
failures.append((pdbcode, Exception(message)))
except Exception as e: except Exception as e:
print "EXCEPTION DURING BUILD"
#import traceback
#print traceback.print_exc()
print str(e) print str(e)
success = False failures.append((pdbcode, e))
watchdog.stop() watchdog.stop()
del watchdog del watchdog
# Test simulating this with OpenMM. # Test simulating this with OpenMM.
if pdbcode in pdbcodes_to_simulate: if (pdbcode in pdbcodes_to_simulate) and (build_successful):
watchdog = Watchdog(timeout_seconds) watchdog = Watchdog(timeout_seconds)
try: try:
simulate(pdbcode, output_pdb_filename) simulate(pdbcode, output_pdb_filename)
except Watchdog: except Watchdog:
print "PDB code %s timed out in stage '%s'." % (pdbcode, stage) message = "timed out in simulation"
success = False print message
failures.append((pdbcode, Exception(message)))
except Exception as e: except Exception as e:
print "EXCEPTION DURING SIMULATE"
#import traceback
#print traceback.print_exc()
print str(e) print str(e)
success = False failures.append((pdbcode, e))
watchdog.stop() watchdog.stop()
del watchdog del watchdog
# Clean up. # Clean up.
os.remove(input_pdb_filename)
os.remove(output_pdb_filename) os.remove(output_pdb_filename)
if not success: print "------------------------------------------------"
raise Exception("build test failed on one or more PDB files.")
if len(failures) != 0:
print ""
print "SUMMARY OF FAILURES:"
print ""
for failure in failures:
(pdbcode, exception) = failure
print "%6s : %s" % (pdbcode, str(exception))
print ""
raise Exception("Build test failed on one or more PDB files.")
else:
print "All tests succeeded."
if __name__ == '__main__': if __name__ == '__main__':
test_build_and_simulate() test_build_and_simulate()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment