Commit 5fd17b42 by John Chodera (MSKCC)

Updated interface based on @peastman suggestions. Started to convert docstrings…

Updated interface based on @peastman suggestions. Started to convert docstrings to numpy format. Updated test with some small PDBs.
parent d97651ac
...@@ -44,29 +44,11 @@ import os ...@@ -44,29 +44,11 @@ import os
import os.path import os.path
import math import math
# Imports for urlopen. # Imports for urlopen
import six # compatibility with Python 2 and 3 if sys.version_info >= (3,0):
if six.PY3:
from urllib.request import urlopen from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.parse import (uses_relative, uses_netloc, uses_params)
else: else:
from urllib2 import urlopen from urllib2 import urlopen
from urlparse import urlparse
from urlparse import uses_relative, uses_netloc, uses_params
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard('')
def _is_url(url):
"""Check to see if a URL has a valid protocol.
from pandas/io.common.py Copyright 2014 Pandas Developers
Used under the BSD licence
"""
try:
return urlparse(url).scheme in _VALID_URLS
except:
return False
substitutions = { substitutions = {
'2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG', '2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG',
...@@ -90,8 +72,23 @@ dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI'] ...@@ -90,8 +72,23 @@ dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']
def _overlayPoints(points1, points2): def _overlayPoints(points1, points2):
"""Given two sets of points, determine the translation and rotation that matches them as closely as possible. """Given two sets of points, determine the translation and rotation that matches them as closely as possible.
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).""" Parameters
----------
points1 (numpy array of simtk.unit.Quantity with units compatible with distance) - reference set of coordinates
points2 (numpy array of simtk.unit.Quantity with units compatible with distance) - set of coordinates to be rotated
Returns
-------
translate2 - vector to translate points2 by in order to center it
rotate - rotation matrix to apply to centered points2 to map it on to points1
center1 - center of points1
Notes
-----
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).
"""
if len(points1) == 0: if len(points1) == 0:
return (Vec3(0, 0, 0), np.identity(3), Vec3(0, 0, 0)) return (Vec3(0, 0, 0), np.identity(3), Vec3(0, 0, 0))
...@@ -119,29 +116,45 @@ def _overlayPoints(points1, points2): ...@@ -119,29 +116,45 @@ def _overlayPoints(points1, points2):
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object): class PDBFixer(object):
"""PDBFixer implements many tools for fixing problems in PDB files.""" """PDBFixer implements many tools for fixing problems in PDB files.
"""
def __init__(self, structure): def __init__(self, structure=None, filename=None, file=None, url=None, pdbid=None):
"""Create a new PDBFixer to fix problems in a PDB file. """Create a new PDBFixer instance to fix problems in a PDB file.
Parameters: Parameters
- structure (PdbStructure) the starting PDB structure containing problems to be fixed ----------
or a URL, filename, gzipped filename, or PDB code structure : PdbStructure, optional, default=None
A PdbStructure object containing the PDB file to be repaired.
Examples: filename : str, optional, default=None
A filename specifying the file from which the PDB file is to be read.
file : file, optional, default=None
A file-like object from which the PDB file is to be read.
The file is not closed after reading.
url : str, optional, default=None
A URL specifying the internet location from which the PDB file contents should be retrieved.
pdbid : str, optional, default=None
A four-letter PDB code specifying the structure to be retrieved from the RCSB.
Notes
-----
Only one of structure, filename, file, url, or pdbid may be specified or an exception will be thrown.
Examples
--------
Start from a PdbStructure Start from a PdbStructure
>>> pdbcode = '1VII' >>> pdbid = '1VII'
>>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbcode >>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
>>> file = urlopen(url) >>> file = urlopen(url)
>>> structure = PdbStructure(file) >>> structure = PdbStructure(file)
>>> fixer = PDBFixer(structure) >>> fixer = PDBFixer(structure=structure)
Start from a file object. Start from a file object.
>>> file = urlopen(url) >>> file = urlopen(url)
>>> fixer = PDBFixer(file) >>> fixer = PDBFixer(file=file)
Start from a filename. Start from a filename.
...@@ -150,43 +163,53 @@ class PDBFixer(object): ...@@ -150,43 +163,53 @@ class PDBFixer(object):
>>> outfile = open(filename, 'w') >>> outfile = open(filename, 'w')
>>> outfile.write(file.read()) >>> outfile.write(file.read())
>>> outfile.close() >>> outfile.close()
>>> fixer = PDBFixer(filename) >>> fixer = PDBFixer(filename=filename)
Start from a URL. Start from a URL.
>>> fixer = PDBFixer(url) >>> fixer = PDBFixer(url=url)
Start from a PDB code. Start from a PDB code.
>>> fixer = PDBFixer(pdbcode) >>> fixer = PDBFixer(pdbid=pdbid)
""" """
if isinstance(structure, PdbStructure): # Check to make sure only one option has been specified.
# We already have a PDB structure; do nothing. if bool(structure) + bool(filename) + bool(file) + bool(url) + bool(pdbid) != 1:
pass raise Exception("Exactly one option [structure, filename, file, url, pdbid] must be specified.")
if isinstance(structure, str):
if os.path.exists(structure):
# First, try opening it as a local file.
file = open(structure, 'r')
structure = PdbStructure(file)
file.close()
elif _is_url(structure):
# If it's a URL, try that.
file = urlopen(structure)
structure = PdbStructure(file)
file.close()
elif len(structure) == 4:
# If it's a PDB code, try that.
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % structure
file = urlopen(url)
structure = PdbStructure(file)
file.close()
elif hasattr(structure, 'read'):
# If a file-like object, read it.
structure = PdbStructure(structure)
if structure:
# A PDB structure has been specified; do nothing.
pass
elif filename:
# A local file has been specified.
file = open(filename, 'r')
structure = PdbStructure(file)
file.close()
elif file:
# A file-like object has been specified.
structure = PdbStructure(file)
elif url:
# A URL has been specified.
file = urlopen(url)
structure = PdbStructure(file)
file.close()
elif pdbid:
# A PDB id has been specified.
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
file = urlopen(url)
# Read contents all at once and split into lines, since urlopen doesn't like it when we read one line at a time over the network.
contents = file.read()
lines = contents.split('\n')
file.close()
structure = PdbStructure(lines)
# Check the structure has some atoms in it.
atoms = list(structure.iter_atoms())
if len(atoms)==0:
raise Exception("Structure contains no atoms.")
self.structure = structure self.structure = structure
self.pdb = app.PDBFile(structure) self.pdb = app.PDBFile(structure)
self.topology = self.pdb.topology self.topology = self.pdb.topology
...@@ -202,9 +225,31 @@ class PDBFixer(object): ...@@ -202,9 +225,31 @@ class PDBFixer(object):
templatePdb = app.PDBFile(os.path.join(templatesPath, file)) templatePdb = app.PDBFile(os.path.join(templatesPath, file))
name = next(templatePdb.topology.residues()).name name = next(templatePdb.topology.residues()).name
self.templates[name] = templatePdb self.templates[name] = templatePdb
return
def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added.""" """Create a new Topology in which missing atoms have been added.
Parameters
----------
heavyAtomsOnly : bool
If True, only heavy atoms will be added to the topology.
omitUnknownMolecules : bool
If True, unknown molecules will be omitted from the topology.
Returns
-------
newTopology : simtk.openmm.app.Topology
A new Topology object containing atoms from the old.
newPositions : list of simtk.unit.Quantity with units compatible with nanometers
Atom positions for the new Topology object.
newAtoms : simtk.openmm.app.Topology.Atom
New atom objects.
existingAtomMap : dict
Mapping from old atoms to new atoms.
"""
newTopology = app.Topology() newTopology = app.Topology()
newPositions = []*unit.nanometer newPositions = []*unit.nanometer
......
...@@ -111,9 +111,7 @@ setup( ...@@ -111,9 +111,7 @@ setup(
packages=find_packages(), packages=find_packages(),
package_data={'pdbfixer': find_package_data()}, package_data={'pdbfixer': find_package_data()},
zip_safe=False, zip_safe=False,
install_requires=[ install_requires=[],
'six',
],
entry_points={'console_scripts': ['pdbfixer = pdbfixer.pdbfixer:main']}) entry_points={'console_scripts': ['pdbfixer = pdbfixer.pdbfixer:main']})
check_dependencies() check_dependencies()
...@@ -6,6 +6,7 @@ import os ...@@ -6,6 +6,7 @@ import os
import os.path import os.path
import sys import sys
import numpy import numpy
import tempfile
from threading import Timer from threading import Timer
...@@ -36,7 +37,8 @@ def simulate(pdbcode, pdb_filename): ...@@ -36,7 +37,8 @@ def simulate(pdbcode, pdb_filename):
pdb = app.PDBFile(pdb_filename) pdb = app.PDBFile(pdb_filename)
# Set up implicit solvent forcefield. # Set up implicit solvent forcefield.
forcefield = app.ForceField('amber99sbildn.xml') #forcefield = app.ForceField('amber99sbildn.xml')
forcefield = app.ForceField('amber10.xml')
# Create the system. # Create the system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds) system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds)
...@@ -82,17 +84,19 @@ def simulate(pdbcode, pdb_filename): ...@@ -82,17 +84,19 @@ def simulate(pdbcode, pdb_filename):
return return
def test_build_and_simulate(): def test_build_and_simulate():
# These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm # DEBUG: These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm
pdbcodes_to_build = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ'] pdbcodes_to_build = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
pdbcodes_to_simulate = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
# DEBUG: Simpler test cases. # DEBUG: Small test cases.
pdbcodes_to_build = ['1AS5', '1VII'] pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO', '1APF', '1APO', '1APQ', '1AQG', '1AQO', '1AQQ', '1AQR', '1AQS', '1ARD', '1ARE', '1ARF', '1ARJ', '1ARK', '1AS5', '1AT4', '1ATO', '1ATV', '1ATW', '1ATX', '1AV3', '1AW4', '1AW6', '1AWY', '1AXH', '1AY3', '1AYJ', '1AZ6', '1AZH', '1AZJ', '1AZK', '1B03', '1B0Q', '1B13', '1B1V', '1B2J', '1B36', '1B45', '1B4G', '1B4I', '1B4Y', '1B5N', '1B8W', '1B9G', '1B9P', '1B9Q', '1B9U', '1BA4', '1BA5', '1BA6', '1BAH', '1BAL', '1BBA', '1BBG', '1BBL', '1BBO', '1BCV', '1BD1', '1BDC', '1BDD', '1BDE', '1BDK', '1BDS', '1BE7', '1BEI', '1BF0', '1BF9', '1BFW', '1BFY', '1BFZ', '1BGK', '1BGZ', '1BH0', '1BH1', '1BH4', '1BH7', '1BHI', '1BHP', '1BIG', '1BJB', '1BJC', '1BJH', '1BK2', '1BK8', '1BKT', '1BKU', '1BL1', '1BM4', '1BMX', '1BN0', '1BNB', '1BNX', '1BOE', '1BOR', '1BPI', '1BPT', '1BQ8', '1BQ9', '1BQF', '1BRF', '1BRV', '1BRZ', '1BTI', '1BTQ', '1BTR', '1BTS', '1BTT', '1BUB', '1BUS', '1BVJ', '1BW6', '1BWX', '1BX7', '1BX8', '1BY0', '1BY6', '1BYJ', '1BYV', '1BYY', '1BZ2', '1BZ3', '1BZB', '1BZG', '1BZK', '1BZT', '1BZU', '1C0O', '1C26', '1C2U', '1C32', '1C34', '1C35', '1C38', '1C49', '1C4B', '1C4E', '1C4S', '1C55', '1C56', '1C6W', '1C98', '1C9A', '1C9Z', '1CAA', '1CAD', '1CAP', '1CB3', '1CB9', '1CBH', '1CBN', '1CCF', '1CCM', '1CCN', '1CCQ', '1CCV', '1CE3', '1CE4', '1CEK', '1CEU', '1CFG', '1CFH', '1CFI', '1CHL', '1CHV', '1CIX', '1CKW', '1CKX', '1CKY', '1CKZ', '1CL4', '1CLF', '1CMR', '1CNL', '1CNN', '1CNR', '1CO4', '1COI', '1CQ0', '1CQ5', '1CQL', '1CQU', '1CR8', '1CRE', '1CRF', '1CRN', '1CS9', '1CSA', '1CT6', '1CTI', '1CV9', '1CVQ', '1CW5', '1CW6', '1CW8', '1CWX', '1CWZ', '1CXN', '1CXO', '1CXR', '1CXW', '1CYA', '1CYB', '1CZ6', '1D0R', '1D0T', '1D0U', '1D0W', '1D10', '1D11', '1D12', '1D13', '1D14', '1D15', '1D16', '1D17', '1D1E', '1D1F', '1D1H', '1D26', '1D2D', '1D2J', '1D2L', '1D33', '1D35', '1D36', '1D37', '1D38', '1D54', '1D58', '1D5Q', '1D61', '1D62', '1D67', '1D6B', '1D6X', '1D78', '1D79', '1D7N', '1D7T', '1D7Z', '1D82', '1D8G', '1D93', '1D9J', '1D9L', '1D9M', '1D9O', '1D9P', '1DA0', '1DA9', '1DB6', '1DEC', '1DEM', '1DEN', '1DEP', '1DF6', '1DFE', '1DFS', '1DFT', '1DFW', '1DFY', '1DFZ']
pdbcodes_to_simulate = ['1AS5', '1VII']
success = True # DEBUG: A few small test cases.
pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO']
pdbcodes_to_simulate = pdbcodes_to_build
for pdbcode in pdbcodes_to_build: for pdbcode in pdbcodes_to_build:
print "------------------------------------------------"
print pdbcode print pdbcode
output_pdb_filename = 'output.pdb' output_pdb_filename = 'output.pdb'
...@@ -106,18 +110,20 @@ def test_build_and_simulate(): ...@@ -106,18 +110,20 @@ def test_build_and_simulate():
positiveIon = 'Na+' positiveIon = 'Na+'
negativeIon = 'Cl-' negativeIon = 'Cl-'
# Keep track of list of failures.
failures = list()
success = True outfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
output_pdb_filename = outfile.name
outfile = open(output_pdb_filename, 'w')
timeout_seconds = 0.1 timeout_seconds = 30
watchdog = Watchdog(timeout_seconds) watchdog = Watchdog(timeout_seconds)
build_successful = False
try: try:
from pdbfixer.pdbfixer import PDBFixer, PdbStructure from pdbfixer.pdbfixer import PDBFixer
from simtk.openmm import app from simtk.openmm import app
stage = "Creating PDBFixer..." stage = "Creating PDBFixer..."
fixer = PDBFixer(pdbcode) fixer = PDBFixer(pdbid=pdbcode)
stage = "Finding missing residues..." stage = "Finding missing residues..."
fixer.findMissingResidues() fixer.findMissingResidues()
stage = "Finding nonstandard residues..." stage = "Finding nonstandard residues..."
...@@ -136,31 +142,40 @@ def test_build_and_simulate(): ...@@ -136,31 +142,40 @@ def test_build_and_simulate():
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile) app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
stage = "Done." stage = "Done."
outfile.close() outfile.close()
build_successful = True
except Watchdog: except Watchdog:
print "timed out fixing PDB %s" % pdbcode message = "timed out in stage %s" % stage
success = False print message
failures.append((pdbcode, Exception(message)))
except Exception as e: except Exception as e:
print "EXCEPTION DURING BUILD"
import traceback
print traceback.print_exc()
print str(e) print str(e)
success = False failures.append((pdbcode, e))
watchdog.stop() watchdog.stop()
del watchdog del watchdog
# Test simulating this with OpenMM. # Test simulating this with OpenMM.
if pdbcode in pdbcodes_to_simulate: if (pdbcode in pdbcodes_to_simulate) and (build_successful):
watchdog = Watchdog(timeout_seconds) watchdog = Watchdog(timeout_seconds)
try: try:
simulate(pdbcode, output_pdb_filename) simulate(pdbcode, output_pdb_filename)
except Watchdog: except Watchdog:
print "PDB code %s timed out in stage '%s'." % (pdbcode, stage) message = "timed out in simulation"
success = False print message
failures.append((pdbcode, Exception(message)))
except Exception as e: except Exception as e:
print "EXCEPTION DURING SIMULATE"
import traceback
print traceback.print_exc()
print str(e) print str(e)
success = False failures.append((pdbcode, e))
watchdog.stop() watchdog.stop()
del watchdog del watchdog
...@@ -168,8 +183,20 @@ def test_build_and_simulate(): ...@@ -168,8 +183,20 @@ def test_build_and_simulate():
# Clean up. # Clean up.
os.remove(output_pdb_filename) os.remove(output_pdb_filename)
if not success: print "------------------------------------------------"
raise Exception("build test failed on one or more PDB files.")
if len(failures) != 0:
print ""
print "SUMMARY OF FAILURES:"
print ""
for failure in failures:
(pdbcode, exception)
print "%6s : %s" % (pdbcode, str(exception))
print ""
raise Exception("Build test failed on one or more PDB files.")
else:
print "All tests succeeded."
if __name__ == '__main__': if __name__ == '__main__':
test_build_and_simulate() test_build_and_simulate()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment