Commit 5fd17b42 by John Chodera (MSKCC)

Updated interface based on @peastman suggestions. Started to convert docstrings…

Updated interface based on @peastman suggestions. Started to convert docstrings to numpy format. Updated test with some small PDBs.
parent d97651ac
......@@ -44,29 +44,11 @@ import os
import os.path
import math
# Imports for urlopen.
import six # compatibility with Python 2 and 3
if six.PY3:
# Imports for urlopen
if sys.version_info >= (3,0):
from urllib.request import urlopen
from urllib.parse import urlparse
from urllib.parse import (uses_relative, uses_netloc, uses_params)
else:
from urllib2 import urlopen
from urlparse import urlparse
from urlparse import uses_relative, uses_netloc, uses_params
_VALID_URLS = set(uses_relative + uses_netloc + uses_params)
_VALID_URLS.discard('')
def _is_url(url):
"""Check to see if a URL has a valid protocol.
from pandas/io.common.py Copyright 2014 Pandas Developers
Used under the BSD licence
"""
try:
return urlparse(url).scheme in _VALID_URLS
except:
return False
substitutions = {
'2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG',
......@@ -91,7 +73,22 @@ dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']
def _overlayPoints(points1, points2):
"""Given two sets of points, determine the translation and rotation that matches them as closely as possible.
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978)."""
Parameters
----------
points1 (numpy array of simtk.unit.Quantity with units compatible with distance) - reference set of coordinates
points2 (numpy array of simtk.unit.Quantity with units compatible with distance) - set of coordinates to be rotated
Returns
-------
translate2 - vector to translate points2 by in order to center it
rotate - rotation matrix to apply to centered points2 to map it on to points1
center1 - center of points1
Notes
-----
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).
"""
if len(points1) == 0:
return (Vec3(0, 0, 0), np.identity(3), Vec3(0, 0, 0))
......@@ -119,29 +116,45 @@ def _overlayPoints(points1, points2):
return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object):
"""PDBFixer implements many tools for fixing problems in PDB files."""
def __init__(self, structure):
"""Create a new PDBFixer to fix problems in a PDB file.
Parameters:
- structure (PdbStructure) the starting PDB structure containing problems to be fixed
or a URL, filename, gzipped filename, or PDB code
"""PDBFixer implements many tools for fixing problems in PDB files.
"""
Examples:
def __init__(self, structure=None, filename=None, file=None, url=None, pdbid=None):
"""Create a new PDBFixer instance to fix problems in a PDB file.
Parameters
----------
structure : PdbStructure, optional, default=None
A PdbStructure object containing the PDB file to be repaired.
filename : str, optional, default=None
A filename specifying the file from which the PDB file is to be read.
file : file, optional, default=None
A file-like object from which the PDB file is to be read.
The file is not closed after reading.
url : str, optional, default=None
A URL specifying the internet location from which the PDB file contents should be retrieved.
pdbid : str, optional, default=None
A four-letter PDB code specifying the structure to be retrieved from the RCSB.
Notes
-----
Only one of structure, filename, file, url, or pdbid may be specified or an exception will be thrown.
Examples
--------
Start from a PdbStructure
>>> pdbcode = '1VII'
>>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbcode
>>> pdbid = '1VII'
>>> url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
>>> file = urlopen(url)
>>> structure = PdbStructure(file)
>>> fixer = PDBFixer(structure)
>>> fixer = PDBFixer(structure=structure)
Start from a file object.
>>> file = urlopen(url)
>>> fixer = PDBFixer(file)
>>> fixer = PDBFixer(file=file)
Start from a filename.
......@@ -150,42 +163,52 @@ class PDBFixer(object):
>>> outfile = open(filename, 'w')
>>> outfile.write(file.read())
>>> outfile.close()
>>> fixer = PDBFixer(filename)
>>> fixer = PDBFixer(filename=filename)
Start from a URL.
>>> fixer = PDBFixer(url)
>>> fixer = PDBFixer(url=url)
Start from a PDB code.
>>> fixer = PDBFixer(pdbcode)
>>> fixer = PDBFixer(pdbid=pdbid)
"""
if isinstance(structure, PdbStructure):
# We already have a PDB structure; do nothing.
pass
# Check to make sure only one option has been specified.
if bool(structure) + bool(filename) + bool(file) + bool(url) + bool(pdbid) != 1:
raise Exception("Exactly one option [structure, filename, file, url, pdbid] must be specified.")
if isinstance(structure, str):
if os.path.exists(structure):
# First, try opening it as a local file.
file = open(structure, 'r')
if structure:
# A PDB structure has been specified; do nothing.
pass
elif filename:
# A local file has been specified.
file = open(filename, 'r')
structure = PdbStructure(file)
file.close()
elif _is_url(structure):
# If it's a URL, try that.
file = urlopen(structure)
elif file:
# A file-like object has been specified.
structure = PdbStructure(file)
file.close()
elif len(structure) == 4:
# If it's a PDB code, try that.
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % structure
elif url:
# A URL has been specified.
file = urlopen(url)
structure = PdbStructure(file)
file.close()
elif hasattr(structure, 'read'):
# If a file-like object, read it.
structure = PdbStructure(structure)
elif pdbid:
# A PDB id has been specified.
url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
file = urlopen(url)
# Read contents all at once and split into lines, since urlopen doesn't like it when we read one line at a time over the network.
contents = file.read()
lines = contents.split('\n')
file.close()
structure = PdbStructure(lines)
# Check the structure has some atoms in it.
atoms = list(structure.iter_atoms())
if len(atoms)==0:
raise Exception("Structure contains no atoms.")
self.structure = structure
self.pdb = app.PDBFile(structure)
......@@ -203,8 +226,30 @@ class PDBFixer(object):
name = next(templatePdb.topology.residues()).name
self.templates[name] = templatePdb
return
def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added."""
"""Create a new Topology in which missing atoms have been added.
Parameters
----------
heavyAtomsOnly : bool
If True, only heavy atoms will be added to the topology.
omitUnknownMolecules : bool
If True, unknown molecules will be omitted from the topology.
Returns
-------
newTopology : simtk.openmm.app.Topology
A new Topology object containing atoms from the old.
newPositions : list of simtk.unit.Quantity with units compatible with nanometers
Atom positions for the new Topology object.
newAtoms : simtk.openmm.app.Topology.Atom
New atom objects.
existingAtomMap : dict
Mapping from old atoms to new atoms.
"""
newTopology = app.Topology()
newPositions = []*unit.nanometer
......
......@@ -111,9 +111,7 @@ setup(
packages=find_packages(),
package_data={'pdbfixer': find_package_data()},
zip_safe=False,
install_requires=[
'six',
],
install_requires=[],
entry_points={'console_scripts': ['pdbfixer = pdbfixer.pdbfixer:main']})
check_dependencies()
......@@ -6,6 +6,7 @@ import os
import os.path
import sys
import numpy
import tempfile
from threading import Timer
......@@ -36,7 +37,8 @@ def simulate(pdbcode, pdb_filename):
pdb = app.PDBFile(pdb_filename)
# Set up implicit solvent forcefield.
forcefield = app.ForceField('amber99sbildn.xml')
#forcefield = app.ForceField('amber99sbildn.xml')
forcefield = app.ForceField('amber10.xml')
# Create the system.
system = forcefield.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=app.HBonds)
......@@ -82,17 +84,19 @@ def simulate(pdbcode, pdb_filename):
return
def test_build_and_simulate():
# These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm
# DEBUG: These are tough PDB codes from http://www.umass.edu/microbio/chime/pe_beta/pe/protexpl/badpdbs.htm
pdbcodes_to_build = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
pdbcodes_to_simulate = ['1AS5', '1CBN', '1DPO', '1IGY', '1HAG', '1IAO', '4CPA', '1QCQ']
# DEBUG: Simpler test cases.
pdbcodes_to_build = ['1AS5', '1VII']
pdbcodes_to_simulate = ['1AS5', '1VII']
# DEBUG: Small test cases.
pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO', '1APF', '1APO', '1APQ', '1AQG', '1AQO', '1AQQ', '1AQR', '1AQS', '1ARD', '1ARE', '1ARF', '1ARJ', '1ARK', '1AS5', '1AT4', '1ATO', '1ATV', '1ATW', '1ATX', '1AV3', '1AW4', '1AW6', '1AWY', '1AXH', '1AY3', '1AYJ', '1AZ6', '1AZH', '1AZJ', '1AZK', '1B03', '1B0Q', '1B13', '1B1V', '1B2J', '1B36', '1B45', '1B4G', '1B4I', '1B4Y', '1B5N', '1B8W', '1B9G', '1B9P', '1B9Q', '1B9U', '1BA4', '1BA5', '1BA6', '1BAH', '1BAL', '1BBA', '1BBG', '1BBL', '1BBO', '1BCV', '1BD1', '1BDC', '1BDD', '1BDE', '1BDK', '1BDS', '1BE7', '1BEI', '1BF0', '1BF9', '1BFW', '1BFY', '1BFZ', '1BGK', '1BGZ', '1BH0', '1BH1', '1BH4', '1BH7', '1BHI', '1BHP', '1BIG', '1BJB', '1BJC', '1BJH', '1BK2', '1BK8', '1BKT', '1BKU', '1BL1', '1BM4', '1BMX', '1BN0', '1BNB', '1BNX', '1BOE', '1BOR', '1BPI', '1BPT', '1BQ8', '1BQ9', '1BQF', '1BRF', '1BRV', '1BRZ', '1BTI', '1BTQ', '1BTR', '1BTS', '1BTT', '1BUB', '1BUS', '1BVJ', '1BW6', '1BWX', '1BX7', '1BX8', '1BY0', '1BY6', '1BYJ', '1BYV', '1BYY', '1BZ2', '1BZ3', '1BZB', '1BZG', '1BZK', '1BZT', '1BZU', '1C0O', '1C26', '1C2U', '1C32', '1C34', '1C35', '1C38', '1C49', '1C4B', '1C4E', '1C4S', '1C55', '1C56', '1C6W', '1C98', '1C9A', '1C9Z', '1CAA', '1CAD', '1CAP', '1CB3', '1CB9', '1CBH', '1CBN', '1CCF', '1CCM', '1CCN', '1CCQ', '1CCV', '1CE3', '1CE4', '1CEK', '1CEU', '1CFG', '1CFH', '1CFI', '1CHL', '1CHV', '1CIX', '1CKW', '1CKX', '1CKY', '1CKZ', '1CL4', '1CLF', '1CMR', '1CNL', '1CNN', '1CNR', '1CO4', '1COI', '1CQ0', '1CQ5', '1CQL', '1CQU', '1CR8', '1CRE', '1CRF', '1CRN', '1CS9', '1CSA', '1CT6', '1CTI', '1CV9', '1CVQ', '1CW5', '1CW6', '1CW8', '1CWX', '1CWZ', '1CXN', '1CXO', '1CXR', '1CXW', '1CYA', '1CYB', '1CZ6', '1D0R', '1D0T', '1D0U', '1D0W', '1D10', '1D11', '1D12', '1D13', '1D14', '1D15', '1D16', '1D17', '1D1E', '1D1F', '1D1H', '1D26', '1D2D', '1D2J', '1D2L', '1D33', '1D35', '1D36', '1D37', '1D38', '1D54', '1D58', '1D5Q', '1D61', '1D62', '1D67', '1D6B', '1D6X', '1D78', '1D79', '1D7N', '1D7T', '1D7Z', '1D82', '1D8G', '1D93', '1D9J', '1D9L', '1D9M', '1D9O', '1D9P', '1DA0', '1DA9', '1DB6', '1DEC', '1DEM', '1DEN', '1DEP', '1DF6', '1DFE', '1DFS', '1DFT', '1DFW', '1DFY', '1DFZ']
success = True
# DEBUG: A few small test cases.
pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AO9', '1AOO']
pdbcodes_to_simulate = pdbcodes_to_build
for pdbcode in pdbcodes_to_build:
print "------------------------------------------------"
print pdbcode
output_pdb_filename = 'output.pdb'
......@@ -106,18 +110,20 @@ def test_build_and_simulate():
positiveIon = 'Na+'
negativeIon = 'Cl-'
# Keep track of list of failures.
failures = list()
success = True
outfile = open(output_pdb_filename, 'w')
outfile = tempfile.NamedTemporaryFile(mode='w', delete=False)
output_pdb_filename = outfile.name
timeout_seconds = 0.1
timeout_seconds = 30
watchdog = Watchdog(timeout_seconds)
build_successful = False
try:
from pdbfixer.pdbfixer import PDBFixer, PdbStructure
from pdbfixer.pdbfixer import PDBFixer
from simtk.openmm import app
stage = "Creating PDBFixer..."
fixer = PDBFixer(pdbcode)
fixer = PDBFixer(pdbid=pdbcode)
stage = "Finding missing residues..."
fixer.findMissingResidues()
stage = "Finding nonstandard residues..."
......@@ -136,31 +142,40 @@ def test_build_and_simulate():
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
stage = "Done."
outfile.close()
build_successful = True
except Watchdog:
print "timed out fixing PDB %s" % pdbcode
success = False
message = "timed out in stage %s" % stage
print message
failures.append((pdbcode, Exception(message)))
except Exception as e:
print "EXCEPTION DURING BUILD"
import traceback
print traceback.print_exc()
print str(e)
success = False
failures.append((pdbcode, e))
watchdog.stop()
del watchdog
# Test simulating this with OpenMM.
if pdbcode in pdbcodes_to_simulate:
if (pdbcode in pdbcodes_to_simulate) and (build_successful):
watchdog = Watchdog(timeout_seconds)
try:
simulate(pdbcode, output_pdb_filename)
except Watchdog:
print "PDB code %s timed out in stage '%s'." % (pdbcode, stage)
success = False
message = "timed out in simulation"
print message
failures.append((pdbcode, Exception(message)))
except Exception as e:
print "EXCEPTION DURING SIMULATE"
import traceback
print traceback.print_exc()
print str(e)
success = False
failures.append((pdbcode, e))
watchdog.stop()
del watchdog
......@@ -168,8 +183,20 @@ def test_build_and_simulate():
# Clean up.
os.remove(output_pdb_filename)
if not success:
raise Exception("build test failed on one or more PDB files.")
print "------------------------------------------------"
if len(failures) != 0:
print ""
print "SUMMARY OF FAILURES:"
print ""
for failure in failures:
(pdbcode, exception)
print "%6s : %s" % (pdbcode, str(exception))
print ""
raise Exception("Build test failed on one or more PDB files.")
else:
print "All tests succeeded."
if __name__ == '__main__':
test_build_and_simulate()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment