Commit af019e18 by Peter Eastman

Restructured PDBFixer to be an externally callable class

parent 23667642
...@@ -42,7 +42,7 @@ import os ...@@ -42,7 +42,7 @@ import os
import os.path import os.path
import math import math
substitutions = { _substitutions = {
'2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG', '2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG',
'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA', 'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA',
'BUC':'CYS', 'BUG':'LEU', 'C5C':'CYS', 'C6C':'CYS', 'CCS':'CYS', 'CEA':'CYS', 'CGU':'GLU', 'CHG':'ALA', 'CLE':'LEU', 'CME':'CYS', 'BUC':'CYS', 'BUG':'LEU', 'C5C':'CYS', 'C6C':'CYS', 'CCS':'CYS', 'CEA':'CYS', 'CGU':'GLU', 'CHG':'ALA', 'CLE':'LEU', 'CME':'CYS',
...@@ -59,7 +59,7 @@ substitutions = { ...@@ -59,7 +59,7 @@ substitutions = {
'TPL':'TRP', 'TPO':'THR', 'TPQ':'ALA', 'TRG':'LYS', 'TRO':'TRP', 'TYB':'TYR', 'TYQ':'TYR', 'TYS':'TYR', 'TYY':'TYR' 'TPL':'TRP', 'TPO':'THR', 'TPQ':'ALA', 'TRG':'LYS', 'TRO':'TRP', 'TYB':'TYR', 'TYQ':'TYR', 'TYS':'TYR', 'TYY':'TYR'
} }
def overlayPoints(points1, points2): def _overlayPoints(points1, points2):
"""Given two sets of points, determine the translation and rotation that matches them as closely as possible. """Given two sets of points, determine the translation and rotation that matches them as closely as possible.
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).""" This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978)."""
...@@ -89,8 +89,21 @@ def overlayPoints(points1, points2): ...@@ -89,8 +89,21 @@ def overlayPoints(points1, points2):
(u, s, v) = lin.svd(R) (u, s, v) = lin.svd(R)
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object):
def __init__(self, pdb):
self.pdb = pdb
self.topology = pdb.topology
self.positions = pdb.positions
def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly, omitUnknownMolecules): # Load the templates.
self.templates = {}
for file in os.listdir('templates'):
templatePdb = app.PDBFile(os.path.join('templates', file))
name = templatePdb.topology.residues().next().name
self.templates[name] = templatePdb
def _addAtomsToTopology(self, missingAtoms, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added.""" """Create a new Topology in which missing atoms have been added."""
newTopology = app.Topology() newTopology = app.Topology()
...@@ -99,8 +112,8 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -99,8 +112,8 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
existingAtomMap = {} existingAtomMap = {}
addedAtomMap = {} addedAtomMap = {}
addedOXT = [] addedOXT = []
for chain in topology.chains(): for chain in self.topology.chains():
if omitUnknownMolecules and not any(residue.name in templates for residue in chain.residues()): if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()):
continue continue
newChain = newTopology.addChain() newChain = newTopology.addChain()
chainResidues = list(chain.residues()) chainResidues = list(chain.residues())
...@@ -114,13 +127,13 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -114,13 +127,13 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom) newAtoms.append(newAtom)
existingAtomMap[atom] = newAtom existingAtomMap[atom] = newAtom
newPositions.append(positions[atom.index]) newPositions.append(self.positions[atom.index])
if residue in missingAtoms: if residue in missingAtoms:
# Find corresponding atoms in the residue and the template. # Find corresponding atoms in the residue and the template.
template = templates[residue.name] template = self.templates[residue.name]
atomPositions = dict((atom.name, positions[atom.index]) for atom in residue.atoms()) atomPositions = dict((atom.name, self.positions[atom.index]) for atom in residue.atoms())
points1 = [] points1 = []
points2 = [] points2 = []
for atom in template.topology.atoms(): for atom in template.topology.atoms():
...@@ -130,7 +143,7 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -130,7 +143,7 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
# Compute the optimal transform to overlay them. # Compute the optimal transform to overlay them.
(translate2, rotate, translate1) = overlayPoints(points1, points2) (translate2, rotate, translate1) = _overlayPoints(points1, points2)
# Add the missing atoms. # Add the missing atoms.
...@@ -144,8 +157,8 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -144,8 +157,8 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
# If a terminal OXT is missing, add it. # If a terminal OXT is missing, add it.
if residue == chainResidues[-1] and residue.name in templates: if residue == chainResidues[-1] and residue.name in self.templates:
atomPositions = dict((atom.name, positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms()) atomPositions = dict((atom.name, self.positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms())
if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']): if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']):
newAtom = newTopology.addAtom('OXT', oxygen, newResidue) newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
newAtoms.append(newAtom) newAtoms.append(newAtom)
...@@ -158,14 +171,14 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -158,14 +171,14 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
# Add bonds from the original Topology. # Add bonds from the original Topology.
for atom1, atom2 in topology.bonds(): for atom1, atom2 in self.topology.bonds():
if atom1 in existingAtomMap and atom2 in existingAtomMap: if atom1 in existingAtomMap and atom2 in existingAtomMap:
newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2]) newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2])
# Add bonds that connect to new atoms. # Add bonds that connect to new atoms.
for residue in missingAtoms: for residue in missingAtoms:
template = templates[residue.name] template = self.templates[residue.name]
atomsByName = dict((atom.name, atom) for atom in residue.atoms()) atomsByName = dict((atom.name, atom) for atom in residue.atoms())
addedAtoms = addedAtomMap[residue] addedAtoms = addedAtomMap[residue]
for atom1, atom2 in template.topology.bonds(): for atom1, atom2 in template.topology.bonds():
...@@ -187,33 +200,19 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly ...@@ -187,33 +200,19 @@ def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly
return (newTopology, newPositions, newAtoms, existingAtomMap) return (newTopology, newPositions, newAtoms, existingAtomMap)
def findNonstandardResidues(self):
    """Collect the residues that have a standard replacement.

    A residue of ``self.topology`` is selected when its name is a key of the
    module-level ``_substitutions`` table.  The residues are returned as a
    list, in the order the topology yields them.
    """
    nonstandard = []
    for residue in self.topology.residues():
        if residue.name in _substitutions:
            nonstandard.append(residue)
    return nonstandard
# Load the PDB file. def replaceNonstandardResidues(self, replaceResidues):
if len(replaceResidues) > 0:
pdb = app.PDBFile(sys.argv[1])
topology = pdb.topology
positions = pdb.positions
# Load the templates.
templates = {}
for file in os.listdir('templates'):
templatePdb = app.PDBFile(os.path.join('templates', file))
name = templatePdb.topology.residues().next().name
templates[name] = templatePdb
# Find non-standard residues to replace with standard versions.
replaceResidues = [r for r in topology.residues() if r.name in substitutions]
if len(replaceResidues) > 0:
deleteHeavyAtoms = set() deleteHeavyAtoms = set()
deleteHydrogens = set() deleteHydrogens = set()
# Find heavy atoms that should be deleted. # Find heavy atoms that should be deleted.
for residue in replaceResidues: for residue in replaceResidues:
residue.name = substitutions[residue.name] residue.name = _substitutions[residue.name]
template = templates[residue.name] template = self.templates[residue.name]
standardAtoms = set(atom.name for atom in template.topology.atoms()) standardAtoms = set(atom.name for atom in template.topology.atoms())
for atom in residue.atoms(): for atom in residue.atoms():
if atom.element not in (None, hydrogen) and atom.name not in standardAtoms: if atom.element not in (None, hydrogen) and atom.name not in standardAtoms:
...@@ -221,7 +220,7 @@ if len(replaceResidues) > 0: ...@@ -221,7 +220,7 @@ if len(replaceResidues) > 0:
# We should also delete any hydrogen bonded to a heavy atom that is being deleted. # We should also delete any hydrogen bonded to a heavy atom that is being deleted.
for atom1, atom2 in topology.bonds(): for atom1, atom2 in self.topology.bonds():
if atom1 not in deleteHeavyAtoms: if atom1 not in deleteHeavyAtoms:
(atom1, atom2) = (atom2, atom1) (atom1, atom2) = (atom2, atom1)
if atom1 in deleteHeavyAtoms: if atom1 in deleteHeavyAtoms:
...@@ -230,17 +229,16 @@ if len(replaceResidues) > 0: ...@@ -230,17 +229,16 @@ if len(replaceResidues) > 0:
# Delete them. # Delete them.
modeller = app.Modeller(topology, positions) modeller = app.Modeller(self.topology, self.positions)
modeller.delete(deleteHeavyAtoms.union(deleteHydrogens)) modeller.delete(deleteHeavyAtoms.union(deleteHydrogens))
topology = modeller.topology self.topology = modeller.topology
positions = modeller.positions self.positions = modeller.positions
# Loop over residues to see which ones have missing heavy atoms. def findMissingAtoms(self):
missingAtoms = {}
missingAtoms = {} for residue in self.topology.residues():
for residue in topology.residues(): if residue.name in self.templates:
if residue.name in templates: template = self.templates[residue.name]
template = templates[residue.name]
atomNames = set(atom.name for atom in residue.atoms()) atomNames = set(atom.name for atom in residue.atoms())
missing = [] missing = []
for atom in template.topology.atoms(): for atom in template.topology.atoms():
...@@ -248,48 +246,56 @@ for residue in topology.residues(): ...@@ -248,48 +246,56 @@ for residue in topology.residues():
missing.append(atom) missing.append(atom)
if len(missing) > 0: if len(missing) > 0:
missingAtoms[residue] = missing missingAtoms[residue] = missing
return missingAtoms
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules. def addMissingAtoms(self, missingAtoms):
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
(newTopology, newPositions, newAtoms, existingAtomMap) = addMissingAtoms(topology, positions, templates, missingAtoms, True, True) (newTopology, newPositions, newAtoms, existingAtomMap) = self._addAtomsToTopology(missingAtoms, True, True)
# Create a System for energy minimizing it. # Create a System for energy minimizing it.
forcefield = app.ForceField('soft.xml') forcefield = app.ForceField('soft.xml')
system = forcefield.createSystem(newTopology) system = forcefield.createSystem(newTopology)
# Set any previously existing atoms to be massless, so they won't move. # Set any previously existing atoms to be massless, so they won't move.
for atom in existingAtomMap.itervalues(): for atom in existingAtomMap.itervalues():
system.setParticleMass(atom.index, 0.0) system.setParticleMass(atom.index, 0.0)
# If any heavy atoms were omitted, add them back to avoid steric clashes. # If any heavy atoms were omitted, add them back to avoid steric clashes.
nonbonded = [f for f in system.getForces() if isinstance(f, mm.CustomNonbondedForce)][0] nonbonded = [f for f in system.getForces() if isinstance(f, mm.CustomNonbondedForce)][0]
for atom in topology.atoms(): for atom in self.topology.atoms():
if atom.element not in (None, hydrogen) and atom not in existingAtomMap: if atom.element not in (None, hydrogen) and atom not in existingAtomMap:
system.addParticle(0.0) system.addParticle(0.0)
nonbonded.addParticle([]) nonbonded.addParticle([])
newPositions.append(positions[atom.index]) newPositions.append(self.positions[atom.index])
# For efficiency, only compute interactions that involve a new atom. # For efficiency, only compute interactions that involve a new atom.
nonbonded.addInteractionGroup([atom.index for atom in newTopology.atoms() if atom in newAtoms], range(system.getNumParticles())) nonbonded.addInteractionGroup([atom.index for atom in newTopology.atoms() if atom in newAtoms], range(system.getNumParticles()))
# Do an energy minimization. # Do an energy minimization.
integrator = mm.LangevinIntegrator(300*unit.kelvin, 10/unit.picosecond, 5*unit.femtosecond) integrator = mm.LangevinIntegrator(300*unit.kelvin, 10/unit.picosecond, 5*unit.femtosecond)
context = mm.Context(system, integrator) context = mm.Context(system, integrator)
context.setPositions(newPositions) context.setPositions(newPositions)
mm.LocalEnergyMinimizer.minimize(context) mm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True) state = context.getState(getPositions=True)
# Now create a new Topology, including all atoms from the original one and adding the missing atoms. # Now create a new Topology, including all atoms from the original one and adding the missing atoms.
(newTopology2, newPositions2, newAtoms2, existingAtomMap2) = addMissingAtoms(topology, positions, templates, missingAtoms, False, False) (newTopology2, newPositions2, newAtoms2, existingAtomMap2) = self._addAtomsToTopology(missingAtoms, False, False)
# Copy over the minimized positions for the new atoms. # Copy over the minimized positions for the new atoms.
for a1, a2 in zip(newAtoms, newAtoms2): for a1, a2 in zip(newAtoms, newAtoms2):
newPositions2[a2.index] = state.getPositions()[a1.index] newPositions2[a2.index] = state.getPositions()[a1.index]
app.PDBFile.writeFile(newTopology2, newPositions2, open('output.pdb', 'w')) app.PDBFile.writeFile(newTopology2, newPositions2, open('output.pdb', 'w'))
if __name__=='__main__':
    # Command-line entry point: sys.argv[1] is the path of the PDB file to fix.
    inputPdb = app.PDBFile(sys.argv[1])
    fixer = PDBFixer(inputPdb)
    # Rename non-standard residues first, so that the missing-atom search
    # looks up templates using the updated residue names.
    nonstandardResidues = fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues(nonstandardResidues)
    missingAtoms = fixer.findMissingAtoms()
    fixer.addMissingAtoms(missingAtoms)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment