Commit af019e18 by Peter Eastman

Restructured PDBFixer to be an externally callable class

parent 23667642
...@@ -42,7 +42,7 @@ import os ...@@ -42,7 +42,7 @@ import os
import os.path import os.path
import math import math
substitutions = { _substitutions = {
'2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG', '2AS':'ASP', '3AH':'HIS', '5HP':'GLU', 'ACL':'ARG', 'AGM':'ARG', 'AIB':'ALA', 'ALM':'ALA', 'ALO':'THR', 'ALY':'LYS', 'ARM':'ARG',
'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA', 'ASA':'ASP', 'ASB':'ASP', 'ASK':'ASP', 'ASL':'ASP', 'ASQ':'ASP', 'AYA':'ALA', 'BCS':'CYS', 'BHD':'ASP', 'BMT':'THR', 'BNN':'ALA',
'BUC':'CYS', 'BUG':'LEU', 'C5C':'CYS', 'C6C':'CYS', 'CCS':'CYS', 'CEA':'CYS', 'CGU':'GLU', 'CHG':'ALA', 'CLE':'LEU', 'CME':'CYS', 'BUC':'CYS', 'BUG':'LEU', 'C5C':'CYS', 'C6C':'CYS', 'CCS':'CYS', 'CEA':'CYS', 'CGU':'GLU', 'CHG':'ALA', 'CLE':'LEU', 'CME':'CYS',
...@@ -59,7 +59,7 @@ substitutions = { ...@@ -59,7 +59,7 @@ substitutions = {
'TPL':'TRP', 'TPO':'THR', 'TPQ':'ALA', 'TRG':'LYS', 'TRO':'TRP', 'TYB':'TYR', 'TYQ':'TYR', 'TYS':'TYR', 'TYY':'TYR' 'TPL':'TRP', 'TPO':'THR', 'TPQ':'ALA', 'TRG':'LYS', 'TRO':'TRP', 'TYB':'TYR', 'TYQ':'TYR', 'TYS':'TYR', 'TYY':'TYR'
} }
def overlayPoints(points1, points2): def _overlayPoints(points1, points2):
"""Given two sets of points, determine the translation and rotation that matches them as closely as possible. """Given two sets of points, determine the translation and rotation that matches them as closely as possible.
This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978).""" This is based on W. Kabsch, Acta Cryst., A34, pp. 828-829 (1978)."""
...@@ -89,207 +89,213 @@ def overlayPoints(points1, points2): ...@@ -89,207 +89,213 @@ def overlayPoints(points1, points2):
(u, s, v) = lin.svd(R) (u, s, v) = lin.svd(R)
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object):
def addMissingAtoms(topology, positions, templates, missingAtoms, heavyAtomsOnly, omitUnknownMolecules): def __init__(self, pdb):
"""Create a new Topology in which missing atoms have been added.""" self.pdb = pdb
self.topology = pdb.topology
self.positions = pdb.positions
newTopology = app.Topology() # Load the templates.
newPositions = []*unit.nanometer
newAtoms = [] self.templates = {}
existingAtomMap = {} for file in os.listdir('templates'):
addedAtomMap = {} templatePdb = app.PDBFile(os.path.join('templates', file))
addedOXT = [] name = templatePdb.topology.residues().next().name
for chain in topology.chains(): self.templates[name] = templatePdb
if omitUnknownMolecules and not any(residue.name in templates for residue in chain.residues()):
continue def _addAtomsToTopology(self, missingAtoms, heavyAtomsOnly, omitUnknownMolecules):
newChain = newTopology.addChain() """Create a new Topology in which missing atoms have been added."""
chainResidues = list(chain.residues())
for residue in chain.residues(): newTopology = app.Topology()
newResidue = newTopology.addResidue(residue.name, newChain) newPositions = []*unit.nanometer
newAtoms = []
# Add the existing heavy atoms. existingAtomMap = {}
addedAtomMap = {}
for atom in residue.atoms(): addedOXT = []
if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen): for chain in self.topology.chains():
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()):
newAtoms.append(newAtom) continue
existingAtomMap[atom] = newAtom newChain = newTopology.addChain()
newPositions.append(positions[atom.index]) chainResidues = list(chain.residues())
if residue in missingAtoms: for residue in chain.residues():
newResidue = newTopology.addResidue(residue.name, newChain)
# Find corresponding atoms in the residue and the template.
template = templates[residue.name] # Add the existing heavy atoms.
atomPositions = dict((atom.name, positions[atom.index]) for atom in residue.atoms())
points1 = []
points2 = []
for atom in template.topology.atoms():
if atom.name in atomPositions:
points1.append(atomPositions[atom.name].value_in_unit(unit.nanometer))
points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))
# Compute the optimal transform to overlay them. for atom in residue.atoms():
if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen):
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
existingAtomMap[atom] = newAtom
newPositions.append(self.positions[atom.index])
if residue in missingAtoms:
# Find corresponding atoms in the residue and the template.
template = self.templates[residue.name]
atomPositions = dict((atom.name, self.positions[atom.index]) for atom in residue.atoms())
points1 = []
points2 = []
for atom in template.topology.atoms():
if atom.name in atomPositions:
points1.append(atomPositions[atom.name].value_in_unit(unit.nanometer))
points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))
# Compute the optimal transform to overlay them.
(translate2, rotate, translate1) = _overlayPoints(points1, points2)
# Add the missing atoms.
addedAtomMap[residue] = {}
for atom in missingAtoms[residue]:
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
addedAtomMap[residue][atom] = newAtom
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)
(translate2, rotate, translate1) = overlayPoints(points1, points2) # If a terminal OXT is missing, add it.
# Add the missing atoms. if residue == chainResidues[-1] and residue.name in self.templates:
atomPositions = dict((atom.name, self.positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms())
if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']):
newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
newAtoms.append(newAtom)
addedOXT.append(newAtom)
d_ca_o = atomPositions['O']-atomPositions['CA']
d_ca_c = atomPositions['C']-atomPositions['CA']
d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c))
v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
# Add bonds from the original Topology.
for atom1, atom2 in self.topology.bonds():
if atom1 in existingAtomMap and atom2 in existingAtomMap:
newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2])
# Add bonds that connect to new atoms.
for residue in missingAtoms:
template = self.templates[residue.name]
atomsByName = dict((atom.name, atom) for atom in residue.atoms())
addedAtoms = addedAtomMap[residue]
for atom1, atom2 in template.topology.bonds():
if atom1 in addedAtoms or atom2 in addedAtoms:
if atom1 in addedAtoms:
bondAtom1 = addedAtoms[atom1]
else:
bondAtom1 = existingAtomMap[atomsByName[atom1.name]]
if atom2 in addedAtoms:
bondAtom2 = addedAtoms[atom2]
else:
bondAtom2 = existingAtomMap[atomsByName[atom2.name]]
newTopology.addBond(bondAtom1, bondAtom2)
for atom1 in addedOXT:
atom2 = [atom for atom in atom1.residue.atoms() if atom.name == 'C'][0]
newTopology.addBond(atom1, atom2)
# Return the results.
return (newTopology, newPositions, newAtoms, existingAtomMap)
def findNonstandardResidues(self):
return [r for r in self.topology.residues() if r.name in _substitutions]
def replaceNonstandardResidues(self, replaceResidues):
if len(replaceResidues) > 0:
deleteHeavyAtoms = set()
deleteHydrogens = set()
# Find heavy atoms that should be deleted.
for residue in replaceResidues:
residue.name = _substitutions[residue.name]
template = self.templates[residue.name]
standardAtoms = set(atom.name for atom in template.topology.atoms())
for atom in residue.atoms():
if atom.element not in (None, hydrogen) and atom.name not in standardAtoms:
deleteHeavyAtoms.add(atom)
addedAtomMap[residue] = {} # We should also delete any hydrogen bonded to a heavy atom that is being deleted.
for atom in missingAtoms[residue]:
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
addedAtomMap[residue][atom] = newAtom
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)
# If a terminal OXT is missing, add it. for atom1, atom2 in self.topology.bonds():
if atom1 not in deleteHeavyAtoms:
(atom1, atom2) = (atom2, atom1)
if atom1 in deleteHeavyAtoms:
if atom2.element in (None, hydrogen):
deleteHydrogens.add(atom2)
if residue == chainResidues[-1] and residue.name in templates: # Delete them.
atomPositions = dict((atom.name, positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms())
if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']): modeller = app.Modeller(self.topology, self.positions)
newAtom = newTopology.addAtom('OXT', oxygen, newResidue) modeller.delete(deleteHeavyAtoms.union(deleteHydrogens))
newAtoms.append(newAtom) self.topology = modeller.topology
addedOXT.append(newAtom) self.positions = modeller.positions
d_ca_o = atomPositions['O']-atomPositions['CA']
d_ca_c = atomPositions['C']-atomPositions['CA']
d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c))
v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
# Add bonds from the original Topology.
for atom1, atom2 in topology.bonds():
if atom1 in existingAtomMap and atom2 in existingAtomMap:
newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2])
# Add bonds that connect to new atoms.
for residue in missingAtoms:
template = templates[residue.name]
atomsByName = dict((atom.name, atom) for atom in residue.atoms())
addedAtoms = addedAtomMap[residue]
for atom1, atom2 in template.topology.bonds():
if atom1 in addedAtoms or atom2 in addedAtoms:
if atom1 in addedAtoms:
bondAtom1 = addedAtoms[atom1]
else:
bondAtom1 = existingAtomMap[atomsByName[atom1.name]]
if atom2 in addedAtoms:
bondAtom2 = addedAtoms[atom2]
else:
bondAtom2 = existingAtomMap[atomsByName[atom2.name]]
newTopology.addBond(bondAtom1, bondAtom2)
for atom1 in addedOXT:
atom2 = [atom for atom in atom1.residue.atoms() if atom.name == 'C'][0]
newTopology.addBond(atom1, atom2)
# Return the results.
return (newTopology, newPositions, newAtoms, existingAtomMap)
# Load the PDB file.
pdb = app.PDBFile(sys.argv[1])
topology = pdb.topology
positions = pdb.positions
# Load the templates.
templates = {}
for file in os.listdir('templates'):
templatePdb = app.PDBFile(os.path.join('templates', file))
name = templatePdb.topology.residues().next().name
templates[name] = templatePdb
# Find non-standard residues to replace with standard versions.
replaceResidues = [r for r in topology.residues() if r.name in substitutions]
if len(replaceResidues) > 0:
deleteHeavyAtoms = set()
deleteHydrogens = set()
# Find heavy atoms that should be deleted. def findMissingAtoms(self):
missingAtoms = {}
for residue in self.topology.residues():
if residue.name in self.templates:
template = self.templates[residue.name]
atomNames = set(atom.name for atom in residue.atoms())
missing = []
for atom in template.topology.atoms():
if atom.name not in atomNames:
missing.append(atom)
if len(missing) > 0:
missingAtoms[residue] = missing
return missingAtoms
for residue in replaceResidues: def addMissingAtoms(self, missingAtoms):
residue.name = substitutions[residue.name] # Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
template = templates[residue.name]
standardAtoms = set(atom.name for atom in template.topology.atoms())
for atom in residue.atoms():
if atom.element not in (None, hydrogen) and atom.name not in standardAtoms:
deleteHeavyAtoms.add(atom)
# We should also delete any hydrogen bonded to a heavy atom that is being deleted. (newTopology, newPositions, newAtoms, existingAtomMap) = self._addAtomsToTopology(missingAtoms, True, True)
for atom1, atom2 in topology.bonds(): # Create a System for energy minimizing it.
if atom1 not in deleteHeavyAtoms:
(atom1, atom2) = (atom2, atom1) forcefield = app.ForceField('soft.xml')
if atom1 in deleteHeavyAtoms: system = forcefield.createSystem(newTopology)
if atom2.element in (None, hydrogen):
deleteHydrogens.add(atom2) # Set any previously existing atoms to be massless, they so won't move.
# Delete them. for atom in existingAtomMap.itervalues():
system.setParticleMass(atom.index, 0.0)
modeller = app.Modeller(topology, positions)
modeller.delete(deleteHeavyAtoms.union(deleteHydrogens)) # If any heavy atoms were omitted, add them back to avoid steric clashes.
topology = modeller.topology
positions = modeller.positions nonbonded = [f for f in system.getForces() if isinstance(f, mm.CustomNonbondedForce)][0]
for atom in self.topology.atoms():
# Loop over residues to see which ones have missing heavy atoms. if atom.element not in (None, hydrogen) and atom not in existingAtomMap:
system.addParticle(0.0)
missingAtoms = {} nonbonded.addParticle([])
for residue in topology.residues(): newPositions.append(self.positions[atom.index])
if residue.name in templates:
template = templates[residue.name] # For efficiency, only compute interactions that involve a new atom.
atomNames = set(atom.name for atom in residue.atoms())
missing = [] nonbonded.addInteractionGroup([atom.index for atom in newTopology.atoms() if atom in newAtoms], range(system.getNumParticles()))
for atom in template.topology.atoms():
if atom.name not in atomNames: # Do an energy minimization.
missing.append(atom)
if len(missing) > 0: integrator = mm.LangevinIntegrator(300*unit.kelvin, 10/unit.picosecond, 5*unit.femtosecond)
missingAtoms[residue] = missing context = mm.Context(system, integrator)
context.setPositions(newPositions)
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules. mm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)
(newTopology, newPositions, newAtoms, existingAtomMap) = addMissingAtoms(topology, positions, templates, missingAtoms, True, True)
# Now create a new Topology, including all atoms from the original one and adding the missing atoms.
# Create a System for energy minimizing it.
(newTopology2, newPositions2, newAtoms2, existingAtomMap2) = self._addAtomsToTopology(missingAtoms, False, False)
forcefield = app.ForceField('soft.xml')
system = forcefield.createSystem(newTopology) # Copy over the minimized positions for the new atoms.
# Set any previously existing atoms to be massless, they so won't move. for a1, a2 in zip(newAtoms, newAtoms2):
newPositions2[a2.index] = state.getPositions()[a1.index]
for atom in existingAtomMap.itervalues(): app.PDBFile.writeFile(newTopology2, newPositions2, open('output.pdb', 'w'))
system.setParticleMass(atom.index, 0.0)
# If any heavy atoms were omitted, add them back to avoid steric clashes.
nonbonded = [f for f in system.getForces() if isinstance(f, mm.CustomNonbondedForce)][0]
for atom in topology.atoms():
if atom.element not in (None, hydrogen) and atom not in existingAtomMap:
system.addParticle(0.0)
nonbonded.addParticle([])
newPositions.append(positions[atom.index])
# For efficiency, only compute interactions that involve a new atom.
nonbonded.addInteractionGroup([atom.index for atom in newTopology.atoms() if atom in newAtoms], range(system.getNumParticles()))
# Do an energy minimization.
integrator = mm.LangevinIntegrator(300*unit.kelvin, 10/unit.picosecond, 5*unit.femtosecond)
context = mm.Context(system, integrator)
context.setPositions(newPositions)
mm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)
# Now create a new Topology, including all atoms from the original one and adding the missing atoms.
(newTopology2, newPositions2, newAtoms2, existingAtomMap2) = addMissingAtoms(topology, positions, templates, missingAtoms, False, False)
# Copy over the minimized positions for the new atoms.
for a1, a2 in zip(newAtoms, newAtoms2): if __name__=='__main__':
newPositions2[a2.index] = state.getPositions()[a1.index] fixer = PDBFixer(app.PDBFile(sys.argv[1]))
app.PDBFile.writeFile(newTopology2, newPositions2, open('output.pdb', 'w')) fixer.replaceNonstandardResidues(fixer.findNonstandardResidues())
fixer.addMissingAtoms(fixer.findMissingAtoms())
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment