Commit 892470c1 by Peter Eastman

Preliminary support for adding missing residues

parent f8d71ec9
...@@ -35,6 +35,7 @@ import ui ...@@ -35,6 +35,7 @@ import ui
import simtk.openmm as mm import simtk.openmm as mm
import simtk.openmm.app as app import simtk.openmm.app as app
import simtk.unit as unit import simtk.unit as unit
from simtk.openmm.app.internal.pdbstructure import PdbStructure
from simtk.openmm.app.element import hydrogen, oxygen from simtk.openmm.app.element import hydrogen, oxygen
import numpy as np import numpy as np
import numpy.linalg as lin import numpy.linalg as lin
...@@ -91,10 +92,12 @@ def _overlayPoints(points1, points2): ...@@ -91,10 +92,12 @@ def _overlayPoints(points1, points2):
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
class PDBFixer(object): class PDBFixer(object):
def __init__(self, pdb): def __init__(self, structure):
self.pdb = pdb self.structure = structure
self.topology = pdb.topology self.pdb = app.PDBFile(structure)
self.positions = pdb.positions self.topology = self.pdb.topology
self.positions = self.pdb.positions
self.centroid = unit.sum(self.positions)/len(self.positions)
# Load the templates. # Load the templates.
...@@ -105,7 +108,7 @@ class PDBFixer(object): ...@@ -105,7 +108,7 @@ class PDBFixer(object):
name = templatePdb.topology.residues().next().name name = templatePdb.topology.residues().next().name
self.templates[name] = templatePdb self.templates[name] = templatePdb
def _addAtomsToTopology(self, missingAtoms, missingTerminals, heavyAtomsOnly, omitUnknownMolecules): def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added.""" """Create a new Topology in which missing atoms have been added."""
newTopology = app.Topology() newTopology = app.Topology()
...@@ -117,18 +120,34 @@ class PDBFixer(object): ...@@ -117,18 +120,34 @@ class PDBFixer(object):
for chain in self.topology.chains(): for chain in self.topology.chains():
if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()): if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()):
continue continue
chainResidues = list(chain.residues())
newChain = newTopology.addChain() newChain = newTopology.addChain()
for residue in chain.residues(): for indexInChain, residue in enumerate(chain.residues()):
newResidue = newTopology.addResidue(residue.name, newChain)
# Insert missing residues here.
insertHere = [r[2] for r in self.missingResidues if r[0] == chain.index and r[1] == indexInChain]
if len(insertHere) > 0:
endPosition = self._computeResidueCenter(residue)
if indexInChain > 0:
startPosition = self._computeResidueCenter(chainResidues[indexInChain-1])
else:
outward = endPosition-self.centroid
norm = unit.norm(outward)
if norm > 0*unit.nanometer:
outward *= len(insertHere)*0.5*unit.nanometer/norm
startPosition = endPosition+outward
self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, residue, newAtoms, newPositions)
# Add the existing heavy atoms. # Create the new residue and add existing heavy atoms.
newResidue = newTopology.addResidue(residue.name, newChain)
for atom in residue.atoms(): for atom in residue.atoms():
if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen): if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen):
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
existingAtomMap[atom] = newAtom existingAtomMap[atom] = newAtom
newPositions.append(self.positions[atom.index]) newPositions.append(self.positions[atom.index])
if residue in missingAtoms: if residue in self.missingAtoms:
# Find corresponding atoms in the residue and the template. # Find corresponding atoms in the residue and the template.
...@@ -148,18 +167,37 @@ class PDBFixer(object): ...@@ -148,18 +167,37 @@ class PDBFixer(object):
# Add the missing atoms. # Add the missing atoms.
addedAtomMap[residue] = {} addedAtomMap[residue] = {}
for atom in missingAtoms[residue]: for atom in self.missingAtoms[residue]:
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom) newAtoms.append(newAtom)
addedAtomMap[residue][atom] = newAtom addedAtomMap[residue][atom] = newAtom
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer) templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer) newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)
if residue in self.missingTerminals:
terminalsToAdd = self.missingTerminals[residue]
else:
terminalsToAdd = None
# If this is the end of the chain, add any missing residues that come after it.
if residue == chainResidues[-1]:
insertHere = [r[2] for r in self.missingResidues if r[0] == chain.index and r[1] > indexInChain]
if len(insertHere) > 0:
startPosition = self._computeResidueCenter(residue)
outward = startPosition-self.centroid
norm = unit.norm(outward)
if norm > 0*unit.nanometer:
outward *= len(insertHere)*0.5*unit.nanometer/norm
endPosition = startPosition+outward
self._addMissingResiduesToChain(newChain, insertHere, startPosition, endPosition, residue, newAtoms, newPositions)
newResidue = list(newChain.residues())[-1]
terminalsToAdd = ['OXT']
# If a terminal OXT is missing, add it. # If a terminal OXT is missing, add it.
if residue in missingTerminals: if terminalsToAdd is not None:
atomPositions = dict((atom.name, newPositions[atom.index].value_in_unit(unit.nanometer)) for atom in newResidue.atoms()) atomPositions = dict((atom.name, newPositions[atom.index].value_in_unit(unit.nanometer)) for atom in newResidue.atoms())
if 'OXT' in missingTerminals[residue]: if 'OXT' in terminalsToAdd:
newAtom = newTopology.addAtom('OXT', oxygen, newResidue) newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
newAtoms.append(newAtom) newAtoms.append(newAtom)
addedOXT.append(newAtom) addedOXT.append(newAtom)
...@@ -168,48 +206,51 @@ class PDBFixer(object): ...@@ -168,48 +206,51 @@ class PDBFixer(object):
d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c)) d_ca_c /= unit.sqrt(unit.dot(d_ca_c, d_ca_c))
v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o) v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
newPositions.append((atomPositions['O']+2*v)*unit.nanometer) newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
newTopology.createStandardBonds()
newTopology.createDisulfideBonds(newPositions)
# Add bonds from the original Topology. # Return the results.
for atom1, atom2 in self.topology.bonds(): return (newTopology, newPositions, newAtoms, existingAtomMap)
if atom1 in existingAtomMap and atom2 in existingAtomMap:
newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2])
# Add bonds that connect to new atoms. def _computeResidueCenter(self, residue):
return unit.sum([self.pdb.positions[atom.index] for atom in residue.atoms()])/len(list(residue.atoms()))
for residue in missingAtoms: def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosition, orientTo, newAtoms, newPositions):
template = self.templates[residue.name] orientToPositions = dict((atom.name, self.positions[atom.index]) for atom in orientTo.atoms())
atomsByName = dict((atom.name, atom) for atom in residue.atoms()) for i, residueName in enumerate(residueNames):
addedAtoms = addedAtomMap[residue] template = self.templates[residueName]
for atom1, atom2 in template.topology.bonds():
if atom1 in addedAtoms or atom2 in addedAtoms:
if atom1 in addedAtoms:
bondAtom1 = addedAtoms[atom1]
else:
bondAtom1 = existingAtomMap[atomsByName[atom1.name]]
if atom2 in addedAtoms:
bondAtom2 = addedAtoms[atom2]
else:
bondAtom2 = existingAtomMap[atomsByName[atom2.name]]
newTopology.addBond(bondAtom1, bondAtom2)
for atom1 in addedOXT:
atom2 = [atom for atom in atom1.residue.atoms() if atom.name == 'C'][0]
newTopology.addBond(atom1, atom2)
# Return the results. # Find a translation that best matches the adjacent residue.
return (newTopology, newPositions, newAtoms, existingAtomMap) points1 = []
points2 = []
for atom in template.topology.atoms():
if atom.name in orientToPositions:
points1.append(orientToPositions[atom.name].value_in_unit(unit.nanometer))
points2.append(template.positions[atom.index].value_in_unit(unit.nanometer))
(translate2, rotate, translate1) = _overlayPoints(points1, points2)
# Create the new residue.
newResidue = chain.topology.addResidue(residueName, chain)
translate = startPosition+(endPosition-startPosition)*(i+1.0)/(len(residueNames)+1.0)
for atom in template.topology.atoms():
newAtom = chain.topology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append(mm.Vec3(*np.dot(rotate, templatePosition))*unit.nanometer+translate)
def findNonstandardResidues(self): def findNonstandardResidues(self):
return [r for r in self.topology.residues() if r.name in substitutions] self.nonstandardResidues = [r for r in self.topology.residues() if r.name in substitutions]
def replaceNonstandardResidues(self, replaceResidues): def replaceNonstandardResidues(self):
if len(replaceResidues) > 0: if len(self.nonstandardResidues) > 0:
deleteAtoms = [] deleteAtoms = []
# Find atoms that should be deleted. # Find atoms that should be deleted.
for residue in replaceResidues: for residue in self.nonstandardResidues:
residue.name = substitutions[residue.name] residue.name = substitutions[residue.name]
template = self.templates[residue.name] template = self.templates[residue.name]
standardAtoms = set(atom.name for atom in template.topology.atoms()) standardAtoms = set(atom.name for atom in template.topology.atoms())
...@@ -224,6 +265,53 @@ class PDBFixer(object): ...@@ -224,6 +265,53 @@ class PDBFixer(object):
self.topology = modeller.topology self.topology = modeller.topology
self.positions = modeller.positions self.positions = modeller.positions
def findMissingResidues(self):
chains = [c for c in self.structure.iter_chains() if any(atom.record_name == 'ATOM' for atom in c.iter_atoms())]
chainWithGaps = {}
# Find the sequence of each chain, with gaps for missing residues.
for chain in chains:
minResidue = min(r.number for r in chain.iter_residues())
maxResidue = max(r.number for r in chain.iter_residues())
residues = [None]*(maxResidue-minResidue+1)
for r in chain.iter_residues():
residues[r.number-minResidue] = r.get_name()
chainWithGaps[chain] = residues
# Try to find the chain that matches each sequence.
chainSequence = {}
chainOffset = {}
for sequence in self.structure.sequences:
for chain in chains:
if chain.chain_id != sequence.chain_id:
continue
if chain in chainSequence:
continue
for offset in range(len(sequence.residues)-len(chainWithGaps[chain])+1):
if all(a == b or b == None for a,b in zip(sequence.residues[offset:], chainWithGaps[chain])):
chainSequence[chain] = sequence
chainOffset[chain] = offset
break
if chain in chainSequence:
break
# Now build the list of residues to add.
self.missingResidues = []
for structChain, topChain in zip(self.structure.iter_chains(), self.pdb.topology.chains()):
if structChain in chainSequence:
offset = chainOffset[structChain]
sequence = chainSequence[structChain].residues
gappedSequence = chainWithGaps[structChain]
index = 0
for i in range(len(sequence)):
if i < offset or i >= len(gappedSequence)+offset or gappedSequence[i-offset] is None:
self.missingResidues.append((topChain.index, index, sequence[i]))
else:
index += 1
def findMissingAtoms(self): def findMissingAtoms(self):
missingAtoms = {} missingAtoms = {}
missingTerminals = {} missingTerminals = {}
...@@ -249,18 +337,19 @@ class PDBFixer(object): ...@@ -249,18 +337,19 @@ class PDBFixer(object):
# Add missing terminal atoms. # Add missing terminal atoms.
terminals = [] terminals = []
if residue == chainResidues[-1]: if residue == chainResidues[-1] and not any(r[0] == chain.index and r[1] >= len(chainResidues) for r in self.missingResidues):
templateNames = set(atom.name for atom in template.topology.atoms()) templateNames = set(atom.name for atom in template.topology.atoms())
if 'OXT' not in atomNames and all(name in templateNames for name in ['C', 'O', 'CA']): if 'OXT' not in atomNames and all(name in templateNames for name in ['C', 'O', 'CA']):
terminals.append('OXT') terminals.append('OXT')
if len(terminals) > 0: if len(terminals) > 0:
missingTerminals[residue] = terminals missingTerminals[residue] = terminals
return (missingAtoms, missingTerminals) self.missingAtoms = missingAtoms
self.missingTerminals = missingTerminals
def addMissingAtoms(self, missingAtoms, missingTerminals): def addMissingAtoms(self):
# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules. # Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
(newTopology, newPositions, newAtoms, existingAtomMap) = self._addAtomsToTopology(missingAtoms, missingTerminals, True, True) (newTopology, newPositions, newAtoms, existingAtomMap) = self._addAtomsToTopology(True, True)
if len(newAtoms) > 0: if len(newAtoms) > 0:
# Create a System for energy minimizing it. # Create a System for energy minimizing it.
...@@ -297,7 +386,7 @@ class PDBFixer(object): ...@@ -297,7 +386,7 @@ class PDBFixer(object):
# Now create a new Topology, including all atoms from the original one and adding the missing atoms. # Now create a new Topology, including all atoms from the original one and adding the missing atoms.
(newTopology2, newPositions2, newAtoms2, existingAtomMap2) = self._addAtomsToTopology(missingAtoms, missingTerminals, False, False) (newTopology2, newPositions2, newAtoms2, existingAtomMap2) = self._addAtomsToTopology(False, False)
# Copy over the minimized positions for the new atoms. # Copy over the minimized positions for the new atoms.
...@@ -365,8 +454,10 @@ if __name__=='__main__': ...@@ -365,8 +454,10 @@ if __name__=='__main__':
if len(sys.argv) < 2: if len(sys.argv) < 2:
ui.launchUI() ui.launchUI()
else: else:
fixer = PDBFixer(app.PDBFile(sys.argv[1])) fixer = PDBFixer(PdbStructure(open(sys.argv[1])))
fixer.replaceNonstandardResidues(fixer.findNonstandardResidues()) fixer.findMissingResidues()
missingAtoms, missingTerminals = fixer.findMissingAtoms() fixer.findNonstandardResidues()
fixer.addMissingAtoms(missingAtoms, missingTerminals) fixer.replaceNonstandardResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
app.PDBFile.writeFile(fixer.topology, fixer.positions, open('output.pdb', 'w')) app.PDBFile.writeFile(fixer.topology, fixer.positions, open('output.pdb', 'w'))
import simtk.openmm.app as app import simtk.openmm.app as app
from simtk.openmm.app.internal.pdbstructure import PdbStructure
from pdbfixer import PDBFixer, substitutions from pdbfixer import PDBFixer, substitutions
import uiserver import uiserver
import webbrowser import webbrowser
...@@ -7,19 +8,17 @@ from cStringIO import StringIO ...@@ -7,19 +8,17 @@ from cStringIO import StringIO
def startPageCallback(parameters, handler): def startPageCallback(parameters, handler):
if 'pdbfile' in parameters: if 'pdbfile' in parameters:
global fixer global fixer
pdb = app.PDBFile(parameters['pdbfile'].value.splitlines()) pdb = PdbStructure(parameters['pdbfile'].value.splitlines())
fixer = PDBFixer(pdb) fixer = PDBFixer(pdb)
displayConvertResiduesPage() displayConvertResiduesPage()
def convertResiduesPageCallback(parameters, handler): def convertResiduesPageCallback(parameters, handler):
global nonstandard fixer.nonstandardResidues = [residue for i, residue in enumerate(fixer.nonstandardResidues) if 'convert'+str(i) in parameters]
nonstandard = [residue for i, residue in enumerate(nonstandard) if 'convert'+str(i) in parameters] fixer.replaceNonstandardResidues()
fixer.replaceNonstandardResidues(nonstandard)
nonstandard = []
displayMissingAtomsPage() displayMissingAtomsPage()
def missingAtomsPageCallback(parameters, handler): def missingAtomsPageCallback(parameters, handler):
fixer.addMissingAtoms(missingAtoms, missingTerminals) fixer.addMissingAtoms()
displayDownloadPage() displayDownloadPage()
def downloadPageCallback(parameters, handler): def downloadPageCallback(parameters, handler):
...@@ -50,9 +49,9 @@ PDB File: <input type="file" name="pdbfile"/> ...@@ -50,9 +49,9 @@ PDB File: <input type="file" name="pdbfile"/>
def displayConvertResiduesPage(): def displayConvertResiduesPage():
uiserver.setCallback(convertResiduesPageCallback) uiserver.setCallback(convertResiduesPageCallback)
global nonstandard fixer.findMissingResidues()
nonstandard = fixer.findNonstandardResidues() fixer.findNonstandardResidues()
if len(nonstandard) == 0: if len(fixer.nonstandardResidues) == 0:
displayMissingAtomsPage() displayMissingAtomsPage()
return return
indexInChain = {} indexInChain = {}
...@@ -60,7 +59,7 @@ def displayConvertResiduesPage(): ...@@ -60,7 +59,7 @@ def displayConvertResiduesPage():
for i, residue in enumerate(chain.residues()): for i, residue in enumerate(chain.residues()):
indexInChain[residue] = i+1 indexInChain[residue] = i+1
table = "" table = ""
for i, residue in enumerate(nonstandard): for i, residue in enumerate(fixer.nonstandardResidues):
table += ' <tr><td>%d</td><td>%s %d</td><td>%s</td><td><input type="checkbox" name="convert%d" checked></td></tr>\n' % (residue.chain.index+1, residue.name, indexInChain[residue], substitutions[residue.name], i) table += ' <tr><td>%d</td><td>%s %d</td><td>%s</td><td><input type="checkbox" name="convert%d" checked></td></tr>\n' % (residue.chain.index+1, residue.name, indexInChain[residue], substitutions[residue.name], i)
uiserver.setContent(""" uiserver.setContent("""
<html> <html>
...@@ -82,10 +81,8 @@ This PDB file contains non-standard residues. Do you want to convert them to th ...@@ -82,10 +81,8 @@ This PDB file contains non-standard residues. Do you want to convert them to th
def displayMissingAtomsPage(): def displayMissingAtomsPage():
uiserver.setCallback(missingAtomsPageCallback) uiserver.setCallback(missingAtomsPageCallback)
global missingAtoms fixer.findMissingAtoms()
global missingTerminals allResidues = list(set(fixer.missingAtoms.iterkeys()).union(fixer.missingTerminals.iterkeys()))
missingAtoms, missingTerminals = fixer.findMissingAtoms()
allResidues = list(set(missingAtoms.iterkeys()).union(missingTerminals.iterkeys()))
allResidues.sort(key=lambda x: x.index) allResidues.sort(key=lambda x: x.index)
if len(allResidues) == 0: if len(allResidues) == 0:
displayDownloadPage() displayDownloadPage()
...@@ -97,10 +94,10 @@ def displayMissingAtomsPage(): ...@@ -97,10 +94,10 @@ def displayMissingAtomsPage():
table = "" table = ""
for residue in allResidues: for residue in allResidues:
atoms = [] atoms = []
if residue in missingAtoms: if residue in fixer.missingAtoms:
atoms.extend(atom.name for atom in missingAtoms[residue]) atoms.extend(atom.name for atom in fixer.missingAtoms[residue])
if residue in missingTerminals: if residue in fixer.missingTerminals:
atoms.extend(atom for atom in missingTerminals[residue]) atoms.extend(atom for atom in fixer.missingTerminals[residue])
table += ' <tr><td>%d</td><td>%s %d</td><td>%s</td></tr>\n' % (residue.chain.index+1, residue.name, indexInChain[residue], ', '.join(atoms)) table += ' <tr><td>%d</td><td>%s %d</td><td>%s</td></tr>\n' % (residue.chain.index+1, residue.name, indexInChain[residue], ', '.join(atoms))
uiserver.setContent(""" uiserver.setContent("""
<html> <html>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment