Commit cde53ba9 by Peter Eastman

Can add missing heavy atoms while preserving all existing atoms

parent 41c9f421
...@@ -72,40 +72,19 @@ def overlayPoints(points1, points2): ...@@ -72,40 +72,19 @@ def overlayPoints(points1, points2):
(u, s, v) = lin.svd(R) (u, s, v) = lin.svd(R)
return (-1*center2, np.dot(u, v).transpose(), center1) return (-1*center2, np.dot(u, v).transpose(), center1)
# Load the PDB file.
pdb = app.PDBFile(sys.argv[1])
# Load the templates.
templates = {}
for file in os.listdir('templates'):
templatePdb = app.PDBFile(os.path.join('templates', file))
name = templatePdb.topology.residues().next().name
templates[name] = templatePdb
# Loop over residues to see which ones have missing heavy atoms. def addMissingAtoms(pdb, templates, missingAtoms, heavyAtomsOnly, omitUnknownMolecules):
"""Create a new Topology in which missing atoms have been added."""
missingAtoms = {}
for residue in pdb.topology.residues(): newTopology = app.Topology()
if residue.name in templates: newPositions = []*unit.nanometer
template = templates[residue.name] newAtoms = []
atomNames = set(atom.name for atom in residue.atoms()) existingAtomMap = {}
missing = [] addedAtomMap = {}
for atom in template.topology.atoms(): addedOXT = []
if atom.name not in atomNames: for chain in pdb.topology.chains():
missing.append(atom) if omitUnknownMolecules and not any(residue.name in templates for residue in chain.residues()):
if len(missing) > 0: continue
missingAtoms[residue] = missing
# Create the new Topology.
newTopology = app.Topology()
newPositions = []*unit.nanometer
existingAtomMap = {}
addedAtomMap = {}
addedOXT = []
for chain in pdb.topology.chains():
newChain = newTopology.addChain() newChain = newTopology.addChain()
chainResidues = list(chain.residues()) chainResidues = list(chain.residues())
for residue in chain.residues(): for residue in chain.residues():
...@@ -114,8 +93,9 @@ for chain in pdb.topology.chains(): ...@@ -114,8 +93,9 @@ for chain in pdb.topology.chains():
# Add the existing heavy atoms. # Add the existing heavy atoms.
for atom in residue.atoms(): for atom in residue.atoms():
if atom.element is not None and atom.element != hydrogen: if not heavyAtomsOnly or (atom.element is not None and atom.element != hydrogen):
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
existingAtomMap[atom] = newAtom existingAtomMap[atom] = newAtom
newPositions.append(pdb.positions[atom.index]) newPositions.append(pdb.positions[atom.index])
if residue in missingAtoms: if residue in missingAtoms:
...@@ -140,6 +120,7 @@ for chain in pdb.topology.chains(): ...@@ -140,6 +120,7 @@ for chain in pdb.topology.chains():
addedAtomMap[residue] = {} addedAtomMap[residue] = {}
for atom in missingAtoms[residue]: for atom in missingAtoms[residue]:
newAtom = newTopology.addAtom(atom.name, atom.element, newResidue) newAtom = newTopology.addAtom(atom.name, atom.element, newResidue)
newAtoms.append(newAtom)
addedAtomMap[residue][atom] = newAtom addedAtomMap[residue][atom] = newAtom
templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer) templatePosition = template.positions[atom.index].value_in_unit(unit.nanometer)
newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer) newPositions.append((mm.Vec3(*np.dot(rotate, templatePosition+translate2))+translate1)*unit.nanometer)
...@@ -150,6 +131,7 @@ for chain in pdb.topology.chains(): ...@@ -150,6 +131,7 @@ for chain in pdb.topology.chains():
atomPositions = dict((atom.name, pdb.positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms()) atomPositions = dict((atom.name, pdb.positions[atom.index].value_in_unit(unit.nanometer)) for atom in residue.atoms())
if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']): if 'OXT' not in atomPositions and all(name in atomPositions for name in ['C', 'O', 'CA']):
newAtom = newTopology.addAtom('OXT', oxygen, newResidue) newAtom = newTopology.addAtom('OXT', oxygen, newResidue)
newAtoms.append(newAtom)
addedOXT.append(newAtom) addedOXT.append(newAtom)
d_ca_o = atomPositions['O']-atomPositions['CA'] d_ca_o = atomPositions['O']-atomPositions['CA']
d_ca_c = atomPositions['C']-atomPositions['CA'] d_ca_c = atomPositions['C']-atomPositions['CA']
...@@ -157,15 +139,15 @@ for chain in pdb.topology.chains(): ...@@ -157,15 +139,15 @@ for chain in pdb.topology.chains():
v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o) v = d_ca_o - d_ca_c*unit.dot(d_ca_c, d_ca_o)
newPositions.append((atomPositions['O']+2*v)*unit.nanometer) newPositions.append((atomPositions['O']+2*v)*unit.nanometer)
# Add bonds from the original Topology. # Add bonds from the original Topology.
for atom1, atom2 in pdb.topology.bonds(): for atom1, atom2 in pdb.topology.bonds():
if atom1 in existingAtomMap and atom2 in existingAtomMap: if atom1 in existingAtomMap and atom2 in existingAtomMap:
newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2]) newTopology.addBond(existingAtomMap[atom1], existingAtomMap[atom2])
# Add bonds that connect to new atoms. # Add bonds that connect to new atoms.
for residue in missingAtoms: for residue in missingAtoms:
template = templates[residue.name] template = templates[residue.name]
atomsByName = dict((atom.name, atom) for atom in residue.atoms()) atomsByName = dict((atom.name, atom) for atom in residue.atoms())
addedAtoms = addedAtomMap[residue] addedAtoms = addedAtomMap[residue]
...@@ -180,11 +162,82 @@ for residue in missingAtoms: ...@@ -180,11 +162,82 @@ for residue in missingAtoms:
else: else:
bondAtom2 = existingAtomMap[atomsByName[atom2.name]] bondAtom2 = existingAtomMap[atomsByName[atom2.name]]
newTopology.addBond(bondAtom1, bondAtom2) newTopology.addBond(bondAtom1, bondAtom2)
for atom1 in addedOXT: for atom1 in addedOXT:
atom2 = [atom for atom in atom1.residue.atoms() if atom.name == 'C'][0] atom2 = [atom for atom in atom1.residue.atoms() if atom.name == 'C'][0]
newTopology.addBond(atom1, atom2) newTopology.addBond(atom1, atom2)
app.PDBFile.writeFile(newTopology, newPositions, open('output.pdb', 'w')) # Return the results.
return (newTopology, newPositions, newAtoms, existingAtomMap)
# Load the PDB file named by the first command-line argument.
pdb = app.PDBFile(sys.argv[1])

# Load the templates: every file in the 'templates' directory is read as a PDB
# and stored under the name of the first residue in its topology.
templates = {}
for templateFile in os.listdir('templates'):
    templatePdb = app.PDBFile(os.path.join('templates', templateFile))
    # Use the next() builtin instead of the Python-2-only .next() method so this
    # works on both Python 2.6+ and Python 3.
    name = next(iter(templatePdb.topology.residues())).name
    templates[name] = templatePdb

# Loop over residues to see which ones have missing heavy atoms, i.e. atoms whose
# names appear in the residue's template but not in the residue itself.
# Residues with no matching template are skipped.
missingAtoms = {}
for residue in pdb.topology.residues():
    if residue.name in templates:
        template = templates[residue.name]
        atomNames = set(atom.name for atom in residue.atoms())
        missing = [atom for atom in template.topology.atoms() if atom.name not in atomNames]
        if missing:
            # Maps the residue to the *template* atoms it lacks.
            missingAtoms[residue] = missing

# Create a Topology that 1) adds missing atoms, 2) removes all hydrogens, and 3) removes unknown molecules.
(newTopology, newPositions, newAtoms, existingAtomMap) = addMissingAtoms(pdb, templates, missingAtoms, True, True)
# Create a System for energy minimizing it.
forcefield = app.ForceField('soft.xml') forcefield = app.ForceField('soft.xml')
forcefield.createSystem(newTopology) system = forcefield.createSystem(newTopology)
\ No newline at end of file
# Set any previously existing atoms to be massless, so they won't move.
# values() is used instead of the Python-2-only itervalues().
for atom in existingAtomMap.values():
    system.setParticleMass(atom.index, 0.0)

# If any heavy atoms were omitted, add them back to avoid steric clashes.
# They are added as massless particles with an empty parameter list, and their
# original positions are appended so the position list matches the System.
nonbonded = [f for f in system.getForces() if isinstance(f, mm.CustomNonbondedForce)][0]
for atom in pdb.topology.atoms():
    if atom.element is not None and atom.element != hydrogen and atom not in existingAtomMap:
        system.addParticle(0.0)
        nonbonded.addParticle([])
        newPositions.append(pdb.positions[atom.index])

# For efficiency, only compute interactions that involve a new atom.
# A set makes each membership test O(1) instead of scanning the newAtoms list.
newAtomSet = set(newAtoms)
newAtomIndices = [atom.index for atom in newTopology.atoms() if atom in newAtomSet]
nonbonded.addInteractionGroup(newAtomIndices, list(range(system.getNumParticles())))

# Do an energy minimization.
integrator = mm.LangevinIntegrator(300*unit.kelvin, 10/unit.picosecond, 5*unit.femtosecond)
context = mm.Context(system, integrator)
context.setPositions(newPositions)
mm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)

# Now create a new Topology, including all atoms from the original one and adding the missing atoms.
(newTopology2, newPositions2, newAtoms2, existingAtomMap2) = addMissingAtoms(pdb, templates, missingAtoms, False, False)

# Copy over the minimized positions for the new atoms.
# The positions are fetched once rather than on every loop iteration.
# NOTE(review): entries of newAtoms and newAtoms2 are paired positionally; this relies on
# both addMissingAtoms calls producing those lists in matching order -- confirm.
minimizedPositions = state.getPositions()
for a1, a2 in zip(newAtoms, newAtoms2):
    newPositions2[a2.index] = minimizedPositions[a1.index]

# Write the result; the with-block guarantees the file is flushed and closed.
with open('output.pdb', 'w') as outputFile:
    app.PDBFile.writeFile(newTopology2, newPositions2, outputFile)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment