Commit f7df6c00 by peastman

Merge pull request #39 from kyleabeauchamp/applymutation

Added applyMutations and tests.
parents 4bbe112c 1f4b9ed7
...@@ -545,6 +545,96 @@ class PDBFixer(object): ...@@ -545,6 +545,96 @@ class PDBFixer(object):
self.topology = modeller.topology self.topology = modeller.topology
self.positions = modeller.positions self.positions = modeller.positions
def applyMutations(self, mutations, chain_id, which_model=0):
"""Apply a list of amino acid substitutions to make a mutant protein.
Parameters
----------
mutations : list of strings
Each string must include the resName (original), index,
and resName (target). For example, ALA-133-GLY will mutate
alanine 133 to glycine.
chain_id : str
String based chain ID of the single chain you wish to mutate.
which_model : int, default = 0
Which model to use in the pdb structure.
Notes
-----
We require three letter codes to avoid possible ambiguitities.
We can't guarnatee that the resulting model is a good one; for
significant changes in sequence, you should probably be using
a standalone homology modelling tool.
Examples
--------
Find nonstandard residues.
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.applyMutations(["ALA-57-GLY"])
>>> fixer.findMissingResidues()
>>> fixer.findMissingAtoms()
>>> fixer.addMissingAtoms()
>>> fixer.addMissingHydrogens(7.0)
"""
# First find residues based on our table of standard substitutions.
index_to_old_name = dict((r.index, r.name) for r in self.topology.residues())
index_to_new_residues = {}
chain_id_to_chain_number = dict((c.chain_id, k) for k, c in enumerate(self.structure.models[which_model].chains))
chain_number = chain_id_to_chain_number[chain_id]
resSeq_to_index = dict((r.number, k) for k, r in enumerate(self.structure.models[which_model].chains[chain_number]))
for mut_str in mutations:
old_name, resSeq, new_name = mut_str.split("-")
resSeq = int(resSeq)
index = resSeq_to_index[resSeq]
if index not in index_to_old_name:
raise(KeyError("Cannot find index %d in system!" % index))
if index_to_old_name[index] != old_name:
raise(ValueError("You asked to mutate %s %d, but that residue is actually %s!" % (old_name, index, index_to_old_name[index])))
try:
template = self.templates[new_name]
except KeyError:
raise(KeyError("Cannot find residue %s in template library!" % new_name))
index_to_new_residues[index] = new_name
residue_map = [(r, index_to_new_residues[r.index]) for r in self.topology.residues() if r.index in index_to_new_residues]
if len(residue_map) > 0:
deleteAtoms = []
# Find atoms that should be deleted.
for residue, replaceWith in residue_map:
if residue.chain.index != chain_number:
continue # Only modify specified chain
residue.name = replaceWith
template = self.templates[replaceWith]
standardAtoms = set(atom.name for atom in template.topology.atoms())
for atom in residue.atoms():
if atom.element in (None, hydrogen) or atom.name not in standardAtoms:
deleteAtoms.append(atom)
# Delete them.
modeller = app.Modeller(self.topology, self.positions)
modeller.delete(deleteAtoms)
self.topology = modeller.topology
self.positions = modeller.positions
def findMissingAtoms(self): def findMissingAtoms(self):
"""Find heavy atoms that are missing from the structure. """Find heavy atoms that are missing from the structure.
......
from nose.tools import ok_, eq_, raises
import simtk.openmm.app as app
import pdbfixer
import tempfile
def test_mutate_1():
fixer = pdbfixer.PDBFixer(pdbid='1VII')
fixer.applyMutations(["ALA-57-GLY"], "A")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)
temp_pdb = tempfile.NamedTemporaryFile()
app.PDBFile.writeFile(fixer.topology, fixer.positions, temp_pdb)
pdb = app.PDBFile(temp_pdb.name)
new_residue57 = list(fixer.topology.residues())[16]
assert new_residue57.name == "GLY", "Name of mutated residue did not change correctly!"
assert len(list(new_residue57.atoms())) == 7, "Should have 7 atoms in GLY 56"
atom_names = set([atom.name for atom in new_residue57.atoms()])
desired_atom_names = set(["N", "CA", "C", "O", "H", "HA3", "HA2"])
assert atom_names == desired_atom_names, "Atom Names did not match for GLY 56"
def test_mutate_2():
fixer = pdbfixer.PDBFixer(pdbid='1VII')
fixer.applyMutations(["ALA-57-LEU", "SER-56-ALA"], "A")
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.0)
temp_pdb = tempfile.NamedTemporaryFile()
new_residue57 = list(fixer.topology.residues())[16]
new_residue56 = list(fixer.topology.residues())[15]
assert new_residue57.name == "LEU", "Name of mutated residue did not change correctly!"
assert new_residue56.name == "ALA", "Name of mutated residue did not change correctly!"
assert len(list(new_residue56.atoms())) == 10, "Should have 10 atoms in ALA 56"
assert len(list(new_residue57.atoms())) == 19, "Should have 19 atoms in LEU 57"
atom_names = set([atom.name for atom in new_residue56.atoms()])
desired_atom_names = set(["N", "CA", "CB", "C", "O", "H", "HA", "HB1", "HB2", "HB3"])
assert atom_names == desired_atom_names, "Atom Names did not match for ALA 56"
atom_names = set([atom.name for atom in new_residue57.atoms()])
desired_atom_names = set(["C", "N", "CA", "CB", "CG", "CD1", "CD2", "O", "H", "HA", "HB2", "HB3", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23", "HG"])
assert atom_names == desired_atom_names, "Atom Names did not match for LEU 57"
@raises(ValueError)
def test_mutate_3_fails():
fixer = pdbfixer.PDBFixer(pdbid='1VII')
fixer.applyMutations(["ALA-57-GLY", "SER-57-ALA"], "A")
@raises(KeyError)
def test_mutate_4_fails():
fixer = pdbfixer.PDBFixer(pdbid='1VII')
fixer.applyMutations(["ALA-57-WTF", "SER-56-ALA"], "A")
@raises(KeyError)
def test_mutate_5_fails():
fixer = pdbfixer.PDBFixer(pdbid='1VII')
fixer.applyMutations(["ALA-1000-GLY", "SER-56-ALA"], "A")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment