Merge pull request #39 from kyleabeauchamp/applymutation

Added applyMutations and tests.

Merge pull request #39 from kyleabeauchamp/applymutation
Added applyMutations and tests.
f7df6c00 · peastman · 4bbe112c · 1f4b9ed7 · f7df6c00 · f7df6c00
Commit f7df6c00 authored May 29, 2014 by peastman
Show whitespace changes
Inline Side-by-side

Showing with 157 additions and 0 deletions

pdbfixer/pdbfixer.py
+90 -0

tests/test_mutate.py
+67 -0

No files found.
--- a/pdbfixer/pdbfixer.py
+++ b/pdbfixer/pdbfixer.py
@@ -545,6 +545,96 @@ class PDBFixer(object):
            self.topology = modeller.topology
            self.positions = modeller.positions
+    def applyMutations(self, mutations, chain_id, which_model=0):
+        """Apply a list of amino acid substitutions to make a mutant protein.
+        Parameters
+        ----------
+        mutations : list of strings
+            Each string must include the resName (original), index, 
+            and resName (target).  For example, ALA-133-GLY will mutate
+            alanine 133 to glycine.  
+        chain_id : str
+            String based chain ID of the single chain you wish to mutate.
+        which_model : int, default = 0
+            Which model to use in the pdb structure.
+        Notes
+        -----
+        We require three letter codes to avoid possible ambiguitities.
+        We can't guarnatee that the resulting model is a good one; for 
+        significant changes in sequence, you should probably be using
+        a standalone homology modelling tool.
+        Examples
+        --------
+        Find nonstandard residues.
+        >>> fixer = PDBFixer(pdbid='1VII')
+        >>> fixer.applyMutations(["ALA-57-GLY"])
+        >>> fixer.findMissingResidues()     
+        >>> fixer.findMissingAtoms()
+        >>> fixer.addMissingAtoms()        
+        >>> fixer.addMissingHydrogens(7.0)
+        """
+        # First find residues based on our table of standard substitutions.
+        index_to_old_name = dict((r.index, r.name) for r in self.topology.residues())
+        index_to_new_residues = {}
+        chain_id_to_chain_number = dict((c.chain_id, k) for k, c in enumerate(self.structure.models[which_model].chains))
+        chain_number = chain_id_to_chain_number[chain_id]
+        resSeq_to_index = dict((r.number, k) for k, r in enumerate(self.structure.models[which_model].chains[chain_number]))
+        for mut_str in mutations:
+            old_name, resSeq, new_name = mut_str.split("-")
+            resSeq = int(resSeq)
+            index = resSeq_to_index[resSeq]
+            if index not in index_to_old_name:
+                raise(KeyError("Cannot find index %d in system!" % index))
+            if index_to_old_name[index] != old_name:
+                raise(ValueError("You asked to mutate %s %d, but that residue is actually %s!" % (old_name, index, index_to_old_name[index])))
+            try:
+                template = self.templates[new_name]
+            except KeyError:
+                raise(KeyError("Cannot find residue %s in template library!" % new_name))
+            index_to_new_residues[index] = new_name
+        residue_map = [(r, index_to_new_residues[r.index]) for r in self.topology.residues() if r.index in index_to_new_residues]
+        if len(residue_map) > 0:
+            deleteAtoms = []
+            # Find atoms that should be deleted.
+            for residue, replaceWith in residue_map:
+                if residue.chain.index != chain_number:
+                    continue  # Only modify specified chain
+                residue.name = replaceWith
+                template = self.templates[replaceWith]
+                standardAtoms = set(atom.name for atom in template.topology.atoms())
+                for atom in residue.atoms():
+                    if atom.element in (None, hydrogen) or atom.name not in standardAtoms:
+                        deleteAtoms.append(atom)
+            # Delete them.
+            modeller = app.Modeller(self.topology, self.positions)
+            modeller.delete(deleteAtoms)
+            self.topology = modeller.topology
+            self.positions = modeller.positions        
    def findMissingAtoms(self):
        """Find heavy atoms that are missing from the structure.

--- a/tests/test_mutate.py
+++ b/tests/test_mutate.py
+from nose.tools import ok_, eq_, raises
+import simtk.openmm.app as app
+import pdbfixer
+import tempfile
+def test_mutate_1():
+    fixer = pdbfixer.PDBFixer(pdbid='1VII')
+    fixer.applyMutations(["ALA-57-GLY"], "A")
+    fixer.findMissingResidues()     
+    fixer.findMissingAtoms()
+    fixer.addMissingAtoms()        
+    fixer.addMissingHydrogens(7.0)
+    temp_pdb = tempfile.NamedTemporaryFile()
+    app.PDBFile.writeFile(fixer.topology, fixer.positions, temp_pdb)
+    pdb = app.PDBFile(temp_pdb.name)
+    new_residue57 = list(fixer.topology.residues())[16]
+    assert new_residue57.name == "GLY", "Name of mutated residue did not change correctly!"
+    assert len(list(new_residue57.atoms())) == 7, "Should have 7 atoms in GLY 56"
+    atom_names = set([atom.name for atom in new_residue57.atoms()])
+    desired_atom_names = set(["N", "CA", "C", "O", "H", "HA3", "HA2"])
+    assert atom_names == desired_atom_names, "Atom Names did not match for GLY 56"    
+def test_mutate_2():
+    fixer = pdbfixer.PDBFixer(pdbid='1VII')
+    fixer.applyMutations(["ALA-57-LEU", "SER-56-ALA"], "A")
+    fixer.findMissingResidues()     
+    fixer.findMissingAtoms()
+    fixer.addMissingAtoms()        
+    fixer.addMissingHydrogens(7.0)
+    temp_pdb = tempfile.NamedTemporaryFile()
+    new_residue57 = list(fixer.topology.residues())[16]
+    new_residue56 = list(fixer.topology.residues())[15]
+    assert new_residue57.name == "LEU", "Name of mutated residue did not change correctly!"
+    assert new_residue56.name == "ALA", "Name of mutated residue did not change correctly!"
+    assert len(list(new_residue56.atoms())) == 10, "Should have 10 atoms in ALA 56"
+    assert len(list(new_residue57.atoms())) == 19, "Should have 19 atoms in LEU 57"
+    atom_names = set([atom.name for atom in new_residue56.atoms()])
+    desired_atom_names = set(["N", "CA", "CB", "C", "O", "H", "HA", "HB1", "HB2", "HB3"])
+    assert atom_names == desired_atom_names, "Atom Names did not match for ALA 56"
+    atom_names = set([atom.name for atom in new_residue57.atoms()])
+    desired_atom_names = set(["C", "N", "CA", "CB", "CG", "CD1", "CD2", "O", "H", "HA", "HB2", "HB3", "HD11", "HD12", "HD13", "HD21", "HD22", "HD23", "HG"])
+    assert atom_names == desired_atom_names, "Atom Names did not match for LEU 57"
+@raises(ValueError)
+def test_mutate_3_fails():
+    fixer = pdbfixer.PDBFixer(pdbid='1VII')
+    fixer.applyMutations(["ALA-57-GLY", "SER-57-ALA"], "A")
+@raises(KeyError)
+def test_mutate_4_fails():
+    fixer = pdbfixer.PDBFixer(pdbid='1VII')
+    fixer.applyMutations(["ALA-57-WTF", "SER-56-ALA"], "A")
+@raises(KeyError)
+def test_mutate_5_fails():
+    fixer = pdbfixer.PDBFixer(pdbid='1VII')
+    fixer.applyMutations(["ALA-1000-GLY", "SER-56-ALA"], "A")