Commit 5cf78a2b by Peter Eastman Committed by GitHub

Can add hydrogens to nonstandard residues (#295)

* Can add hydrogens to nonstandard residues

* Build against OpenMM dev version

* Fixed typo

* Updated Python versions for CI

* Added required quotes
parent 268f1d7b
......@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ["3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
......
name: pdbfixer-dev
channels:
- conda-forge/label/openmm_dev
- conda-forge
dependencies:
- pytest
- openmm
- openmm=8.1.1dev0
- numpy
- pip
\ No newline at end of file
......@@ -1058,10 +1058,8 @@ class PDBFixer(object):
Notes
-----
No extensive electrostatic analysis is performed; only default residue pKas are used.
Examples
--------
No extensive electrostatic analysis is performed; only default residue pKas are used. The pH is only
taken into account for standard amino acids.
Examples
--------
......@@ -1070,13 +1068,104 @@ class PDBFixer(object):
>>> fixer = PDBFixer(pdbid='1VII')
>>> fixer.addMissingHydrogens(pH=8.0)
"""
extraDefinitions = self._downloadNonstandardDefinitions()
variants = [self._describeVariant(res, extraDefinitions) for res in self.topology.residues()]
modeller = app.Modeller(self.topology, self.positions)
modeller.addHydrogens(pH=pH, forcefield=forcefield)
modeller.addHydrogens(pH=pH, forcefield=forcefield, variants=variants)
self.topology = modeller.topology
self.positions = modeller.positions
def _downloadNonstandardDefinitions(self):
"""If the file contains any nonstandard residues, download their definitions and build
the information needed to add hydrogens to them.
"""
app.Modeller._loadStandardHydrogenDefinitions()
resnames = set(residue.name for residue in self.topology.residues())
definitions = {}
for name in resnames:
if name not in app.Modeller._residueHydrogens:
# Try to download the definition.
try:
file = urlopen(f'https://files.rcsb.org/ligands/download/{name}.cif')
contents = file.read().decode('utf-8')
file.close()
except:
continue
# Record the atoms and bonds.
from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader
reader = PdbxReader(StringIO(contents))
data = []
reader.read(data)
block = data[0]
atomData = block.getObj('chem_comp_atom')
atomNameCol = atomData.getAttributeIndex('atom_id')
symbolCol = atomData.getAttributeIndex('type_symbol')
leavingCol = atomData.getAttributeIndex('pdbx_leaving_atom_flag')
atoms = [(row[atomNameCol], row[symbolCol].upper(), row[leavingCol] == 'Y') for row in atomData.getRowList()]
bondData = block.getObj('chem_comp_bond')
if bondData is None:
bonds = []
else:
atom1Col = bondData.getAttributeIndex('atom_id_1')
atom2Col = bondData.getAttributeIndex('atom_id_2')
bonds = [(row[atom1Col], row[atom2Col]) for row in bondData.getRowList()]
definitions[name] = (atoms, bonds)
return definitions
def _describeVariant(self, residue, definitions):
"""Build the variant description to pass to addHydrogens() for a residue."""
if residue.name not in definitions:
return None
atoms, bonds = definitions[residue.name]
# See if the heavy atoms are identical.
topologyHeavy = dict((atom.name, atom) for atom in residue.atoms() if atom.element is not None and atom.element != app.element.hydrogen)
definitionHeavy = dict((atom[0], atom) for atom in atoms if atom[1] != '' and atom[1] != 'H')
for name in topologyHeavy:
if name not in definitionHeavy or definitionHeavy[name][1] != topologyHeavy[name].element.symbol.upper():
# This atom isn't present in the definition
return None
for name in definitionHeavy:
if name not in topologyHeavy and not definitionHeavy[name][2]:
# This isn't a leaving atom, and it isn't present in the topology.
return None
# Build the list of hydrogens.
variant = []
definitionAtoms = dict((atom[0], atom) for atom in atoms)
topologyBonds = list(residue.bonds())
for name1, name2 in bonds:
if definitionAtoms[name1][1] == 'H':
h, parent = name1, name2
elif definitionAtoms[name2][1] == 'H':
h, parent = name2, name1
else:
continue
if definitionAtoms[h][2]:
# The hydrogen is marked as a leaving atom. Omit it if the parent is not present,
# or if the parent has an external bond.
if parent not in topologyHeavy:
continue
parentAtom = topologyHeavy[parent]
exclude = False
for atom1, atom2 in topologyBonds:
if atom1 == parentAtom and atom2.residue != residue:
exclude = True
break
if atom2 == parentAtom and atom1.residue != residue:
exclude = True
break
if exclude:
continue
variant.append((h, parent))
return variant
def addSolvent(self, boxSize=None, padding=None, boxVectors=None, positiveIon='Na+', negativeIon='Cl-', ionicStrength=0*unit.molar, boxShape='cube'):
"""Add a solvent box surrounding the structure.
......
This source diff could not be displayed because it is too large. You can view the blob instead.
import pdbfixer
from pathlib import Path
from io import StringIO
def test_nonstandard():
"""Test adding hydrogens to nonstandard residues."""
content = (Path(__file__).parent / "data" / "4JSV.pdb").read_text()
fixer = pdbfixer.PDBFixer(pdbfile=StringIO(content))
fixer.removeChains(chainIndices=[0, 1, 2])
fixer.addMissingHydrogens()
for residue in fixer.topology.residues():
count = sum(1 for atom in residue.atoms() if atom.element.symbol == 'H')
if residue.name == 'ADP':
assert count == 15
if residue.name in ('MG', 'MGF'):
assert count == 0
def test_leaving_atoms():
"""Test adding hydrogens to a nonstandard residue with leaving atoms."""
content = (Path(__file__).parent / "data" / "1BHL.pdb").read_text()
fixer = pdbfixer.PDBFixer(pdbfile=StringIO(content))
fixer.addMissingHydrogens()
for residue in fixer.topology.residues():
count = sum(1 for atom in residue.atoms() if atom.element.symbol == 'H')
if residue.name == 'CAS':
assert count == 10
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment