Я понимаю, что это старый вопрос, но я обновляю его в надежде, что мой вопрос будет яснее.
Я получаю сообщение об ошибке, когда пытаюсь выполнить этот код, упомянутый ранее. Я делаю следующее:
import Bio.PDB.ResidueDepth as res_depth
surface = res_depth.get_surface(model)
Появляется следующая ошибка:
Traceback (most recent call last):
File "setupPpiDb.py", line 489, in <module>
calcEnergyTerms(pdbsToAnalyzeWithChains)
File "setupPpiDb.py", line 446, in calcEnergyTerms
surface = res_depth.get_surface(model)
AttributeError: type object 'ResidueDepth' has no attribute 'get_surface'
Я пытаюсь импортировать модуль и вызвать функцию в этом модуле. В модуле есть класс ResidueDepth, но мне не нужно это использовать. Есть ли способ просто вызвать функцию без вызова класса?
Вот исходный код ResidueDepth.py, если он полезен:
# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# Copyright (C) 2017, Joao Rodrigues (j.p.g.l.m.rodrigues@gmail.com)
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Calculation of residue depth using command line tool MSMS.
This module uses Michel Sanner's MSMS program for the surface calculation.
See: http://mgltools.scripps.edu/packages/MSMS
Residue depth is the average distance of the atoms of a residue from
the solvent accessible surface.
Residue Depth:
>>> rd = ResidueDepth(model)
>>> print(rd[(chain_id, res_id)])
Direct MSMS interface, typical use:
>>> parser = PDBParser()
>>> structure = parser.get_structure('1FAT.pdb')
>>> model = structure[0]
>>> surface = get_surface(model)
The surface is a Numeric array with all the surface vertices.
Distance to surface:
>>> dist = min_dist(coord, surface)
where coord is the coord of an atom within the volume bound by
the surface (ie. atom depth).
To calculate the residue depth (average atom depth of the atoms
in a residue):
>>> rd = residue_depth(residue, surface)
"""
from __future__ import print_function
import os
import tempfile
import warnings
import numpy
from Bio.PDB import PDBParser
from Bio.PDB import Selection
from Bio.PDB.AbstractPropertyMap import AbstractPropertyMap
from Bio.PDB.Polypeptide import is_aa
from Bio import BiopythonWarning
from Bio import BiopythonDeprecationWarning
# PDB_TO_XYZR is a BASH script and will not run on Windows
# Since it only reads atmtypenumbers to a mapping structure we can replicate
# that functionality here and avoid this dependency altogether.
#
# Description of PDB_TO_XYZR
# Maps residue type and atom name pairs into Connolly ".atm" numeric codes
# as used in MS and AMS, and into actual radius values
#
# In case of missing radius, use 0.01
#
# Table 1: Atom Type to radius
_atomic_radii = {
# atom num dist Rexplicit Runited-atom
1: (0.57, 1.40, 1.40),
2: (0.66, 1.40, 1.60),
3: (0.57, 1.40, 1.40),
4: (0.70, 1.54, 1.70),
5: (0.70, 1.54, 1.80),
6: (0.70, 1.54, 2.00),
7: (0.77, 1.74, 2.00),
8: (0.77, 1.74, 2.00),
9: (0.77, 1.74, 2.00),
10: (0.67, 1.74, 1.74),
11: (0.70, 1.74, 1.86),
12: (1.04, 1.80, 1.85),
13: (1.04, 1.80, 1.80), # P, S, and LonePairs
14: (0.70, 1.54, 1.54), # non-protonated nitrogens
15: (0.37, 1.20, 1.20), # H, D hydrogen and deuterium
16: (0.70, 0.00, 1.50), # obsolete entry, purpose unknown
17: (3.50, 5.00, 5.00), # pseudoatom - big ball
18: (1.74, 1.97, 1.97), # Ca calcium
19: (1.25, 1.40, 1.40), # Zn zinc (traditional radius)
20: (1.17, 1.40, 1.40), # Cu copper (traditional radius)
21: (1.45, 1.30, 1.30), # Fe heme iron
22: (1.41, 1.49, 1.49), # Cd cadmium
23: (0.01, 0.01, 0.01), # pseudoatom - tiny dot
24: (0.37, 1.20, 0.00), # hydrogen vanishing if united-atoms
25: (1.16, 1.24, 1.24), # Fe not in heme
26: (1.36, 1.60, 1.60), # Mg magnesium
27: (1.17, 1.24, 1.24), # Mn manganese
28: (1.16, 1.25, 1.25), # Co cobalt
29: (1.17, 2.15, 2.15), # Se selenium
30: (3.00, 3.00, 3.00), # obsolete entry, original purpose unknown
31: (1.15, 1.15, 1.15), # Yb ytterbium +3 ion --- wild guess only
38: (0.95, 1.80, 1.80), # obsolete entry, original purpose unknown
}
# Table 2: Resname/Aname to Atom Type
# MSMS uses an awk/gawk pattern matching strategy that we cannot replicate
# We will take advantage of our parser to help us in the mapping.
def _get_atom_radius(atom, rtype='united'):
"""Translate an atom object to an atomic radius defined in MSMS (PRIVATE).
Uses information from the parent residue and the atom object to define
the atom type.
Returns the radius (float) according to the selected type:
- explicit (reads hydrogens)
- united (default)
"""
if rtype == 'explicit':
typekey = 1
elif rtype == 'united':
typekey = 2
else:
raise ValueError("Radius type (%r) not understood. "
"Must be 'explicit' or 'united'" % rtype)
resname = atom.parent.resname
het_atm = atom.parent.id[0]
at_name = atom.name
at_elem = atom.element
# Hydrogens
if at_elem == 'H' or at_elem == 'D':
return _atomic_radii[15][typekey]
# HETATMs
elif het_atm == 'W' and at_elem == 'O':
return _atomic_radii[2][typekey]
elif het_atm != ' ' and at_elem == 'CA':
return _atomic_radii[18][typekey]
elif het_atm != ' ' and at_elem == 'CD':
return _atomic_radii[22][typekey]
elif resname == 'ACE' and at_name == 'CA':
return _atomic_radii[9][typekey]
# Main chain atoms
elif at_name == 'N':
return _atomic_radii[4][typekey]
elif at_name == 'CA':
return _atomic_radii[7][typekey]
elif at_name == 'C':
return _atomic_radii[10][typekey]
elif at_name == 'O':
return _atomic_radii[1][typekey]
elif at_name == 'P':
return _atomic_radii[13][typekey]
# CB atoms
elif at_name == 'CB' and resname == 'ALA':
return _atomic_radii[9][typekey]
elif at_name == 'CB' and resname in set(('ILE', 'THR', 'VAL')):
return _atomic_radii[7][typekey]
elif at_name == 'CB':
return _atomic_radii[8][typekey]
# CG atoms
elif at_name == 'CG' and resname in set(('ASN', 'ASP', 'ASX', 'HIS', 'HIP',
'HIE', 'HID', 'HISN', 'HISL',
'LEU', 'PHE', 'TRP', 'TYR')):
return _atomic_radii[10][typekey]
elif at_name == 'CG' and resname == 'LEU':
return _atomic_radii[7][typekey]
elif at_name == 'CG':
return _atomic_radii[8][typekey]
# General amino acids in alphabetical order
elif resname == 'GLN' and at_elem == 'O':
return _atomic_radii[3][typekey]
elif resname == 'ACE' and at_name == 'CH3':
return _atomic_radii[9][typekey]
elif resname == 'ARG' and at_name == 'CD':
return _atomic_radii[8][typekey]
elif resname == 'ARG' and at_name in set(('NE', 'RE')):
return _atomic_radii[4][typekey]
elif resname == 'ARG' and at_name == 'CZ':
return _atomic_radii[10][typekey]
elif resname == 'ARG' and at_name.startswith(('NH', 'RH')):
return _atomic_radii[5][typekey]
elif resname == 'ASN' and at_name == 'OD1':
return _atomic_radii[1][typekey]
elif resname == 'ASN' and at_name == 'ND2':
return _atomic_radii[5][typekey]
elif resname == 'ASN' and at_name.startswith('AD'):
return _atomic_radii[3][typekey]
elif resname == 'ASP' and at_name.startswith(('OD', 'ED')):
return _atomic_radii[3][typekey]
elif resname == 'ASX' and at_name.startswith('OD1'):
return _atomic_radii[1][typekey]
elif resname == 'ASX' and at_name == 'ND2':
return _atomic_radii[3][typekey]
elif resname == 'ASX' and at_name.startswith(('OD', 'AD')):
return _atomic_radii[3][typekey]
elif resname in set(('CYS', 'CYX', 'CYM')) and at_name == 'SG':
return _atomic_radii[13][typekey]
elif resname in set(('CYS', 'MET')) and at_name.startswith('LP'):
return _atomic_radii[13][typekey]
elif resname == 'CUH' and at_name == 'SG':
return _atomic_radii[12][typekey]
elif resname == 'GLU' and at_name.startswith(('OE', 'EE')):
return _atomic_radii[3][typekey]
elif resname in set(('GLU', 'GLN', 'GLX')) and at_name == 'CD':
return _atomic_radii[10][typekey]
elif resname == 'GLN' and at_name == 'OE1':
return _atomic_radii[1][typekey]
elif resname == 'GLN' and at_name == 'NE2':
return _atomic_radii[5][typekey]
elif resname in set(('GLN', 'GLX')) and at_name.startswith('AE'):
return _atomic_radii[3][typekey]
# Histdines and friends
# There are 4 kinds of HIS rings: HIS (no protons), HID (proton on Delta),
# HIE (proton on epsilon), and HIP (protons on both)
# Protonated nitrogens are numbered 4, else 14
# HIS is treated here as the same as HIE
#
# HISL is a deprotonated HIS (the L means liganded)
elif resname in set(('HIS', 'HID', 'HIE', 'HIP', 'HISL')) and at_name in set(('CE1', 'CD2')): # noqa: E501
return _atomic_radii[11][typekey]
elif resname in set(('HIS', 'HID', 'HIE', 'HISL')) and at_name == 'ND1':
return _atomic_radii[14][typekey]
elif resname in set(('HID', 'HIP')) and at_name in set(('ND1', 'RD1')):
return _atomic_radii[4][typekey]
elif resname in set(('HIS', 'HIE', 'HIP')) and at_name in set(('NE2', 'RE2')):
return _atomic_radii[4][typekey]
elif resname in set(('HID', 'HISL')) and at_name in set(('NE2', 'RE2')):
return _atomic_radii[14][typekey]
elif resname in set(('HIS', 'HID', 'HIP', 'HISL')) and at_name.startswith(('AD', 'AE')): # noqa: E501
return _atomic_radii[4][typekey]
# More amino acids
elif resname == 'ILE' and at_name == 'CG1':
return _atomic_radii[8][typekey]
elif resname == 'ILE' and at_name == 'CG2':
return _atomic_radii[9][typekey]
elif resname == 'ILE' and at_name in set(('CD', 'CD1')):
return _atomic_radii[9][typekey]
elif resname == 'LEU' and at_name.startswith('CD'):
return _atomic_radii[9][typekey]
elif resname == 'LYS' and at_name in set(('CG', 'CD', 'CE')):
return _atomic_radii[8][typekey]
elif resname == 'LYS' and at_name in set(('NZ', 'KZ')):
return _atomic_radii[6][typekey]
elif resname == 'MET' and at_name == 'SD':
return _atomic_radii[13][typekey]
elif resname == 'MET' and at_name == 'CE':
return _atomic_radii[9][typekey]
elif resname == 'PHE' and at_name.startswith(('CD', 'CE', 'CZ')):
return _atomic_radii[11][typekey]
elif resname == 'PRO' and at_name in set(('CG', 'CD')):
return _atomic_radii[8][typekey]
elif resname == 'CSO' and at_name in set(('SE', 'SEG')):
return _atomic_radii[9][typekey]
elif resname == 'CSO' and at_name.startswith('OD'):
return _atomic_radii[3][typekey]
elif resname == 'SER' and at_name == 'OG':
return _atomic_radii[2][typekey]
elif resname == 'THR' and at_name == 'OG1':
return _atomic_radii[2][typekey]
elif resname == 'THR' and at_name == 'CG2':
return _atomic_radii[9][typekey]
elif resname == 'TRP' and at_name == 'CD1':
return _atomic_radii[11][typekey]
elif resname == 'TRP' and at_name in set(('CD2', 'CE2')):
return _atomic_radii[10][typekey]
elif resname == 'TRP' and at_name == 'NE1':
return _atomic_radii[4][typekey]
elif resname == 'TRP' and at_name in set(('CE3', 'CZ2', 'CZ3', 'CH2')):
return _atomic_radii[11][typekey]
elif resname == 'TYR' and at_name in set(('CD1', 'CD2', 'CE1', 'CE2')):
return _atomic_radii[11][typekey]
elif resname == 'TYR' and at_name == 'CZ':
return _atomic_radii[10][typekey]
elif resname == 'TYR' and at_name == 'OH':
return _atomic_radii[2][typekey]
elif resname == 'VAL' and at_name in set(('CG1', 'CG2')):
return _atomic_radii[9][typekey]
elif at_name in set(('CD', 'CD')):
return _atomic_radii[8][typekey]
# Co-factors, and other weirdos
elif resname in set(('FS3', 'FS4')) and at_name.startswith('FE') \
and at_name.endswith(('1', '2', '3', '4', '5', '6', '7')):
return _atomic_radii[21][typekey]
elif resname in set(('FS3', 'FS4')) and at_name.startswith('S') \
and at_name.endswith(('1', '2', '3', '4', '5', '6', '7')):
return _atomic_radii[13][typekey]
elif resname == 'FS3' and at_name == 'OXO':
return _atomic_radii[1][typekey]
elif resname == 'FEO' and at_name in set(('FE1', 'FE2')):
return _atomic_radii[21][typekey]
elif resname == 'HEM' and at_name in set(('O1', 'O2')):
return _atomic_radii[1][typekey]
elif resname == 'HEM' and at_name == 'FE':
return _atomic_radii[21][typekey]
elif resname == 'HEM' and at_name in set(('CHA', 'CHB', 'CHC', 'CHD',
'CAB', 'CAC', 'CBB', 'CBC')):
return _atomic_radii[11][typekey]
elif resname == 'HEM' and at_name in set(('NA', 'NB', 'NC', 'ND',
'N A', 'N B', 'N C', 'N D')):
return _atomic_radii[14][typekey]
elif resname == 'HEM' and at_name in set(('C1A', 'C1B', 'C1C', 'C1D',
'C2A', 'C2B', 'C2C', 'C2D',
'C3A', 'C3B', 'C3C', 'C3D',
'C4A', 'C4B', 'C4C', 'C4D',
'CGA', 'CGD')):
return _atomic_radii[10][typekey]
elif resname == 'HEM' and at_name in set(('CMA', 'CMB', 'CMC', 'CMD')):
return _atomic_radii[9][typekey]
elif resname == 'HEM' and at_name == 'OH2':
return _atomic_radii[2][typekey]
elif resname == 'AZI' and at_name in set(('N1', 'N2', 'N3')):
return _atomic_radii[14][typekey]
elif resname == 'MPD' and at_name in set(('C1', 'C5', 'C6')):
return _atomic_radii[9][typekey]
elif resname == 'MPD' and at_name == 'C2':
return _atomic_radii[10][typekey]
elif resname == 'MPD' and at_name == 'C3':
return _atomic_radii[8][typekey]
elif resname == 'MPD' and at_name == 'C4':
return _atomic_radii[7][typekey]
elif resname == 'MPD' and at_name in set(('O7', 'O8')):
return _atomic_radii[2][typekey]
elif resname in set(('SO4', 'SUL')) and at_name == 'S':
return _atomic_radii[13][typekey]
elif resname in set(('SO4', 'SUL', 'PO4', 'PHO')) and at_name in set(('O1', 'O2', 'O3', 'O4')): # noqa: E501
return _atomic_radii[3][typekey]
elif resname == 'PC ' and at_name in set(('O1', 'O2', 'O3', 'O4')):
return _atomic_radii[3][typekey]
elif resname == 'PC ' and at_name == 'P1':
return _atomic_radii[13][typekey]
elif resname == 'PC ' and at_name in set(('C1', 'C2')):
return _atomic_radii[8][typekey]
elif resname == 'PC ' and at_name in set(('C3', 'C4', 'C5')):
return _atomic_radii[9][typekey]
elif resname == 'PC ' and at_name == 'N1':
return _atomic_radii[14][typekey]
elif resname == 'BIG' and at_name == 'BAL':
return _atomic_radii[17][typekey]
elif resname in set(('POI', 'DOT')) and at_name in set(('POI', 'DOT')):
return _atomic_radii[23][typekey]
elif resname == 'FMN' and at_name in set(('N1', 'N5', 'N10')):
return _atomic_radii[4][typekey]
elif resname == 'FMN' and at_name in set(('C2', 'C4', 'C7', 'C8', 'C10',
'C4A', 'C5A', 'C9A')):
return _atomic_radii[10][typekey]
elif resname == 'FMN' and at_name in set(('O2', 'O4')):
return _atomic_radii[1][typekey]
elif resname == 'FMN' and at_name == 'N3':
return _atomic_radii[14][typekey]
elif resname == 'FMN' and at_name in set(('C6', 'C9')):
return _atomic_radii[11][typekey]
elif resname == 'FMN' and at_name in set(('C7M', 'C8M')):
return _atomic_radii[9][typekey]
elif resname == 'FMN' and at_name.startswith(('C1', 'C2', 'C3', 'C4', 'C5')): # noqa: E501
return _atomic_radii[8][typekey]
elif resname == 'FMN' and at_name.startswith(('O2', 'O3', 'O4')):
return _atomic_radii[2][typekey]
elif resname == 'FMN' and at_name.startswith('O5'):
return _atomic_radii[3][typekey]
elif resname == 'FMN' and at_name in set(('OP1', 'OP2', 'OP3')):
return _atomic_radii[3][typekey]
elif resname in set(('ALK', 'MYR')) and at_name == 'OT1':
return _atomic_radii[3][typekey]
elif resname in set(('ALK', 'MYR')) and at_name == 'C01':
return _atomic_radii[10][typekey]
elif resname == 'ALK' and at_name == 'C16':
return _atomic_radii[9][typekey]
elif resname == 'MYR' and at_name == 'C14':
return _atomic_radii[9][typekey]
elif resname in set(('ALK', 'MYR')) and at_name.startswith('C'):
return _atomic_radii[8][typekey]
# Metals
elif at_elem == 'CU':
return _atomic_radii[20][typekey]
elif at_elem == 'ZN':
return _atomic_radii[19][typekey]
elif at_elem == 'MN':
return _atomic_radii[27][typekey]
elif at_elem == 'FE':
return _atomic_radii[25][typekey]
elif at_elem == 'MG':
return _atomic_radii[26][typekey]
elif at_elem == 'CO':
return _atomic_radii[28][typekey]
elif at_elem == 'SE':
return _atomic_radii[29][typekey]
elif at_elem == 'YB':
return _atomic_radii[31][typekey]
# Others
elif at_name == 'SEG':
return _atomic_radii[9][typekey]
elif at_name == 'OXT':
return _atomic_radii[3][typekey]
# Catch-alls
elif at_name.startswith(('OT', 'E')):
return _atomic_radii[3][typekey]
elif at_name.startswith('S'):
return _atomic_radii[13][typekey]
elif at_name.startswith('C'):
return _atomic_radii[7][typekey]
elif at_name.startswith('A'):
return _atomic_radii[11][typekey]
elif at_name.startswith('O'):
return _atomic_radii[1][typekey]
elif at_name.startswith(('N', 'R')):
return _atomic_radii[4][typekey]
elif at_name.startswith('K'):
return _atomic_radii[6][typekey]
elif at_name in set(('PA', 'PB', 'PC', 'PD')):
return _atomic_radii[13][typekey]
elif at_name.startswith('P'):
return _atomic_radii[13][typekey]
elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('O'): # noqa: E501
return _atomic_radii[1][typekey]
elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('N'): # noqa: E501
return _atomic_radii[4][typekey]
elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('C'): # noqa: E501
return _atomic_radii[7][typekey]
elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('P'): # noqa: E501
return _atomic_radii[13][typekey]
elif resname in set(('FAD', 'NAD', 'AMX', 'APU')) and at_name.startswith('H'): # noqa: E501
return _atomic_radii[15][typekey]
else:
warnings.warn('{}:{} not in radii library.'.format(at_name, resname),
BiopythonWarning)
return 0.01
def _read_vertex_array(filename):
"""Read the vertex list into a Numeric array (PRIVATE)."""
with open(filename, "r") as fp:
vertex_list = []
for l in fp:
sl = l.split()
if len(sl) != 9:
# skip header
continue
vl = [float(x) for x in sl[0:3]]
vertex_list.append(vl)
return numpy.array(vertex_list)
def get_surface(model, PDB_TO_XYZR=None, MSMS="msms"):
"""Represent molecular surface as a vertex list array.
Return a Numpy array that represents the vertex list of the
molecular surface.
Arguments:
- PDB_TO_XYZR - deprecated, ignore this.
- MSMS - msms executable (used as argument to os.system)
"""
# Issue warning if PDB_TO_XYZR is given
if PDB_TO_XYZR is not None:
warnings.warn(("PDB_TO_XYZR argument will be deprecated soon"
" in favor of an internal mapping algorithm."),
BiopythonDeprecationWarning)
# Replace pdb_to_xyzr
# Make x,y,z,radius file
atom_list = Selection.unfold_entities(model, 'A')
xyz_tmp = tempfile.mktemp()
with open(xyz_tmp, 'w') as pdb_to_xyzr:
for atom in atom_list:
x, y, z = atom.coord
radius = _get_atom_radius(atom, rtype='united')
print('{:6.3f}\t{:6.3f}\t{:6.3f}\t{:1.2f}'.format(x, y, z, radius),
file=pdb_to_xyzr)
# make surface
surface_tmp = tempfile.mktemp()
MSMS = MSMS + " -probe_radius 1.5 -if %s -of %s > " + tempfile.mktemp()
make_surface = MSMS % (xyz_tmp, surface_tmp)
os.system(make_surface)
surface_file = surface_tmp + ".vert"
assert os.path.isfile(surface_file), \
"Failed to generate surface file using command:\n%s" % make_surface
# read surface vertices from vertex file
surface = _read_vertex_array(surface_file)
return surface
def min_dist(coord, surface):
"""Return minimum distance between coord and surface."""
d = surface - coord
d2 = numpy.sum(d * d, 1)
return numpy.sqrt(min(d2))
def residue_depth(residue, surface):
"""Residue depth as average depth of all its atoms.
Return average distance to surface for all atoms in a residue,
ie. the residue depth.
"""
atom_list = residue.get_unpacked_list()
length = len(atom_list)
d = 0
for atom in atom_list:
coord = atom.get_coord()
d = d + min_dist(coord, surface)
return d / length
def ca_depth(residue, surface):
if not residue.has_id("CA"):
return None
ca = residue["CA"]
coord = ca.get_coord()
return min_dist(coord, surface)
class ResidueDepth(AbstractPropertyMap):
"""Calculate residue and CA depth for all residues."""
def __init__(self, model, pdb_file=None):
"""Initialize the class."""
# Issue warning if pdb_file is given
if pdb_file is not None:
warnings.warn("ResidueDepth no longer requires a pdb file. "
"This argument will be removed in a future release "
"of Biopython.", BiopythonDeprecationWarning)
depth_dict = {}
depth_list = []
depth_keys = []
# get_residue
residue_list = Selection.unfold_entities(model, 'R')
# make surface from PDB file using MSMS
surface = get_surface(model)
# calculate rdepth for each residue
for residue in residue_list:
if not is_aa(residue):
continue
rd = residue_depth(residue, surface)
ca_rd = ca_depth(residue, surface)
# Get the key
res_id = residue.get_id()
chain_id = residue.get_parent().get_id()
depth_dict[(chain_id, res_id)] = (rd, ca_rd)
depth_list.append((residue, (rd, ca_rd)))
depth_keys.append((chain_id, res_id))
# Update xtra information
residue.xtra['EXP_RD'] = rd
residue.xtra['EXP_RD_CA'] = ca_rd
AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)