import MDAnalysis
from MDAnalysis.analysis.rms import * #this pulls in an rmsd function
universe = MDAnalysis.Universe('shmt2.prmtop', 'shmt2.dcd')
prot = universe.select_atoms('protein')
startref = prot.positions
universe.trajectory[-1]
endref = prot.positions
startrmsd = []
endrmsd = []
for ts in universe.trajectory:
startrmsd.append(rmsd(startref,prot.positions))
endrmsd.append(rmsd(endref,prot.positions))
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(startrmsd,label='start')
plt.plot(endrmsd,label='end')
plt.legend(loc='lower center')
plt.ylabel('RMSD')
plt.xlabel("Frame");
Analysis of native contacts q over a trajectory.
What counts as a contact?
x = np.arange(0.0, 6.0, 0.01)
y = 1.0/(1.0 + np.exp(5.0*(x-1.8*2)))
plt.axvline(2,color='grey',linewidth=1)
plt.plot(x,y);
from MDAnalysis.analysis.contacts import *
help(MDAnalysis.analysis.contacts.Contacts)
Help on class Contacts in module MDAnalysis.analysis.contacts: class Contacts(MDAnalysis.analysis.base.AnalysisBase) | Contacts(u, select, refgroup, method='hard_cut', radius=4.5, pbc=True, kwargs=None, **basekwargs) | | Calculate contacts based observables. | | The standard methods used in this class calculate the fraction of native | contacts *Q* from a trajectory. | | | .. rubric:: Contact API | | By defining your own method it is possible to calculate other observables | that only depend on the distances and a possible reference distance. The | **Contact API** prescribes that this method must be a function with call | signature ``func(r, r0, **kwargs)`` and must be provided in the keyword | argument `method`. | | Attributes | ---------- | results.timeseries : numpy.ndarray | 2D array containing *Q* for all refgroup pairs and analyzed frames | | timeseries : numpy.ndarray | Alias to the :attr:`results.timeseries` attribute. | | .. deprecated:: 2.0.0 | Will be removed in MDAnalysis 3.0.0. Please use | :attr:`results.timeseries` instead. | | | .. versionchanged:: 1.0.0 | ``save()`` method has been removed. Use ``np.savetxt()`` on | :attr:`Contacts.results.timeseries` instead. | .. versionchanged:: 1.0.0 | added ``pbc`` attribute to calculate distances using PBC. | .. versionchanged:: 2.0.0 | :attr:`timeseries` results are now stored in a | :class:`MDAnalysis.analysis.base.Results` instance. | .. versionchanged:: 2.2.0 | :class:`Contacts` accepts both AtomGroup and string for `select` | | Method resolution order: | Contacts | MDAnalysis.analysis.base.AnalysisBase | builtins.object | | Methods defined here: | | __init__(self, u, select, refgroup, method='hard_cut', radius=4.5, pbc=True, kwargs=None, **basekwargs) | Parameters | ---------- | u : Universe | trajectory | select : tuple(AtomGroup, AtomGroup) | tuple(string, string) | two contacting groups that change over time | refgroup : tuple(AtomGroup, AtomGroup) | two contacting atomgroups in their reference conformation. This | can also be a list of tuples containing different atom groups | radius : float, optional (4.5 Angstroms) | radius within which contacts exist in refgroup | method : string | callable (optional) | Can either be one of ``['hard_cut' , 'soft_cut', 'radius_cut']`` or a callable | with call signature ``func(r, r0, **kwargs)`` (the "Contacts API"). | pbc : bool (optional) | Uses periodic boundary conditions to calculate distances if set to ``True``; the | default is ``True``. | kwargs : dict, optional | dictionary of additional kwargs passed to `method`. Check | respective functions for reasonable values. | verbose : bool (optional) | Show detailed progress of the calculation if set to ``True``; the | default is ``False``. | | Notes | ----- | | .. versionchanged:: 1.0.0 | Changed `selection` keyword to `select` | | ---------------------------------------------------------------------- | Readonly properties defined here: | | timeseries | | ---------------------------------------------------------------------- | Methods inherited from MDAnalysis.analysis.base.AnalysisBase: | | run(self, start=None, stop=None, step=None, frames=None, verbose=None) | Perform the calculation | | Parameters | ---------- | start : int, optional | start frame of analysis | stop : int, optional | stop frame of analysis | step : int, optional | number of frames to skip between each analysed frame | frames : array_like, optional | array of integers or booleans to slice trajectory; `frames` can | only be used *instead* of `start`, `stop`, and `step`. Setting | *both* `frames` and at least one of `start`, `stop`, `step` to a | non-default value will raise a :exc:`ValueError`. | | .. versionadded:: 2.2.0 | | verbose : bool, optional | Turn on verbosity | | | .. versionchanged:: 2.2.0 | Added ability to analyze arbitrary frames by passing a list of | frame indices in the `frames` keyword argument. | | ---------------------------------------------------------------------- | Data descriptors inherited from MDAnalysis.analysis.base.AnalysisBase: | | __dict__ | dictionary for instance variables (if defined) | | __weakref__ | list of weak references to the object (if defined)
import MDAnalysis
from MDAnalysis.analysis.contacts import *
u = MDAnalysis.Universe('shmt2.prmtop','shmt2.dcd')
sel_basic = "(resname ARG or resname LYS) and (name NH* or name NZ)"
sel_acidic = "(resname ASP or resname GLU) and (name OE* or name OD*)"
acidic = u.select_atoms(sel_acidic)
basic = u.select_atoms(sel_basic)
CA = Contacts(u, select=(sel_acidic, sel_basic), refgroup=(acidic, basic))
CA.run()
plt.plot(CA.results.timeseries[:, 0], CA.results.timeseries[:, 1])
plt.ylim(0,1)
plt.xlabel("Frame"); plt.ylabel("Fraction native contacts");
CAsoft = Contacts(u, select=(sel_acidic, sel_basic), refgroup=(acidic, basic),method='soft_cut')
CAsoft.run()
plt.plot(CAsoft.results.timeseries[:, 0], CAsoft.results.timeseries[:, 1],label="soft")
plt.plot(CA.results.timeseries[:, 0], CA.results.timeseries[:, 1],label="hard")
plt.legend(loc='best')
plt.ylim(0,1)
plt.xlabel("Frame"); plt.ylabel("Fraction native contacts");
Analysis of distances between groups of atoms. Precursor to contact analysis.
from MDAnalysis.analysis.distances import *
distarr = distance_array(acidic.positions,basic.positions)
import MDAnalysis.analysis.distances
help(MDAnalysis.analysis.distances.distance_array)
Help on function distance_array in module MDAnalysis.lib.distances: distance_array(reference: Union[numpy.ndarray, ForwardRef('AtomGroup')], configuration: Union[numpy.ndarray, ForwardRef('AtomGroup')], box: Optional[numpy.ndarray] = None, result: Optional[numpy.ndarray] = None, backend: str = 'serial') -> numpy.ndarray Calculate all possible distances between a reference set and another configuration. If there are ``n`` positions in `reference` and ``m`` positions in `configuration`, a distance array of shape ``(n, m)`` will be computed. If the optional argument `box` is supplied, the minimum image convention is applied when calculating distances. Either orthogonal or triclinic boxes are supported. If a 2D numpy array of dtype ``numpy.float64`` with the shape ``(n, m)`` is provided in `result`, then this preallocated array is filled. This can speed up calculations. Parameters ---------- reference :numpy.ndarray or :class:`~MDAnalysis.core.groups.AtomGroup` Reference coordinate array of shape ``(3,)`` or ``(n, 3)`` (dtype is arbitrary, will be converted to ``numpy.float32`` internally). Also accepts an :class:`~MDAnalysis.core.groups.AtomGroup`. configuration : numpy.ndarray or :class:`~MDAnalysis.core.groups.AtomGroup` Configuration coordinate array of shape ``(3,)`` or ``(m, 3)`` (dtype is arbitrary, will be converted to ``numpy.float32`` internally). Also accepts an :class:`~MDAnalysis.core.groups.AtomGroup`. box : array_like, optional The unitcell dimensions of the system, which can be orthogonal or triclinic and must be provided in the same format as returned by :attr:`MDAnalysis.coordinates.timestep.Timestep.dimensions`: ``[lx, ly, lz, alpha, beta, gamma]``. result : numpy.ndarray, optional Preallocated result array which must have the shape ``(n, m)`` and dtype ``numpy.float64``. Avoids creating the array which saves time when the function is called repeatedly. backend : {'serial', 'OpenMP'}, optional Keyword selecting the type of acceleration. Returns ------- d : numpy.ndarray (``dtype=numpy.float64``, ``shape=(n, m)``) Array containing the distances ``d[i,j]`` between reference coordinates ``i`` and configuration coordinates ``j``. .. versionchanged:: 0.13.0 Added *backend* keyword. .. versionchanged:: 0.19.0 Internal dtype conversion of input coordinates to ``numpy.float32``. Now also accepts single coordinates as input. .. versionchanged:: 2.3.0 Can now accept an :class:`~MDAnalysis.core.groups.AtomGroup` as an argument in any position and checks inputs using type hinting.
plt.matshow(distarr)
plt.colorbar()
plt.xlabel('Basic'); plt.ylabel('Acidic');
%%html
<div id="mdmatrix" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="http://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');
var divid = '#mdmatrix';
jQuery(divid).asker({
id: divid,
question: "The previous matrix is?",
answers: ["Gaussian",'Symmetric','Diagonal',
'All of the above','None of the above'],
server: "http://bits.csb.pitt.edu/asker.js/example/asker.cgi",
charter: chartmaker})
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();
</script>
Really only interested in short distances
origdistarr = distance_array(acidic.positions,basic.positions)
origdistarr[origdistarr > 6] = 0
plt.matshow(origdistarr)
plt.colorbar()
plt.xlabel('Basic'); plt.ylabel('Acidic');
distarr = np.zeros((acidic.n_atoms, basic.n_atoms))
for ts in u.trajectory:
distarr += distance_array(acidic.positions,basic.positions)
distarr /= universe.trajectory.n_frames
distarr[distarr > 6] = 0
plt.matshow(distarr)
plt.colorbar()
plt.xlabel('Basic'); plt.ylabel('Acidic');
%%html
<div id="dmat" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="http://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');
var divid = '#dmat';
jQuery(divid).asker({
id: divid,
question: "What are the elements of this matrix?",
answers: ["Max distance",'Final Distance','Average Distance',
'Minimum distance'],
server: "http://bits.csb.pitt.edu/asker.js/example/asker.cgi",
charter: chartmaker})
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();
</script>
diff = origdistarr - distarr
plt.matshow(diff,cmap='seismic')
clb = plt.colorbar()
clb.set_label('')
plt.xlabel('Basic'); plt.ylabel('Acidic');
I'm most interested in the loop from residue 368 to 383 (resid 368:383
).
How is this loop changing over the course of the simulation (RMSD to first)?
What is happening with the number of hydrogen bonds it is making with the protein?
u.trajectory[0]
loop = u.select_atoms('resid 368:383')
ref = loop.positions
rmsds = []
for ts in u.trajectory:
rmsds.append(rmsd(ref,loop.positions))
plt.plot(rmsds);
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis
h = HydrogenBondAnalysis(u,'resid 368:383','protein')
h.run();
counts = h.count_by_time()
plt.plot(h.frames,h.count_by_time())
plt.xlabel("Frame #"); plt.ylabel("HBond Count");
%%html
<div id="hbond1" style="width: 500px"></div>
<script>
$('head').append('<link rel="stylesheet" href="http://bits.csb.pitt.edu/asker.js/themes/asker.default.css" />');
var divid = '#hbond1';
jQuery(divid).asker({
id: divid,
question: "Hydrogen bonds are identified based on?",
answers: ["Atom types",'Distances','Angles',
'All of the above'],
server: "http://bits.csb.pitt.edu/asker.js/example/asker.cgi",
charter: chartmaker})
$(".jp-InputArea .o:contains(html)").closest('.jp-InputArea').hide();
</script>
Actually, what I'm really interested in is comparing SHMT2 and SHMT1...
!wget https://asinansaglam.github.io/python_bio_2022/files/shmt1.prmtop
!wget https://asinansaglam.github.io/python_bio_2022/files/shmt1.dcd
--2022-10-11 08:26:38-- https://asinansaglam.github.io/python_bio_2022/files/shmt1.prmtop Resolving asinansaglam.github.io (asinansaglam.github.io)... 2606:50c0:8000::153, 2606:50c0:8001::153, 2606:50c0:8002::153, ... Connecting to asinansaglam.github.io (asinansaglam.github.io)|2606:50c0:8000::153|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 16909535 (16M) [application/octet-stream] Saving to: ‘shmt1.prmtop’ shmt1.prmtop 100%[===================>] 16.13M 32.2MB/s in 0.5s 2022-10-11 08:26:38 (32.2 MB/s) - ‘shmt1.prmtop’ saved [16909535/16909535] --2022-10-11 08:26:39-- https://asinansaglam.github.io/python_bio_2022/files/shmt1.dcd Resolving asinansaglam.github.io (asinansaglam.github.io)... 2606:50c0:8003::153, 2606:50c0:8002::153, 2606:50c0:8001::153, ... Connecting to asinansaglam.github.io (asinansaglam.github.io)|2606:50c0:8003::153|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 97842196 (93M) [application/octet-stream] Saving to: ‘shmt1.dcd’ shmt1.dcd 100%[===================>] 93.31M 29.3MB/s in 3.3s 2022-10-11 08:26:42 (28.4 MB/s) - ‘shmt1.dcd’ saved [97842196/97842196]
Compare the loop regions (resid 368:383 in SHMT2 vs resid 377:392 in SHMT1)
shmt1 = MDAnalysis.Universe('shmt1.prmtop', 'shmt1.dcd')
prot1 = shmt1.select_atoms('protein')
prot1.select_atoms('name CA and resid 377:392').resnames
array(['ASN', 'THR', 'CYS', 'PRO', 'GLY', 'ASP', 'ARG', 'SER', 'ALA', 'LEU', 'ARG', 'PRO', 'SER', 'GLY', 'LEU', 'ARG'], dtype=object)
prot.select_atoms('name CA and resid 368:383').resnames
array(['ASN', 'THR', 'CYS', 'PRO', 'GLY', 'ASP', 'ARG', 'SER', 'ALA', 'ILE', 'THR', 'PRO', 'GLY', 'GLY', 'LEU', 'ARG'], dtype=object)
import MDAnalysis
shmt2 = MDAnalysis.Universe('shmt2.prmtop','shmt2.dcd')
shmt1 = MDAnalysis.Universe('shmt1.prmtop','shmt1.dcd')
shmt2loopbackbone = shmt2.select_atoms('name CA and resid 368:383')
shmt1loopbackbone = shmt1.select_atoms('name CA and resid 377:392')
ref1 = shmt1loopbackbone.positions
ref2 = shmt2loopbackbone.positions
from MDAnalysis.analysis.rms import rmsd
rmsds1 = []
rmsds2 = []
for ts in shmt1.trajectory:
rmsds1.append(rmsd(ref1,shmt1loopbackbone.positions))
for ts in shmt2.trajectory:
rmsds2.append(rmsd(ref2,shmt2loopbackbone.positions))
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(rmsds1,label='shmt1',linewidth=3)
plt.plot(rmsds2,'--',label='shmt2',linewidth=3)
plt.xlabel('Frame')
plt.ylabel('RMSD)')
plt.legend(loc='best');
h1 = HydrogenBondAnalysis(shmt1,'resid 377:392','protein')
h1.run();
h2 = HydrogenBondAnalysis(shmt2,'resid 368:383','protein')
h2.run();
plt.plot(h1.count_by_time(),'-',label='shmt1',linewidth=2)
plt.plot(h2.count_by_time(),'--',label='shmt2',linewidth=2)
plt.xlabel('Frame'); plt.ylabel('HBonds')
plt.legend();
loop1 = shmt1.select_atoms('resid 377:392')
prot1 = shmt1.select_atoms('protein')
CA1 = Contacts(shmt1, select=('resid 377:392','protein'), refgroup=(loop1, prot1),method='soft_cut')
CA1.run()
loop2 = shmt2.select_atoms('resid 368:383')
prot2 = shmt2.select_atoms('protein')
CA2 = Contacts(shmt2, select=('resid 368:383','protein'), refgroup=(loop2, prot2),method='soft_cut')
CA2.run()
plt.plot(CA1.results.timeseries[:, 0], CA1.results.timeseries[:, 1],label="shmt1")
plt.plot(CA2.results.timeseries[:, 0], CA2.results.timeseries[:, 1],label="shmt2")
plt.legend(loc='best')
plt.xlabel("Frame"); plt.ylabel("Fraction native contacts");